diff --git a/DESIGN.md b/DESIGN.md new file mode 100644 index 0000000..b318eda --- /dev/null +++ b/DESIGN.md @@ -0,0 +1,1298 @@ +# VoicedForm MLP - System Design Document + +**Version:** 1.0 +**Last Updated:** 2025-11-14 +**Status:** Draft + +--- + +## 1. System Architecture Overview + +### 1.1 High-Level Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Client (Browser) │ +│ ┌──────────────┐ ┌──────────────┐ ┌────────────────────┐ │ +│ │ React UI │ │ WebSocket │ │ Web Audio API │ │ +│ │ Components │ │ Client │ │ (Microphone) │ │ +│ └──────┬───────┘ └──────┬───────┘ └─────────┬──────────┘ │ +└─────────┼──────────────────┼────────────────────┼──────────────┘ + │ │ │ + │ HTTPS │ WSS │ Audio Chunks + │ │ │ +┌─────────▼──────────────────▼────────────────────▼──────────────┐ +│ Next.js Server │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ App Router (Pages + API Routes) │ │ +│ ├─────────────────┬───────────────────┬──────────────────┤ │ +│ │ Auth Layer │ Business Logic │ WebSocket Server│ │ +│ │ (NextAuth.js) │ (Form/Template) │ (ws/Socket.io) │ │ +│ └────────┬────────┴────────┬──────────┴─────────┬────────┘ │ +└───────────┼─────────────────┼────────────────────┼────────────┘ + │ │ │ + │ │ │ + ┌───────▼──────┐ ┌──────▼──────────┐ ┌─────▼──────────┐ + │ Google │ │ Supabase │ │ Whisper │ + │ OAuth │ │ Postgres │ │ Service │ + │ │ │ + Storage │ │ (Python) │ + └──────────────┘ └────────┬────────┘ └────────────────┘ + │ + ┌──────▼──────┐ + │ OpenAI │ + │ API │ + │ (LLM) │ + └─────────────┘ + │ + ┌──────▼──────┐ + │ Gmail │ + │ API │ + └─────────────┘ +``` + +### 1.2 Architecture Patterns + +| Pattern | Implementation | Rationale | +|---------|----------------|-----------| +| **Monolithic Frontend** | Single Next.js application | Simplicity for MLP, easier deployment | +| **API Routes** | Next.js serverless functions | Built-in, no separate backend needed | +| **Real-time Communication** | WebSockets for audio 
streaming | Low latency for voice transcription | +| **Shared Database** | Single Postgres database | Simplified data consistency | +| **Stateless Services** | JWT-based sessions | Horizontal scalability | +| **Event-Driven** | WebSocket events for transcription | Reactive UI updates | + +### 1.3 Component Responsibilities + +#### **Next.js Application** +- Serve React frontend +- Handle authentication flow +- Manage template/form CRUD operations +- Proxy WebSocket connections to Whisper +- Generate PDFs +- Send emails via Gmail API +- Enforce access control + +#### **Whisper Service** +- Accept WebSocket connections +- Receive audio chunks +- Perform speech-to-text transcription +- Return partial/final transcripts +- Handle multiple concurrent connections + +#### **Supabase** +- Store user accounts +- Store templates and form sessions +- Store PDFs (via Storage) +- Enforce Row Level Security (RLS) +- Handle database migrations + +#### **OpenAI API** +- LLM-based field normalization +- Ambiguity resolution +- Natural language date parsing + +#### **Gmail API** +- Send emails with PDF attachments +- Manage OAuth tokens + +--- + +## 2. 
Detailed Component Design + +### 2.1 Frontend Architecture + +``` +app/ +├── (public)/ +│ ├── page.tsx # Landing page +│ └── layout.tsx # Public layout (no auth) +├── (protected)/ +│ ├── dashboard/ +│ │ └── page.tsx # Dashboard +│ ├── templates/ +│ │ ├── new/ +│ │ │ └── page.tsx # Create template +│ │ └── [id]/ +│ │ └── page.tsx # Edit template +│ ├── forms/ +│ │ └── [sessionId]/ +│ │ ├── page.tsx # Voice completion +│ │ └── review/ +│ │ └── page.tsx # Review & edit +│ └── layout.tsx # Protected layout (auth check) +├── api/ +│ ├── auth/ +│ │ ├── [...nextauth]/ +│ │ │ └── route.ts # NextAuth config +│ │ └── callback/ +│ │ └── route.ts # OAuth callback +│ ├── templates/ +│ │ ├── route.ts # GET (list), POST (create) +│ │ └── [id]/ +│ │ └── route.ts # GET, PUT, DELETE +│ ├── forms/ +│ │ ├── create/ +│ │ │ └── route.ts # POST (start session) +│ │ └── [sessionId]/ +│ │ ├── route.ts # GET (session details) +│ │ ├── update/ +│ │ │ └── route.ts # POST (save field) +│ │ ├── complete/ +│ │ │ └── route.ts # POST (mark complete) +│ │ ├── pdf/ +│ │ │ └── route.ts # POST (generate PDF) +│ │ └── email/ +│ │ └── route.ts # POST (send email) +│ └── transcribe/ +│ └── route.ts # WebSocket endpoint +├── components/ +│ ├── auth/ +│ │ └── SignInButton.tsx +│ ├── dashboard/ +│ │ ├── TemplateCard.tsx +│ │ └── RecentSessions.tsx +│ ├── templates/ +│ │ ├── TemplateEditor.tsx +│ │ ├── SectionEditor.tsx +│ │ └── FieldEditor.tsx +│ ├── forms/ +│ │ ├── VoiceRecorder.tsx +│ │ ├── TranscriptDisplay.tsx +│ │ ├── FieldPrompt.tsx +│ │ ├── ProgressIndicator.tsx +│ │ └── NavigationControls.tsx +│ ├── review/ +│ │ ├── FormReview.tsx +│ │ ├── EditableField.tsx +│ │ └── ValidationStatus.tsx +│ └── shared/ +│ ├── Button.tsx +│ ├── Input.tsx +│ └── Modal.tsx +├── lib/ +│ ├── supabase.ts # Supabase client +│ ├── openai.ts # OpenAI client +│ ├── gmail.ts # Gmail API wrapper +│ ├── validation.ts # Field validation logic +│ ├── normalization.ts # Value normalization +│ └── pdf-generator.ts # PDF creation +├── 
types/ +│ ├── template.ts +│ ├── form.ts +│ └── user.ts +└── middleware.ts # Auth middleware +``` + +### 2.2 Key React Components + +#### VoiceRecorder Component +```typescript +interface VoiceRecorderProps { + onTranscript: (text: string, isFinal: boolean) => void; + onError: (error: Error) => void; + fieldHint?: string; +} + +// State: +// - isRecording: boolean +// - audioContext: AudioContext +// - mediaStream: MediaStream +// - websocket: WebSocket + +// Methods: +// - startRecording(): Initialize mic, connect WebSocket +// - stopRecording(): Close connections, finalize transcript +// - sendAudioChunk(chunk: ArrayBuffer): Send to Whisper +``` + +#### TemplateEditor Component +```typescript +interface TemplateEditorProps { + templateId?: string; // undefined for new template +} + +// State: +// - template: Template +// - sections: Section[] +// - isDirty: boolean +// - validationErrors: ValidationError[] + +// Methods: +// - addSection() +// - removeSection(id) +// - addField(sectionId) +// - updateField(sectionId, fieldId, updates) +// - saveTemplate() +// - validateSchema(): ValidationError[] +``` + +#### FormReview Component +```typescript +interface FormReviewProps { + sessionId: string; +} + +// State: +// - values: FormValue[] +// - editingFieldKey: string | null +// - isGenerating: boolean + +// Methods: +// - editField(fieldKey) +// - saveFieldEdit(fieldKey, newValue) +// - generateAndSendPDF(recipientEmail) +``` + +### 2.3 Backend Services + +#### Validation Service (`lib/validation.ts`) +```typescript +interface ValidationResult { + isValid: boolean; + normalizedValue: any; + status: 'ok' | 'warning' | 'error'; + message?: string; +} + +class FieldValidator { + async validate( + rawValue: string, + fieldSchema: FieldSchema, + context?: ValidationContext + ): Promise<ValidationResult> { + // 1. Deterministic validation by type + // 2. Check constraints + // 3. If ambiguous, call LLM + // 4. 
Return result + } + + private validateString(value: string, constraints: Constraints): ValidationResult + private validateNumber(value: string, constraints: Constraints): ValidationResult + private validateDate(value: string, constraints: Constraints): ValidationResult + private validateEnum(value: string, constraints: Constraints): ValidationResult + private async validateWithLLM(value: string, schema: FieldSchema): Promise<ValidationResult> +} +``` + +#### PDF Generator (`lib/pdf-generator.ts`) +```typescript +interface PDFOptions { + sessionId: string; + templateName: string; + values: FormValue[]; + schema: TemplateSchema; + generatedBy: string; + generatedAt: Date; +} + +async function generatePDF(options: PDFOptions): Promise<{ + path: string; + sizeBytes: number; +}> { + // Using @react-pdf/renderer: + // 1. Create PDF document structure + // 2. Render header with metadata + // 3. Iterate sections and fields + // 4. Style and format + // 5. Save to storage + // 6. Return path +} +``` + +#### Gmail Service (`lib/gmail.ts`) +```typescript +interface EmailOptions { + to: string; + subject: string; + body: string; + attachmentPath: string; + attachmentName: string; +} + +class GmailService { + private oauthClient: OAuth2Client; + + async sendEmail(options: EmailOptions): Promise<void> { + // 1. Get user's OAuth token + // 2. Refresh if expired + // 3. Create MIME message with attachment + // 4. Send via Gmail API + // 5. 
Handle errors/retry logic + } + + async getAuthUrl(): string // For OAuth flow + async handleCallback(code: string): Promise +} +``` + +### 2.4 Whisper Service Architecture + +```python +# whisper_service.py +import asyncio +import websockets +import whisper +import numpy as np + +class WhisperServer: + def __init__(self, model_name='base.en'): + self.model = whisper.load_model(model_name) + self.connections = set() + + async def handle_connection(self, websocket, path): + """Handle WebSocket connection from client.""" + self.connections.add(websocket) + audio_buffer = [] + + try: + async for message in websocket: + # Receive binary audio chunk + audio_chunk = np.frombuffer(message, dtype=np.float32) + audio_buffer.append(audio_chunk) + + # Every N chunks, send partial transcript + if len(audio_buffer) >= 10: # ~1 second of audio + audio = np.concatenate(audio_buffer) + result = self.model.transcribe(audio) + await websocket.send(json.dumps({ + 'type': 'partial', + 'text': result['text'] + })) + + # Connection closed, send final transcript + if audio_buffer: + audio = np.concatenate(audio_buffer) + result = self.model.transcribe(audio) + await websocket.send(json.dumps({ + 'type': 'final', + 'text': result['text'] + })) + + except Exception as e: + await websocket.send(json.dumps({ + 'type': 'error', + 'message': str(e) + })) + finally: + self.connections.remove(websocket) + + def run(self, host='0.0.0.0', port=8765): + start_server = websockets.serve(self.handle_connection, host, port) + asyncio.get_event_loop().run_until_complete(start_server) + asyncio.get_event_loop().run_forever() + +if __name__ == '__main__': + server = WhisperServer(model_name='base.en') + server.run() +``` + +**Deployment:** +- Docker container with Python 3.10+ +- Pre-loaded Whisper model in image +- Exposed on port 8765 +- Environment variables: `WHISPER_MODEL`, `PORT` + +--- + +## 3. 
Database Design + +### 3.1 Entity-Relationship Diagram + +``` +┌──────────────┐ +│ users │ +│──────────────│ +│ id (PK) │───┐ +│ auth_prov... │ │ +│ email │ │ +│ name │ │ +│ created_at │ │ +└──────────────┘ │ + │ + ┌────────┴────────┐ + │ │ +┌─────────▼──────┐ ┌───────▼─────────┐ +│ templates │ │ form_sessions │ +│────────────────│ │─────────────────│ +│ id (PK) │◄─┤ id (PK) │ +│ owner_id (FK) │ │ template_id (FK)│ +│ name │ │ user_id (FK) │ +│ schema (jsonb) │ │ status │ +│ created_at │ │ created_at │ +└────────────────┘ └────────┬────────┘ + │ + ┌────────┴────────────┐ + │ │ + ┌───────────▼──────────┐ ┌───────▼────────┐ + │ form_session_values │ │ pdf_documents │ + │──────────────────────│ │────────────────│ + │ id (PK) │ │ id (PK) │ + │ session_id (FK) │ │ session_id (FK)│ + │ field_key │ │ path_or_url │ + │ value_raw │ │ file_size_bytes│ + │ value_normalized │ │ created_at │ + │ validation_status │ └────────────────┘ + │ created_at │ + └──────────────────────┘ +``` + +### 3.2 Table Definitions (SQL) + +```sql +-- Enable UUID extension +CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; + +-- Users table +CREATE TABLE users ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + auth_provider_id TEXT UNIQUE NOT NULL, + email TEXT UNIQUE NOT NULL, + name TEXT, + avatar_url TEXT, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + last_login_at TIMESTAMPTZ +); + +CREATE INDEX idx_users_email ON users(email); +CREATE INDEX idx_users_auth_provider ON users(auth_provider_id); + +-- Templates table +CREATE TABLE templates ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + owner_user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE, + name TEXT NOT NULL, + description TEXT, + schema JSONB NOT NULL, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + deleted_at TIMESTAMPTZ, + + CONSTRAINT valid_schema CHECK (jsonb_typeof(schema) = 'object') +); + +CREATE INDEX idx_templates_owner ON templates(owner_user_id); 
+CREATE INDEX idx_templates_deleted ON templates(deleted_at) WHERE deleted_at IS NULL; + +-- Form sessions table +CREATE TABLE form_sessions ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + template_id UUID NOT NULL REFERENCES templates(id) ON DELETE RESTRICT, + user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE, + status TEXT NOT NULL DEFAULT 'draft', + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + completed_at TIMESTAMPTZ, + + CONSTRAINT valid_status CHECK (status IN ('draft', 'in_progress', 'completed', 'sent')) +); + +CREATE INDEX idx_sessions_user ON form_sessions(user_id); +CREATE INDEX idx_sessions_template ON form_sessions(template_id); +CREATE INDEX idx_sessions_status ON form_sessions(status); + +-- Form session values table +CREATE TABLE form_session_values ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + session_id UUID NOT NULL REFERENCES form_sessions(id) ON DELETE CASCADE, + field_key TEXT NOT NULL, + value_raw TEXT, + value_normalized JSONB, + validation_status TEXT DEFAULT 'ok', + validation_message TEXT, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + + CONSTRAINT valid_validation_status CHECK (validation_status IN ('ok', 'warning', 'error')), + CONSTRAINT unique_session_field UNIQUE (session_id, field_key) +); + +CREATE INDEX idx_values_session ON form_session_values(session_id); + +-- PDF documents table +CREATE TABLE pdf_documents ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + session_id UUID NOT NULL REFERENCES form_sessions(id) ON DELETE CASCADE, + path_or_url TEXT NOT NULL, + file_size_bytes INTEGER, + created_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE INDEX idx_pdfs_session ON pdf_documents(session_id); + +-- Update timestamps trigger +CREATE OR REPLACE FUNCTION update_updated_at_column() +RETURNS TRIGGER AS $$ +BEGIN + NEW.updated_at = NOW(); + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER update_users_updated_at BEFORE UPDATE ON 
users + FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); + +CREATE TRIGGER update_templates_updated_at BEFORE UPDATE ON templates + FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); + +CREATE TRIGGER update_sessions_updated_at BEFORE UPDATE ON form_sessions + FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); + +CREATE TRIGGER update_values_updated_at BEFORE UPDATE ON form_session_values + FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); +``` + +### 3.3 Row Level Security (RLS) Policies + +```sql +-- Enable RLS +ALTER TABLE users ENABLE ROW LEVEL SECURITY; +ALTER TABLE templates ENABLE ROW LEVEL SECURITY; +ALTER TABLE form_sessions ENABLE ROW LEVEL SECURITY; +ALTER TABLE form_session_values ENABLE ROW LEVEL SECURITY; +ALTER TABLE pdf_documents ENABLE ROW LEVEL SECURITY; + +-- Users: Can only read own record +CREATE POLICY users_read_own ON users + FOR SELECT USING (auth.uid() = id); + +-- Templates: Owner can do anything +CREATE POLICY templates_owner_all ON templates + FOR ALL USING (owner_user_id = auth.uid()); + +-- Form sessions: User can manage own sessions +CREATE POLICY sessions_user_all ON form_sessions + FOR ALL USING (user_id = auth.uid()); + +-- Form values: Can access if owns session +CREATE POLICY values_via_session ON form_session_values + FOR ALL USING ( + EXISTS ( + SELECT 1 FROM form_sessions + WHERE form_sessions.id = form_session_values.session_id + AND form_sessions.user_id = auth.uid() + ) + ); + +-- PDFs: Can access if owns session +CREATE POLICY pdfs_via_session ON pdf_documents + FOR ALL USING ( + EXISTS ( + SELECT 1 FROM form_sessions + WHERE form_sessions.id = pdf_documents.session_id + AND form_sessions.user_id = auth.uid() + ) + ); +``` + +### 3.4 Sample Data + +```sql +-- Insert sample user +INSERT INTO users (auth_provider_id, email, name) VALUES + ('google_12345', 'john@example.com', 'John Doe'); + +-- Insert sample template +INSERT INTO templates (owner_user_id, name, description, schema) VALUES + ( + 
(SELECT id FROM users WHERE email = 'john@example.com'), + 'Accident Report', + 'Template for workplace accident reporting', + '{ + "sections": [ + { + "section_name": "Incident Details", + "fields": [ + { + "label": "Date of Incident", + "field_key": "incident_date", + "type": "date", + "constraints": {"required": true}, + "hint": "When did the incident occur?" + }, + { + "label": "Description", + "field_key": "description", + "type": "paragraph", + "constraints": {"required": true, "min_length": 10}, + "hint": "Describe what happened in detail" + } + ] + } + ] + }'::jsonb + ); +``` + +--- + +## 4. Sequence Diagrams + +### 4.1 Authentication Flow + +``` +User Browser Next.js Google Supabase + │ │ │ │ │ + │──Click "Sign In"──>│ │ │ │ + │ │──GET /api/auth/signin──>│ │ + │ │ │──Redirect─>│ │ + │ │<─────OAuth Dialog────────│ │ + │──Authorize──>│ │ │ │ + │ │──────Auth Code────────> │ │ + │ │ │<─Tokens────│ │ + │ │ │──Create/Update User───>│ + │ │ │<────User Record────────│ + │ │<──Set Cookie + Redirect─│ │ + │ │──GET /dashboard──────> │ │ + │<─Dashboard──>│<────HTML───────────────│ │ +``` + +### 4.2 Voice Form Completion Flow + +``` +Browser Next.js Whisper OpenAI Supabase + │ │ │ │ │ + │──Start Recording──> │ │ │ + │──Audio Chunks─────────────> │ │ + │ │<──Partial Transcript─│ │ + │<─Display────│ │ │ │ + │──Stop Recording──> │ │ │ + │ │<───Final Transcript──│ │ + │ │──Validate──> │ │ + │ │──Is Ambiguous? 
│ │ + │ │────Yes──────────────>│ │ + │ │<──Normalized Value───│ │ + │<─Show Value─│ │ │ │ + │──Accept─────> │ │ │ + │ │──Save Field──────────────────────>│ + │ │<─────Success─────────────────────│ + │<─Next Field─│ │ │ │ +``` + +### 4.3 PDF Generation & Email Flow + +``` +Browser Next.js Supabase Gmail API Storage + │ │ │ │ │ + │──Click "Generate & Send"──>│ │ │ + │ │──Fetch Session Data─────>│ │ + │ │<───Values────│ │ │ + │ │──Generate PDF│ │ │ + │ │──Save PDF────────────────────────────>│ + │ │<──PDF URL────────────────────────────│ + │ │──Insert pdf_documents──>│ │ + │ │──Prepare Email──> │ │ + │ │──Attach PDF──> │ │ + │ │──────────────────────────>│ │ + │ │<──────Send Success───────│ │ + │ │──Update Session Status──>│ │ + │<─Success────│ │ │ │ +``` + +### 4.4 Template Creation Flow + +``` +Browser Next.js Supabase + │ │ │ + │──Fill Form─>│ │ + │──Add Sections/Fields───> │ + │──Click "Save"────────────>│ + │ │──Validate Schema + │ │──INSERT INTO templates───>│ + │ │<────Template ID───────────│ + │<─Redirect to /dashboard──│ │ +``` + +--- + +## 5. API Design + +### 5.1 REST API Endpoints + +#### Authentication +``` +POST /api/auth/signin + → Redirect to Google OAuth + +GET /api/auth/callback?code=... 
+ ← 302 Redirect to /dashboard + ← Set-Cookie: session_token + +POST /api/auth/signout + ← 200 OK + ← Clear-Cookie +``` + +#### Templates +``` +GET /api/templates?limit=20&offset=0 + ← 200 OK + ← [{id, name, description, created_at, num_sessions}] + +POST /api/templates + ← 201 Created + ← {id, name, description, schema, created_at} + +GET /api/templates/:id + ← 200 OK + ← {id, name, description, schema, owner_user_id, created_at} + +PUT /api/templates/:id + ← 200 OK + ← {id, name, description, schema, updated_at} + +DELETE /api/templates/:id + ← 204 No Content +``` + +#### Form Sessions +``` +POST /api/forms/create + Body: {template_id} + ← 201 Created + ← {session_id, redirect_url: "/forms/:session_id"} + +GET /api/forms/:sessionId + ← 200 OK + ← { + session: {id, status, created_at}, + template: {name, schema}, + values: [{field_key, value_normalized, validation_status}] + } + +POST /api/forms/:sessionId/update + Body: {field_key, value_raw, value_normalized} + ← 200 OK + ← {validation_status, validation_message} + +POST /api/forms/:sessionId/complete + ← 200 OK + ← {redirect_url: "/forms/:sessionId/review"} + +POST /api/forms/:sessionId/pdf + ← 201 Created + ← {pdf_url, document_id} + +POST /api/forms/:sessionId/email + Body: {recipient_email, subject, message} + ← 200 OK + ← {sent: true} +``` + +### 5.2 WebSocket Protocol + +**Endpoint:** `wss://app.voicedform.com/api/transcribe?session_id=...` + +**Client → Server Messages:** +```json +// Binary audio chunk (ArrayBuffer) +[Float32Array audio data] +``` + +**Server → Client Messages:** +```json +// Partial transcript +{ + "type": "partial", + "text": "This is a partial trans", + "timestamp": "2025-11-14T10:30:00Z" +} + +// Final transcript +{ + "type": "final", + "text": "This is a partial transcript.", + "confidence": 0.95, + "timestamp": "2025-11-14T10:30:02Z" +} + +// Error +{ + "type": "error", + "message": "Transcription service unavailable", + "code": "SERVICE_ERROR" +} +``` + +### 5.3 Error Response 
Format + +All API errors follow this structure: +```json +{ + "error": { + "code": "VALIDATION_ERROR", + "message": "Template schema is invalid", + "details": { + "field": "schema.sections[0].fields[1].type", + "issue": "Invalid field type 'textarea'. Must be one of: string, paragraph, number, date, enum" + } + } +} +``` + +**Standard Error Codes:** +- `AUTH_REQUIRED`: 401 Unauthorized +- `FORBIDDEN`: 403 Forbidden +- `NOT_FOUND`: 404 Not Found +- `VALIDATION_ERROR`: 400 Bad Request +- `CONFLICT`: 409 Conflict +- `INTERNAL_ERROR`: 500 Internal Server Error + +--- + +## 6. Security Architecture + +### 6.1 Authentication Flow + +``` +1. User clicks "Sign in with Google" +2. Next.js redirects to Google OAuth consent page +3. User authorizes application +4. Google redirects to /api/auth/callback with code +5. Next.js exchanges code for tokens +6. Next.js creates/updates user in Supabase +7. Next.js issues JWT session token (HTTP-only cookie) +8. Subsequent requests include cookie for auth +``` + +### 6.2 Authorization Model + +| Resource | Rule | +|----------|------| +| Templates | User can only access own templates | +| Form Sessions | User can only access sessions they created | +| PDF Documents | User can only download PDFs for their sessions | +| Admin Routes | N/A (no admin for MLP) | + +**Implementation:** +- Middleware checks session token on all `/dashboard`, `/templates`, `/forms` routes +- API routes validate user ID from token matches resource owner +- Supabase RLS enforces at database level + +### 6.3 Data Protection + +| Data Type | Protection | +|-----------|------------| +| Passwords | N/A (OAuth only) | +| Session Tokens | HTTP-only, Secure, SameSite=Lax cookies | +| API Keys (OpenAI, Gmail) | Environment variables, never sent to client | +| User Data | Encrypted at rest (Supabase default) | +| Audio Streams | TLS in transit, not persisted | +| PDFs | Supabase Storage with signed URLs (1-hour expiry) | + +### 6.4 Threat Model & Mitigations + +| Threat | 
Mitigation | +|--------|------------| +| XSS | React automatic escaping, CSP headers | +| CSRF | SameSite cookies, custom headers for mutations | +| SQL Injection | Parameterized queries via Supabase client | +| Auth Token Theft | HTTP-only cookies, short expiry (7 days) | +| Unauthorized Access | RLS policies, server-side auth checks | +| DDoS on Whisper | Rate limiting, WebSocket connection limits | +| Prompt Injection (LLM) | Input sanitization, system prompt hardening | + +--- + +## 7. Performance Optimization + +### 7.1 Frontend Optimizations + +| Technique | Implementation | +|-----------|----------------| +| Code Splitting | Next.js automatic route-based splitting | +| Lazy Loading | `React.lazy()` for heavy components (PDF preview) | +| Image Optimization | Next.js `<Image>` component | +| Caching | `Cache-Control` headers for static assets | +| Prefetching | `<Link>` for dashboard navigation | + +### 7.2 Backend Optimizations + +| Technique | Implementation | +|-----------|----------------| +| Database Indexing | All foreign keys and frequently queried fields | +| Connection Pooling | Supabase handles automatically | +| Query Optimization | Select only needed columns, avoid N+1 queries | +| Caching | Redis for session data (future enhancement) | +| API Rate Limiting | `next-rate-limit` middleware | + +### 7.3 Audio/Transcription Optimizations + +| Technique | Implementation | +|-----------|----------------| +| Audio Compression | Use Opus codec if possible | +| Chunking | 1-second chunks to balance latency/accuracy | +| Model Selection | Use `tiny.en` for speed vs `base.en` for accuracy | +| Batching | Process multiple chunks together when possible | +| Connection Reuse | Keep WebSocket alive across fields | + +### 7.4 Performance Targets + +| Metric | Target | Measurement | +|--------|--------|-------------| +| Time to Interactive (TTI) | <3s | Lighthouse | +| API Response Time (p95) | <500ms | Server logs | +| Transcription Latency (p95) | <1.5s | Client-side timing 
| +| LLM Validation (p95) | <3s | Server logs | +| PDF Generation | <5s | Server logs | +| Database Query (p95) | <100ms | Supabase dashboard | + +--- + +## 8. Deployment Architecture + +### 8.1 Hosting Plan + +| Component | Platform | Rationale | +|-----------|----------|-----------| +| Next.js App | Vercel | Built for Next.js, easy deployment, edge functions | +| Supabase | Supabase Cloud | Managed Postgres + Storage, generous free tier | +| Whisper Service | Modal | Serverless GPU, WebSocket support, auto-scaling | +| OpenAI API | OpenAI Cloud | Pay-per-use | +| Gmail API | Google Cloud | Free within limits | + +### 8.2 Environment Variables + +**Next.js (.env.local):** +```bash +# Supabase +NEXT_PUBLIC_SUPABASE_URL=https://xxx.supabase.co +NEXT_PUBLIC_SUPABASE_ANON_KEY=eyJ... +SUPABASE_SERVICE_ROLE_KEY=eyJ... + +# NextAuth +NEXTAUTH_URL=https://app.voicedform.com +NEXTAUTH_SECRET=random_32_char_string + +# Google OAuth +GOOGLE_CLIENT_ID=xxx.apps.googleusercontent.com +GOOGLE_CLIENT_SECRET=GOCSPX-xxx + +# OpenAI +OPENAI_API_KEY=sk-xxx + +# Gmail API +GMAIL_CLIENT_ID=xxx.apps.googleusercontent.com +GMAIL_CLIENT_SECRET=GOCSPX-xxx + +# Whisper Service (Modal deployment) +WHISPER_WS_URL=wss://your-workspace--voicedform-whisper-transcribe-websocket.modal.run +``` + +**Note**: Whisper service is deployed to Modal. No additional environment variables needed for Whisper as configuration is handled in the Modal deployment script (`whisper-service/whisper_server.py`). 
+ +### 8.3 Deployment Pipeline + +``` +Developer GitHub Vercel Production + │ │ │ │ + │──git push───> │ │ │ + │ │──Webhook────>│ │ + │ │ │──Build Next.js │ + │ │ │──Run Tests │ + │ │ │──Deploy Edge───>│ + │ │ │<──Success──────│ + │<───Notification────────────────│ │ +``` + +**Whisper Service Deployment (Modal):** +```bash +# Install Modal CLI +pip install modal + +# Authenticate with Modal +modal token new + +# Deploy Whisper service +cd whisper-service +modal deploy whisper_server.py + +# Get WebSocket endpoint URL from deployment output +# Example: wss://your-workspace--voicedform-whisper-transcribe-websocket.modal.run +``` + +See `/whisper-service/README.md` for detailed configuration, monitoring, and troubleshooting. + +### 8.4 Monitoring & Logging + +| Service | Tool | Metrics | +|---------|------|---------| +| Frontend | Vercel Analytics | Core Web Vitals, page views | +| Backend | Vercel Logs | API errors, response times | +| Database | Supabase Dashboard | Query performance, connection count | +| Whisper | Modal Dashboard & Logs | Transcription errors, latency, GPU usage, costs | +| Errors | Sentry (optional) | Exception tracking | + +--- + +## 9. Data Flow Examples + +### 9.1 Creating a Template + +``` +1. User fills out template form: + - Name: "Accident Report" + - Description: "Workplace accident reporting" + - Adds section "Incident Details" + - Adds fields: incident_date (date), description (paragraph) + +2. User clicks "Save Template" + +3. Browser sends POST /api/templates: + { + "name": "Accident Report", + "description": "Workplace accident reporting", + "schema": { + "sections": [{ + "section_name": "Incident Details", + "fields": [ + { + "label": "Date of Incident", + "field_key": "incident_date", + "type": "date", + "constraints": {"required": true}, + "hint": "When did it happen?" 
+ }, + { + "label": "Description", + "field_key": "description", + "type": "paragraph", + "constraints": {"required": true, "min_length": 20}, + "hint": "Describe what happened" + } + ] + }] + } + } + +4. API route validates schema: + - All required fields present + - Field types valid + - No duplicate field_keys + +5. API route inserts into Supabase: + INSERT INTO templates (owner_user_id, name, description, schema) + VALUES (current_user_id, 'Accident Report', '...', {...}) + +6. Returns 201 Created with template ID + +7. Browser redirects to /dashboard +``` + +### 9.2 Completing a Form with Voice + +``` +1. User on /forms/abc-123 page + - Current field: "Date of Incident" (type: date) + - Hint displayed: "When did it happen?" + +2. User clicks "Record" button + +3. Browser captures microphone audio (Web Audio API) + +4. Audio chunks sent via WebSocket to Whisper service: + - Format: Float32Array, 16kHz sample rate + - Chunk size: ~1 second + +5. Whisper processes audio: + - Partial transcripts sent every ~1 second: + {"type": "partial", "text": "It happened on Nov"} + +6. Browser displays partial transcript in real-time + +7. User stops recording + +8. Whisper sends final transcript: + {"type": "final", "text": "It happened on November 10th"} + +9. Next.js validates field: + a. Extract value: "November 10th" + b. Field type is "date" + c. Parse natural language date → 2025-11-10 + d. Check constraints: required=true ✓ + e. Return normalized value + +10. Browser shows: + - Raw: "It happened on November 10th" + - Normalized: "November 10, 2025" + - Validation: ✓ OK + +11. User clicks "Accept" + +12. Browser sends POST /api/forms/abc-123/update: + { + "field_key": "incident_date", + "value_raw": "It happened on November 10th", + "value_normalized": "2025-11-10" + } + +13. API saves to form_session_values table + +14. Browser progresses to next field: "Description" +``` + +### 9.3 Generating and Sending PDF + +``` +1. 
User completes all fields, on /forms/abc-123/review + +2. User edits one field inline (fixes typo) + - POST /api/forms/abc-123/update saves the change to form_session_values + +3. User enters recipient email: "safety@company.com" + +4. User clicks "Generate PDF & Send Email" + +5. Browser sends POST /api/forms/abc-123/email: + { + "recipient_email": "safety@company.com", + "subject": "Accident Report - November 10, 2025" + } + +6. API route: + a. Fetch session data (template + values) + b. Call PDF generator: + - Render React PDF component + - Layout sections and fields + - Add metadata header + c. Save PDF to Supabase Storage: + - Bucket: "form-pdfs" + - Path: "abc-123/report_20251114.pdf" + d. Insert record in pdf_documents table + e. Get signed URL (1-hour expiry) + f. Call Gmail API: + - Create MIME message + - Attach PDF from signed URL + - Send to recipient + g. Update form_sessions.status = 'sent' + +7. Return 200 OK + +8. Browser shows success message, redirects to /dashboard +``` + +--- + +## 10. Error Handling Strategy + +### 10.1 Frontend Error Boundaries + +```typescript +// app/error.tsx (Global error boundary) +'use client' + +export default function Error({ + error, + reset, +}: { + error: Error & { digest?: string } + reset: () => void +}) { + return ( +
<div> + <h2>Something went wrong!</h2> + <p>{error.message}</p> + <button onClick={() => reset()}>Try again</button> + </div>
+ ) +} +``` + +### 10.2 API Error Handling + +```typescript +// lib/api-error.ts +export class APIError extends Error { + constructor( + public statusCode: number, + public code: string, + message: string, + public details?: any + ) { + super(message) + } + + toJSON() { + return { + error: { + code: this.code, + message: this.message, + details: this.details, + }, + } + } +} + +// Usage in API route: +if (!template) { + throw new APIError(404, 'NOT_FOUND', 'Template not found') +} +``` + +### 10.3 Graceful Degradation + +| Failure | Fallback | +|---------|----------| +| Whisper service down | Show manual text input field | +| LLM API timeout | Use deterministic validation only | +| Database connection lost | Retry 3x, then show maintenance page | +| PDF generation fails | Allow download as JSON | +| Email send fails | Save PDF, show download link | + +--- + +## 11. Testing Strategy + +### 11.1 Unit Tests +- Field validation logic (`lib/validation.ts`) +- Value normalization functions (`lib/normalization.ts`) +- Schema validation (`lib/template-schema.ts`) + +### 11.2 Integration Tests +- API route handlers (templates, forms CRUD) +- Database operations +- OAuth flow (mocked) + +### 11.3 End-to-End Tests (Future) +- Complete form workflow +- PDF generation +- Email sending (test mode) + +### 11.4 Manual Testing Checklist +- [ ] Sign in with Google works +- [ ] Create template with all field types +- [ ] Start form session +- [ ] Record voice for each field type +- [ ] Transcript accuracy acceptable +- [ ] Normalization works for dates, numbers, enums +- [ ] Edit fields in review mode +- [ ] Generate PDF (verify formatting) +- [ ] Send email (verify receipt) +- [ ] Resume incomplete session + +--- + +## 12. 
Future Enhancements + +### 12.1 Phase 2 (Post-MLP) +- Multi-language support (Spanish, French) +- Mobile-responsive UI +- Voice output (TTS for confirmations) +- Collaborative forms (multiple users) +- Template versioning +- Advanced analytics dashboard +- Export formats (CSV, Excel) + +### 12.2 Technical Debt to Address +- Add comprehensive test suite +- Implement caching layer (Redis) +- Set up staging environment +- Add performance monitoring (Datadog, New Relic) +- Implement request tracing +- Add database replication for HA +- Set up CDN for static assets + +--- + +**Document Status:** Ready for Implementation +**Next Steps:** Create SPEC.md with task breakdown diff --git a/REQ.md b/REQ.md new file mode 100644 index 0000000..2f9c20b --- /dev/null +++ b/REQ.md @@ -0,0 +1,946 @@ +# VoicedForm MLP - Requirements Document + +**Version:** 1.0 +**Last Updated:** 2025-11-14 +**Status:** Draft + +--- + +## 1. Product Summary + +### 1.1 Overview +VoicedForm is an **internal-only** web application that enables authenticated users to create form templates and complete them using voice input. The system provides real-time transcription, intelligent field validation, and automated PDF generation with email delivery. + +### 1.2 Core Value Proposition +- **Voice-first form completion**: Hands-free data entry using natural language +- **Template-driven workflow**: Reusable form structures for recurring tasks +- **Intelligent validation**: Schema-based validation enhanced by LLM disambiguation +- **Automated delivery**: One-click PDF generation and Gmail distribution + +### 1.3 Key Capabilities +1. Create and manage form templates +2. Complete forms using voice input with real-time transcription +3. Deterministic validation with LLM-assisted ambiguity resolution +4. Review and correct captured data +5. Generate formatted PDFs +6. 
Send completed forms via Gmail + +### 1.4 Constraints +- **Internal use only**: No public access +- **English only**: Single language for MLP +- **No voice output**: Voice is used for input only (no TTS); typed manual edits remain available (see FR-FORM-006) +- **Google authentication**: Single sign-on provider + +--- + +## 2. User Roles & Access + +### 2.1 Authenticated User +**Who:** Internal team members with Google accounts +**Access Level:** Full application access after OAuth +**Capabilities:** +- Create, edit, and delete own templates +- Start form sessions from any template +- Complete forms using voice input +- Review, edit, and submit forms +- Generate and send PDFs via email + +### 2.2 Unauthenticated Visitor +**Who:** Anyone accessing the application URL +**Access Level:** Landing page only +**Capabilities:** +- View landing page +- Sign in with Google + +--- + +## 3. Functional Requirements + +### 3.1 Authentication (FR-AUTH) + +#### FR-AUTH-001: Google OAuth Sign-In +- **Description:** Users must authenticate using Google OAuth 2.0 +- **Flow:** + 1. User clicks "Sign in with Google" on landing page + 2. OAuth flow initiated with Google + 3. On success, user record created/updated in database + 4. Session token issued + 5. 
User redirected to /dashboard +- **Acceptance Criteria:** + - First-time users create new user record + - Returning users update last_login timestamp + - Invalid/cancelled auth returns user to landing page with error + +#### FR-AUTH-002: Session Management +- **Description:** Maintain user session across requests +- **Requirements:** + - Session persists for 7 days + - Auto-logout on expiry + - Secure HTTP-only cookies +- **Acceptance Criteria:** + - Protected routes redirect to landing if unauthenticated + - Session renewal on active use + +#### FR-AUTH-003: Logout +- **Description:** Users can manually sign out +- **Requirements:** + - Clear session token + - Redirect to landing page +- **Acceptance Criteria:** + - All subsequent requests require re-authentication + +### 3.2 Template Management (FR-TMPL) + +#### FR-TMPL-001: Create Template +- **Description:** Users can create new form templates +- **Fields:** + - Template name (required, max 100 chars) + - Description (optional, max 500 chars) + - Sections (array) + - Fields (array) +- **Acceptance Criteria:** + - Template saved to database with UUID + - Owner set to current user + - Timestamp recorded + - Redirect to template editor on success + +#### FR-TMPL-002: Template Schema Definition +- **Description:** Templates define structured form schemas +- **Field Properties:** + - `label` (string): Display name + - `field_key` (string): Unique identifier within template + - `type` (enum): `string | paragraph | number | date | enum` + - `constraints` (object): Validation rules + - `required` (boolean) + - `min_length` / `max_length` (number, for strings) + - `min` / `max` (number, for numbers) + - `enum_values` (array, for enum type) + - `date_format` (string, for dates) + - `hint` (string): User guidance text +- **Section Properties:** + - `section_name` (string) + - `fields` (array) +- **Acceptance Criteria:** + - Schema stored as JSONB in Postgres + - Schema validated on save + - Duplicate field_keys rejected + 
+#### FR-TMPL-003: Edit Template +- **Description:** Users can modify existing templates +- **Operations:** + - Update name/description + - Add/remove sections + - Add/remove/reorder fields + - Modify field constraints +- **Acceptance Criteria:** + - Only owner can edit + - Changes versioned (updated_at timestamp) + - Active form sessions unaffected by template changes + +#### FR-TMPL-004: Delete Template +- **Description:** Users can delete templates they own +- **Constraints:** + - Cannot delete templates with active (draft/in_progress) sessions + - Completed sessions remain in database +- **Acceptance Criteria:** + - Confirmation dialog required + - Soft delete (set deleted_at timestamp) + - Removed from dashboard listing + +#### FR-TMPL-005: List Templates +- **Description:** Dashboard displays user's templates +- **Display:** + - Template name + - Created date + - Last modified date + - Number of completed sessions +- **Sorting:** + - Default: Most recently modified first +- **Acceptance Criteria:** + - Only user's own templates shown + - Deleted templates excluded + +### 3.3 Voice-Driven Form Completion (FR-FORM) + +#### FR-FORM-001: Create Form Session +- **Description:** User initiates form completion from template +- **Flow:** + 1. User selects template from dashboard + 2. System creates `form_session` record + 3. Status set to "draft" + 4. 
User redirected to `/forms/[sessionId]` +- **Acceptance Criteria:** + - Session linked to template and user + - Timestamp recorded + - All template fields initialized + +#### FR-FORM-002: Voice Recording +- **Description:** Capture audio from user microphone +- **Requirements:** + - Browser-based audio capture using Web Audio API + - Record in chunks (e.g., 1-second intervals) + - Visual recording indicator (animated button) + - Manual start/stop control +- **Acceptance Criteria:** + - Audio format compatible with Whisper (WAV/PCM preferred) + - Sample rate: 16kHz minimum + - Recording can be stopped and restarted + - No audio persisted to disk + +#### FR-FORM-003: Real-Time Transcription +- **Description:** Convert speech to text using server-hosted Whisper +- **Architecture:** + - WebSocket connection between browser and Whisper service + - Streaming audio chunks sent to server + - Partial/final transcripts returned +- **Requirements:** + - Target latency: <1.5 seconds + - English model (Tiny or Base) + - Handle connection failures gracefully +- **Acceptance Criteria:** + - Transcript displayed in real-time + - Final transcript used for field population + - Errors shown to user with retry option + +#### FR-FORM-004: Field Normalization +- **Description:** Convert raw transcript to typed field value +- **Process:** + 1. Receive transcript from Whisper + 2. Extract field value based on type + 3. Apply normalization rules + 4. Validate against constraints + 5. 
Display normalized value to user +- **Type-Specific Normalization:** + - **string**: Trim whitespace, capitalize if appropriate + - **paragraph**: Preserve formatting, remove filler words + - **number**: Extract numeric value, validate range + - **date**: Parse natural language dates ("tomorrow", "next Friday") + - **enum**: Match to closest allowed value +- **Acceptance Criteria:** + - Normalized value matches field type + - Invalid values flagged with warning + - User can accept or re-record + +#### FR-FORM-005: LLM-Assisted Disambiguation +- **Description:** Use LLM when deterministic parsing fails +- **Trigger Conditions:** + - Multiple possible interpretations + - Ambiguous date/time references + - Unclear enum selection + - Confidence score below threshold +- **Process:** + 1. Send to LLM with: + - Field schema (type, constraints, hint) + - Raw transcript + - Context (previous fields if relevant) + 2. LLM returns: + - Normalized value + - Confidence score + - Explanation + 3. Display to user for confirmation +- **Acceptance Criteria:** + - LLM only invoked when needed (cost optimization) + - User always has final approval + - Explanation shown for transparency + +#### FR-FORM-006: Field Navigation +- **Description:** Sequential progression through form fields +- **Controls:** + - **Accept**: Save current value, advance to next field + - **Back**: Return to previous field + - **Re-record**: Discard current attempt, record again + - **Manual Edit**: Type value instead of voice +- **Acceptance Criteria:** + - Progress indicator shows position in form + - Can navigate backward to correct mistakes + - Current field highlighted + - Section headers displayed for context + +#### FR-FORM-007: Save Session State +- **Description:** Persist form progress incrementally +- **Requirements:** + - Each accepted field saved to `form_session_values` + - Session status updated (draft → in_progress) + - Auto-save on field acceptance +- **Acceptance Criteria:** + - User can close 
browser and resume later + - Partial data not lost on disconnect + - Last modified timestamp updated + +#### FR-FORM-008: Complete Form Session +- **Description:** Mark all fields captured +- **Trigger:** User completes final field +- **Flow:** + 1. Validate all required fields populated + 2. Status changed to "completed" + 3. Redirect to `/forms/[sessionId]/review` +- **Acceptance Criteria:** + - Cannot complete if required fields missing + - Validation errors shown with field navigation + +### 3.4 Review & Correction (FR-REVIEW) + +#### FR-REVIEW-001: Display Completed Form +- **Description:** Show all captured values in structured view +- **Layout:** + - Organized by sections + - Field label + value pairs + - Validation status indicators (✓/⚠/✗) +- **Acceptance Criteria:** + - All fields visible + - Warnings and errors highlighted + - Read-only by default + +#### FR-REVIEW-002: Inline Editing +- **Description:** Allow direct editing of field values +- **Interaction:** + - Click field to enable edit mode + - Type new value + - Save or cancel +- **Validation:** + - Re-validate on change + - Show errors inline + - Prevent save if validation fails +- **Acceptance Criteria:** + - Edit updates `form_session_values` + - Timestamp updated + - Can edit any field + +#### FR-REVIEW-003: Return to Voice Mode +- **Description:** Navigate back to voice completion from review +- **Requirements:** + - Button to return to `/forms/[sessionId]` + - Resume at last completed field + - Status reverts to "in_progress" +- **Acceptance Criteria:** + - No data lost + - Can switch between modes freely + +### 3.5 PDF Generation (FR-PDF) + +#### FR-PDF-001: Generate PDF Document +- **Description:** Create formatted PDF from form data +- **Requirements:** + - Use template schema for layout + - Professional formatting: + - Header with form name and date + - Sections with clear headings + - Field labels and values + - Page numbers + - Include metadata: + - Generated by (user email) + - 
Generated at (timestamp) + - Template version +- **Library:** Use `pdfkit`, `puppeteer`, or `react-pdf` +- **Acceptance Criteria:** + - PDF matches form structure + - All values visible and readable + - File size <5MB + +#### FR-PDF-002: Store PDF +- **Description:** Persist generated PDF for reference +- **Storage Options:** + - Supabase Storage (preferred) + - Local filesystem + - S3-compatible storage +- **Naming:** `{sessionId}_{timestamp}.pdf` +- **Acceptance Criteria:** + - URL/path stored in `pdf_documents` table + - PDF accessible for download + - Record linked to session + +### 3.6 Email Delivery (FR-EMAIL) + +#### FR-EMAIL-001: Gmail OAuth Setup +- **Description:** Authenticate with Gmail API +- **Requirements:** + - OAuth 2.0 flow for Gmail access + - Scopes: `gmail.send` + - Credentials stored securely server-side +- **Acceptance Criteria:** + - User authorizes Gmail access once + - Refresh tokens handled automatically + - Revocation supported + +#### FR-EMAIL-002: Send PDF via Email +- **Description:** Email generated PDF as attachment +- **Trigger:** User clicks "Generate PDF + Send Email" on review page +- **Email Content:** + - **To:** Configurable recipient (user input or template default) + - **From:** Authenticated user's Gmail + - **Subject:** Template name + date + - **Body:** Simple message with form summary + - **Attachment:** Generated PDF +- **Acceptance Criteria:** + - Email sent successfully + - User receives confirmation + - Session status set to "sent" + - Error handling for delivery failures + +#### FR-EMAIL-003: Email Confirmation +- **Description:** Show user confirmation before sending +- **Dialog Content:** + - Recipient email + - Subject line + - Preview of attachment name + - Send/Cancel buttons +- **Acceptance Criteria:** + - No emails sent without explicit confirmation + - User can edit recipient before send + - Can download PDF without sending + +--- + +## 4. 
Routes & Navigation + +### 4.1 Route Map + +| Route | Access | Description | +|-------|--------|-------------| +| `/` | Public | Landing page with sign-in | +| `/dashboard` | Protected | Template list and recent sessions | +| `/templates/new` | Protected | Create new template | +| `/templates/[id]` | Protected | Edit template | +| `/forms/[sessionId]` | Protected | Voice form completion | +| `/forms/[sessionId]/review` | Protected | Review and edit form | +| `/api/auth/callback` | Public | OAuth callback handler | +| `/api/templates` | Protected | Template CRUD endpoints | +| `/api/forms/*` | Protected | Form session endpoints | + +### 4.2 Navigation Flow + +``` +Landing (/) + ├─> Sign in with Google + └─> Dashboard (/dashboard) + ├─> Create Template + │ └─> Template Editor (/templates/[id]) + │ └─> Back to Dashboard + ├─> Start Form (select template) + │ └─> Voice Completion (/forms/[sessionId]) + │ ├─> Review (/forms/[sessionId]/review) + │ │ ├─> Edit inline + │ │ ├─> Back to Voice Completion + │ │ └─> Generate PDF + Send Email + │ │ └─> Dashboard (on success) + │ └─> Save & Exit → Dashboard + └─> Resume Session (from recent list) + └─> Voice Completion or Review (based on status) +``` + +--- + +## 5. 
Data Model + +### 5.1 Database Schema + +#### Table: `users` +| Column | Type | Constraints | Description | +|--------|------|-------------|-------------| +| `id` | uuid | PRIMARY KEY | User identifier | +| `auth_provider_id` | text | UNIQUE NOT NULL | Google OAuth ID | +| `email` | text | UNIQUE NOT NULL | User email | +| `name` | text | | Display name | +| `avatar_url` | text | | Profile picture URL | +| `created_at` | timestamptz | DEFAULT NOW() | Account creation | +| `updated_at` | timestamptz | DEFAULT NOW() | Last update | +| `last_login_at` | timestamptz | | Last login timestamp | + +#### Table: `templates` +| Column | Type | Constraints | Description | +|--------|------|-------------|-------------| +| `id` | uuid | PRIMARY KEY | Template identifier | +| `owner_user_id` | uuid | FK → users.id | Template creator | +| `name` | text | NOT NULL | Template name | +| `description` | text | | Template description | +| `schema` | jsonb | NOT NULL | Form structure | +| `created_at` | timestamptz | DEFAULT NOW() | Creation timestamp | +| `updated_at` | timestamptz | DEFAULT NOW() | Last modification | +| `deleted_at` | timestamptz | | Soft delete timestamp | + +**Schema JSONB Structure:** +```json +{ + "sections": [ + { + "section_name": "Personal Information", + "fields": [ + { + "label": "Full Name", + "field_key": "full_name", + "type": "string", + "constraints": { + "required": true, + "min_length": 2, + "max_length": 100 + }, + "hint": "Speak your first and last name clearly" + } + ] + } + ] +} +``` + +#### Table: `form_sessions` +| Column | Type | Constraints | Description | +|--------|------|-------------|-------------| +| `id` | uuid | PRIMARY KEY | Session identifier | +| `template_id` | uuid | FK → templates.id | Associated template | +| `user_id` | uuid | FK → users.id | Form completer | +| `status` | text | CHECK(...) 
| `draft | in_progress | completed | sent` | +| `created_at` | timestamptz | DEFAULT NOW() | Session start | +| `updated_at` | timestamptz | DEFAULT NOW() | Last activity | +| `completed_at` | timestamptz | | Completion timestamp | + +#### Table: `form_session_values` +| Column | Type | Constraints | Description | +|--------|------|-------------|-------------| +| `id` | uuid | PRIMARY KEY | Value identifier | +| `session_id` | uuid | FK → form_sessions.id | Parent session | +| `field_key` | text | NOT NULL | Field identifier from schema | +| `value_raw` | text | | Raw transcript | +| `value_normalized` | jsonb | | Typed value | +| `validation_status` | text | CHECK(...) | `ok | warning | error` | +| `validation_message` | text | | Error/warning details | +| `created_at` | timestamptz | DEFAULT NOW() | First capture | +| `updated_at` | timestamptz | DEFAULT NOW() | Last edit | + +**UNIQUE CONSTRAINT:** `(session_id, field_key)` + +#### Table: `pdf_documents` +| Column | Type | Constraints | Description | +|--------|------|-------------|-------------| +| `id` | uuid | PRIMARY KEY | Document identifier | +| `session_id` | uuid | FK → form_sessions.id | Associated session | +| `path_or_url` | text | NOT NULL | Storage location | +| `file_size_bytes` | integer | | PDF file size | +| `created_at` | timestamptz | DEFAULT NOW() | Generation timestamp | + +### 5.2 Database Indexes +- `users.auth_provider_id` (UNIQUE) +- `users.email` (UNIQUE) +- `templates.owner_user_id` +- `templates.deleted_at` (partial index WHERE deleted_at IS NULL) +- `form_sessions.user_id` +- `form_sessions.template_id` +- `form_sessions.status` +- `form_session_values.session_id` +- `pdf_documents.session_id` + +--- + +## 6. 
API Endpoints + +### 6.1 Authentication + +#### `POST /api/auth/callback` +- **Description:** Handle OAuth callback from Google +- **Request Body:** Authorization code +- **Response:** Session token, redirect URL +- **Status Codes:** 200 (success), 401 (invalid code) + +#### `POST /api/auth/logout` +- **Description:** End user session +- **Response:** Success confirmation +- **Status Codes:** 200 + +### 6.2 Templates + +#### `GET /api/templates` +- **Description:** List user's templates +- **Query Params:** `limit`, `offset` +- **Response:** Array of templates +- **Status Codes:** 200, 401 + +#### `POST /api/templates` +- **Description:** Create new template +- **Request Body:** `{ name, description, schema }` +- **Response:** Created template with ID +- **Status Codes:** 201, 400 (validation error), 401 + +#### `GET /api/templates/[id]` +- **Description:** Get template details +- **Response:** Template object +- **Status Codes:** 200, 404, 403 (not owner) + +#### `PUT /api/templates/[id]` +- **Description:** Update template +- **Request Body:** Updated fields +- **Response:** Updated template +- **Status Codes:** 200, 400, 404, 403 + +#### `DELETE /api/templates/[id]` +- **Description:** Delete template (soft delete) +- **Response:** Success confirmation +- **Status Codes:** 204, 404, 403, 409 (active sessions exist) + +### 6.3 Form Sessions + +#### `POST /api/forms/create` +- **Description:** Start new form session +- **Request Body:** `{ template_id }` +- **Response:** `{ session_id, redirect_url }` +- **Status Codes:** 201, 400, 404 (template not found) + +#### `GET /api/forms/[sessionId]` +- **Description:** Get session details and current state +- **Response:** Session + values + template schema +- **Status Codes:** 200, 404, 403 + +#### `POST /api/forms/[sessionId]/update` +- **Description:** Save field value +- **Request Body:** `{ field_key, value_raw, value_normalized }` +- **Response:** Validation result +- **Status Codes:** 200, 400 + +#### `POST 
/api/forms/[sessionId]/complete` +- **Description:** Mark session as completed +- **Response:** Success confirmation +- **Status Codes:** 200, 400 (incomplete fields) + +#### `POST /api/forms/[sessionId]/pdf` +- **Description:** Generate PDF +- **Response:** `{ pdf_url, document_id }` +- **Status Codes:** 201, 500 (generation failed) + +#### `POST /api/forms/[sessionId]/email` +- **Description:** Send PDF via Gmail +- **Request Body:** `{ recipient_email, subject, message }` +- **Response:** Success confirmation +- **Status Codes:** 200, 400, 500 (send failed) + +### 6.4 Whisper Transcription + +#### `WebSocket /api/transcribe` +- **Description:** Streaming audio transcription +- **Messages:** + - Client → Server: Binary audio chunks + - Server → Client: `{ type: "partial" | "final", text: string }` +- **Connection:** Authenticated via query param or cookie + +--- + +## 7. Non-Functional Requirements + +### 7.1 Performance (NFR-PERF) + +#### NFR-PERF-001: Transcription Latency +- **Requirement:** <1.5 seconds from speech end to transcript display +- **Measurement:** Client-side timestamp logging +- **Target:** 95th percentile + +#### NFR-PERF-002: Validation Latency +- **Requirement:** <1 second for deterministic validation +- **Requirement:** <3 seconds for LLM-assisted validation +- **Target:** 95th percentile + +#### NFR-PERF-003: Page Load Time +- **Requirement:** <2 seconds for initial page load (excluding OAuth) +- **Measurement:** Lighthouse performance score >90 + +#### NFR-PERF-004: PDF Generation +- **Requirement:** <5 seconds for forms with <50 fields +- **Requirement:** <10 seconds for larger forms + +### 7.2 Security (NFR-SEC) + +#### NFR-SEC-001: HTTPS Only +- **Requirement:** All traffic over TLS 1.2+ +- **Enforcement:** HTTP → HTTPS redirect +- **HSTS:** Enabled with 1-year max-age + +#### NFR-SEC-002: Secrets Management +- **Requirement:** All secrets server-side only +- **Implementation:** + - Environment variables for API keys + - No secrets in 
client-side code + - Supabase Row Level Security (RLS) enabled + +#### NFR-SEC-003: Authentication +- **Requirement:** All routes except `/` and `/api/auth/callback` require authentication +- **Session:** HTTP-only, secure, SameSite=Lax cookies +- **Expiry:** 7 days with sliding window + +#### NFR-SEC-004: Data Privacy +- **Requirement:** No raw audio stored on server +- **Requirement:** Transcripts stored only in database +- **Requirement:** PDFs accessible only to session owner + +#### NFR-SEC-005: Input Validation +- **Requirement:** All user inputs sanitized +- **Protection:** XSS prevention, SQL injection prevention (via ORM) + +### 7.3 Observability (NFR-OBS) + +#### NFR-OBS-001: Structured Logging +- **Requirement:** JSON-formatted logs +- **Fields:** + - `timestamp` + - `level` (info, warn, error) + - `event_type` (auth, template_create, session_start, etc.) + - `user_id` + - `session_id` + - `duration_ms` (for operations) + - `error_message` (if applicable) + +#### NFR-OBS-002: Session Tracing +- **Requirement:** Group all logs for a session under single trace ID +- **Implementation:** Pass `session_id` through all operations + +#### NFR-OBS-003: Error Tracking +- **Requirement:** Client-side errors reported to server +- **Implementation:** Global error boundary in React +- **Capture:** + - Error message + - Stack trace + - User context + - Browser/OS info + +### 7.4 Reliability (NFR-REL) + +#### NFR-REL-001: Auto-Save +- **Requirement:** Form progress saved on every field acceptance +- **Recovery:** Users can resume sessions after browser close + +#### NFR-REL-002: Graceful Degradation +- **Requirement:** If Whisper service unavailable, allow manual text input +- **Requirement:** If LLM service unavailable, fall back to deterministic validation only + +#### NFR-REL-003: Database Backups +- **Requirement:** Daily automated backups +- **Retention:** 30 days +- **Recovery:** RPO <24 hours, RTO <4 hours + +### 7.5 Usability (NFR-UX) + +#### NFR-UX-001: 
Keyboard Navigation +- **Requirement:** All functions accessible via keyboard +- **Shortcuts:** + - `Space` or `R`: Start/stop recording + - `Enter`: Accept field + - `Backspace`: Go back + - `E`: Edit field manually + +#### NFR-UX-002: Accessibility +- **Requirement:** WCAG 2.1 Level AA compliance +- **Implementation:** + - Semantic HTML + - ARIA labels + - Sufficient color contrast (4.5:1 minimum) + - Focus indicators + +#### NFR-UX-003: Responsive Design +- **Requirement:** Functional on desktop (1920x1080) and tablet (1024x768) +- **Note:** Mobile support deferred (keyboard requirement) + +#### NFR-UX-004: Error Messages +- **Requirement:** User-friendly error messages +- **Format:** + - Clear description of problem + - Suggested action + - No technical jargon + +--- + +## 8. Technology Stack + +### 8.1 Frontend +- **Framework:** Next.js 14+ (App Router) +- **Language:** TypeScript 5+ +- **UI Library:** React 18+ (React 19 requires Next.js 15+) +- **Styling:** Tailwind CSS (or CSS Modules) +- **State Management:** React Context + hooks +- **Audio Capture:** Web Audio API +- **WebSocket Client:** Native WebSocket or Socket.io-client + +### 8.2 Backend +- **Runtime:** Node.js 20+ +- **API Framework:** Next.js API Routes +- **Database:** Supabase Postgres +- **ORM:** Supabase JS Client +- **Authentication:** NextAuth.js with Google provider +- **WebSocket Server:** ws or Socket.io + +### 8.3 AI/ML Services +- **Transcription:** OpenAI Whisper (Modal deployment) + - Model: `base.en` (configurable: tiny, small, medium) + - Deployment: Modal.com serverless with GPU (T4) + - WebSocket endpoint for real-time transcription + - See `/whisper-service/README.md` for deployment +- **LLM:** OpenAI GPT-4 or GPT-3.5-turbo + - SDK: `openai` npm package + +### 8.4 External Services +- **Email:** Gmail API +- **PDF Generation:** `@react-pdf/renderer` or `puppeteer` +- **File Storage:** Supabase Storage + +### 8.5 Development Tools +- **Package Manager:** npm or pnpm +- **Linting:** ESLint +- **Formatting:** 
Prettier +- **Type Checking:** TypeScript strict mode +- **Testing:** Jest + React Testing Library (optional for MLP) + +--- + +## 9. Out of Scope + +The following are explicitly excluded from the MLP: + +### 9.1 Features +- Public access / registration +- Multi-tenant architecture +- Role-based access control (admin/user roles) +- Collaboration (multiple users on same form) +- Form versioning +- Template marketplace +- Mobile app +- Voice output / text-to-speech +- Multi-language support +- Form analytics / reporting +- Integration with external systems (Salesforce, etc.) +- Offline mode +- Real-time collaboration + +### 9.2 Technical +- Microservices architecture +- Kubernetes deployment +- Load balancing +- CDN integration +- Advanced caching strategies +- A/B testing framework +- Comprehensive test suite (unit/integration/e2e) + +--- + +## 10. Success Criteria + +The MLP is considered successful when: + +### 10.1 Functional Completeness +- [ ] User can sign in with Google +- [ ] User can create a template with 10+ fields across 3 sections +- [ ] User can start a form session +- [ ] User can complete entire form using voice only +- [ ] Transcription accuracy >85% (English, clear speech) +- [ ] User can review and edit captured data +- [ ] PDF generated matches form structure +- [ ] PDF sent via Gmail successfully +- [ ] User can resume incomplete sessions + +### 10.2 Performance +- [ ] Transcription latency <1.5s (p95) +- [ ] Page load <2s (p95) +- [ ] No blocking UI during transcription + +### 10.3 Reliability +- [ ] Zero data loss during session (auto-save works) +- [ ] Graceful handling of Whisper service downtime +- [ ] Database constraints prevent invalid data + +### 10.4 Usability +- [ ] Non-technical user can create template without guidance +- [ ] Non-technical user can complete form without guidance +- [ ] Error messages are clear and actionable + +--- + +## 11. 
Assumptions & Dependencies + +### 11.1 Assumptions +- Users have modern browsers (Chrome/Firefox/Safari latest versions) +- Users have working microphones +- Users have Google accounts +- Network latency <100ms to server +- Users speak clearly in English +- Forms typically <100 fields + +### 11.2 Dependencies +- Google OAuth API availability +- OpenAI API availability (for LLM) +- Gmail API availability +- Supabase service uptime +- Whisper model availability + +### 11.3 Risks +| Risk | Impact | Mitigation | +|------|--------|------------| +| Whisper transcription accuracy low | High | Provide manual edit fallback | +| LLM API costs exceed budget | Medium | Implement aggressive caching, limit calls | +| Gmail API rate limits | Medium | Queue emails, implement retry logic | +| Browser audio API incompatibility | High | Feature detection, graceful degradation | +| Database performance degradation | Medium | Proper indexing, query optimization | + +--- + +## 12. Acceptance Testing Scenarios + +### Scenario 1: First-Time User Journey +1. User navigates to landing page +2. Clicks "Sign in with Google" +3. Authorizes application +4. Lands on empty dashboard +5. Clicks "Create Template" +6. Adds template name "Incident Report" +7. Adds section "Details" +8. Adds 5 fields (string, paragraph, date, number, enum) +9. Saves template +10. Returns to dashboard (template visible) + +**Pass Criteria:** Template appears in list, all fields saved correctly + +### Scenario 2: Voice Form Completion +1. User selects template from dashboard +2. Clicks "Start New Form" +3. Sees first field prompt +4. Clicks record button +5. Speaks field value +6. Sees transcript appear +7. Sees normalized value +8. Clicks "Accept" +9. Progresses to next field +10. Completes all fields +11. Redirected to review page + +**Pass Criteria:** All spoken values captured, transcript matches speech, no crashes + +### Scenario 3: Edit and Submit +1. User on review page +2. Notices error in field 3 +3. 
Clicks field to edit +4. Types correction +5. Saves edit +6. Clicks "Generate PDF + Send Email" +7. Confirms recipient email +8. Clicks "Send" +9. Sees success message +10. Receives email with PDF + +**Pass Criteria:** Edit saved, PDF contains corrected value, email delivered + +--- + +## Appendices + +### Appendix A: Glossary +- **MLP:** Minimum Lovable Product +- **OAuth:** Open Authorization +- **Whisper:** OpenAI's speech recognition model +- **LLM:** Large Language Model +- **RLS:** Row Level Security (Supabase feature) +- **TTS:** Text-to-Speech +- **WCAG:** Web Content Accessibility Guidelines + +### Appendix B: References +- [Next.js Documentation](https://nextjs.org/docs) +- [Supabase Documentation](https://supabase.com/docs) +- [OpenAI Whisper](https://github.com/openai/whisper) +- [Gmail API](https://developers.google.com/gmail/api) +- [Web Audio API](https://developer.mozilla.org/en-US/docs/Web/API/Web_Audio_API) + +--- + +**Document Status:** Ready for Design Phase +**Next Steps:** Create DESIGN.md and SPEC.md diff --git a/SPEC.md b/SPEC.md new file mode 100644 index 0000000..c4e0fcd --- /dev/null +++ b/SPEC.md @@ -0,0 +1,1577 @@ +# VoicedForm MLP - Implementation Specification + +**Version:** 1.0 +**Last Updated:** 2025-11-14 +**Status:** Draft + +--- + +## 1. 
Implementation Overview + +### 1.1 Development Phases + +The implementation is organized into 8 phases (Phase 0 through Phase 7), ordered by the dependencies listed below: + +| Phase | Name | Duration | Dependencies | +|-------|------|----------|--------------| +| **Phase 0** | Project Setup | 1 day | None | +| **Phase 1** | Authentication & Infrastructure | 2 days | Phase 0 | +| **Phase 2** | Template Management | 3 days | Phase 1 | +| **Phase 3** | Whisper Integration | 2 days | Phase 1 | +| **Phase 4** | Voice Form Completion | 4 days | Phase 2, 3 | +| **Phase 5** | Review & PDF Generation | 2 days | Phase 4 | +| **Phase 6** | Email Integration | 1 day | Phase 5 | +| **Phase 7** | Polish & Testing | 2 days | All | + +**Total Estimated Duration:** 17 days + +### 1.2 Implementation Principles + +1. **Incremental Delivery**: Each phase produces working, testable features +2. **Vertical Slices**: Implement full stack for each feature before moving on +3. **Test as You Go**: Manual testing after each task +4. **Database First**: Set up schema before building features +5. **Security from Start**: Authentication and authorization from day one + +--- + +## 2. 
Phase 0: Project Setup + +**Goal:** Initialize Next.js project, configure tooling, set up repository + +### Tasks + +#### TASK-0.1: Initialize Next.js Project +**Priority:** P0 +**Estimated Time:** 1 hour + +**Steps:** +```bash +# Create new Next.js app +npx create-next-app@latest voicedform-app \ + --typescript \ + --tailwind \ + --app \ + --src-dir \ + --import-alias "@/*" + +cd voicedform-app + +# Install core dependencies +npm install \ + @supabase/supabase-js \ + next-auth \ + openai \ + @react-pdf/renderer \ + ws \ + zod \ + date-fns + +# Install dev dependencies +npm install -D \ + @types/node \ + @types/react \ + @types/ws \ + eslint \ + prettier +``` + +**Acceptance Criteria:** +- [ ] Next.js 14+ installed with App Router +- [ ] TypeScript configured with strict mode +- [ ] Tailwind CSS working +- [ ] All dependencies installed without errors +- [ ] `npm run dev` starts development server + +--- + +#### TASK-0.2: Configure Project Structure +**Priority:** P0 +**Estimated Time:** 30 minutes + +**Steps:** +1. Create directory structure: +``` +src/ +├── app/ +│ ├── (public)/ +│ ├── (protected)/ +│ └── api/ +├── components/ +│ ├── auth/ +│ ├── dashboard/ +│ ├── templates/ +│ ├── forms/ +│ ├── review/ +│ └── shared/ +├── lib/ +│ ├── supabase.ts +│ ├── auth.ts +│ ├── validation.ts +│ ├── normalization.ts +│ ├── pdf-generator.ts +│ └── gmail.ts +└── types/ + ├── template.ts + ├── form.ts + └── user.ts +``` + +2. Create placeholder files with TypeScript interfaces + +**Acceptance Criteria:** +- [ ] Directory structure matches design +- [ ] All placeholder files created +- [ ] No TypeScript errors +- [ ] Git repository initialized + +--- + +#### TASK-0.3: Set Up Environment Variables +**Priority:** P0 +**Estimated Time:** 30 minutes + +**Steps:** +1. 
Create `.env.local`: +```bash +# Supabase (placeholders for now) +NEXT_PUBLIC_SUPABASE_URL= +NEXT_PUBLIC_SUPABASE_ANON_KEY= +SUPABASE_SERVICE_ROLE_KEY= + +# NextAuth +NEXTAUTH_URL=http://localhost:3000 +NEXTAUTH_SECRET= + +# Google OAuth (get from console.cloud.google.com) +GOOGLE_CLIENT_ID= +GOOGLE_CLIENT_SECRET= + +# OpenAI +OPENAI_API_KEY= + +# Whisper Service (Modal deployment) +WHISPER_WS_URL=wss://your-workspace--voicedform-whisper-transcribe-websocket.modal.run + +# Gmail API +GMAIL_CLIENT_ID= +GMAIL_CLIENT_SECRET= +``` + +2. Add `.env.local` to `.gitignore` +3. Create `.env.example` with empty values + +**Acceptance Criteria:** +- [ ] `.env.local` file created with all required variables +- [ ] `.env.example` created for documentation +- [ ] Secrets excluded from version control + +--- + +#### TASK-0.4: Configure ESLint & Prettier +**Priority:** P1 +**Estimated Time:** 30 minutes + +**Steps:** +1. Create `.eslintrc.json`: +```json +{ + "extends": ["next/core-web-vitals", "prettier"], + "rules": { + "@typescript-eslint/no-unused-vars": "error", + "@typescript-eslint/no-explicit-any": "warn" + } +} +``` + +2. Create `.prettierrc`: +```json +{ + "semi": false, + "singleQuote": true, + "tabWidth": 2, + "trailingComma": "es5" +} +``` + +3. Add npm scripts: +```json +{ + "scripts": { + "lint": "next lint", + "format": "prettier --write \"src/**/*.{ts,tsx}\"" + } +} +``` + +**Acceptance Criteria:** +- [ ] `npm run lint` executes without errors +- [ ] `npm run format` formats code consistently +- [ ] VS Code (if used) applies formatting on save + +--- + +## 3. Phase 1: Authentication & Infrastructure + +**Goal:** Set up Supabase, implement Google OAuth, create database schema + +### Tasks + +#### TASK-1.1: Set Up Supabase Project +**Priority:** P0 +**Estimated Time:** 1 hour + +**Steps:** +1. Go to [supabase.com](https://supabase.com), create account +2. Create new project: "voicedform-mlp" +3. Copy project URL and keys to `.env.local` +4. 
Enable Row Level Security on all tables once they are created in TASK-1.2 (policies are added in TASK-1.3) + +**Acceptance Criteria:** +- [ ] Supabase project created +- [ ] Connection string works from local machine +- [ ] Environment variables updated + +--- + +#### TASK-1.2: Create Database Schema +**Priority:** P0 +**Estimated Time:** 2 hours + +**Steps:** +1. In Supabase SQL Editor, run schema from DESIGN.md section 3.2 +2. Verify tables created: + - `users` + - `templates` + - `form_sessions` + - `form_session_values` + - `pdf_documents` +3. Verify indexes created +4. Verify triggers created (updated_at) + +**Test Queries:** +```sql +-- Test insert +INSERT INTO users (auth_provider_id, email, name) +VALUES ('test_123', 'test@example.com', 'Test User'); + +-- Test foreign key +INSERT INTO templates (owner_user_id, name, schema) +VALUES ( + (SELECT id FROM users WHERE email = 'test@example.com'), + 'Test Template', + '{"sections": []}'::jsonb +); + +-- Verify +SELECT * FROM templates; +``` + +**Acceptance Criteria:** +- [ ] All tables exist with correct columns +- [ ] Foreign key constraints work +- [ ] Check constraints work (e.g., status enum) +- [ ] Timestamps auto-populate +- [ ] Test data can be inserted and queried + +--- + +#### TASK-1.3: Configure Row Level Security +**Priority:** P0 +**Estimated Time:** 1 hour + +**Steps:** +1. Run RLS policies from DESIGN.md section 3.3 +2. 
Test policies with different user contexts + +**Test:** +```sql +-- Set user context +SELECT set_config('request.jwt.claims', + json_build_object('sub', 'user_uuid_here')::text, + true +); + +-- Try to access another user's template (should return zero rows) +SELECT * FROM templates WHERE owner_user_id != 'user_uuid_here'; +``` + +**Acceptance Criteria:** +- [ ] RLS enabled on all tables +- [ ] Policies prevent cross-user data access +- [ ] Users can access own data +- [ ] Cascade deletes work correctly + +--- + +#### TASK-1.4: Implement Supabase Client +**Priority:** P0 +**Estimated Time:** 1 hour + +**File:** `src/lib/supabase.ts` + +```typescript +import { createClient } from '@supabase/supabase-js' + +const supabaseUrl = process.env.NEXT_PUBLIC_SUPABASE_URL! +const supabaseAnonKey = process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY! + +export const supabase = createClient(supabaseUrl, supabaseAnonKey) + +// Server-side client with service role +export const supabaseAdmin = createClient( + supabaseUrl, + process.env.SUPABASE_SERVICE_ROLE_KEY! 
+) + // Database types +export interface User { + id: string + auth_provider_id: string + email: string + name: string | null + avatar_url: string | null + created_at: string + updated_at: string + last_login_at: string | null +} + +export interface Template { + id: string + owner_user_id: string + name: string + description: string | null + schema: TemplateSchema + created_at: string + updated_at: string + deleted_at: string | null +} + +export interface TemplateSchema { + sections: Section[] +} + +export interface Section { + section_name: string + fields: Field[] +} + +export interface Field { + label: string + field_key: string + type: 'string' | 'paragraph' | 'number' | 'date' | 'enum' + constraints: FieldConstraints + hint?: string +} + +export interface FieldConstraints { + required?: boolean + min_length?: number + max_length?: number + min?: number + max?: number + enum_values?: string[] + date_format?: string +} + +export interface FormSession { + id: string + template_id: string + user_id: string + status: 'draft' | 'in_progress' | 'completed' | 'sent' + created_at: string + updated_at: string + completed_at: string | null +} + +export interface FormValue { + id: string + session_id: string + field_key: string + value_raw: string | null + value_normalized: any + validation_status: 'ok' | 'warning' | 'error' + validation_message: string | null + created_at: string + updated_at: string +} +``` + +**Acceptance Criteria:** +- [ ] Client exports work in both client and server components +- [ ] Types match database schema +- [ ] Can query database successfully + +--- + +#### TASK-1.5: Configure Google OAuth +**Priority:** P0 +**Estimated Time:** 2 hours + +**Steps:** +1. Go to Google Cloud Console +2. Create new project: "VoicedForm" +3. Configure the OAuth consent screen (the legacy Google+ API has been shut down and is not required for Google sign-in) +4. Create OAuth 2.0 credentials: + - Application type: Web application + - Authorized redirect URIs: `http://localhost:3000/api/auth/callback/google` +5. 
Copy Client ID and Secret to `.env.local` + +**File:** `src/app/api/auth/[...nextauth]/route.ts` + +```typescript +import NextAuth from 'next-auth' +import GoogleProvider from 'next-auth/providers/google' +import { supabaseAdmin } from '@/lib/supabase' + +const handler = NextAuth({ + providers: [ + GoogleProvider({ + clientId: process.env.GOOGLE_CLIENT_ID!, + clientSecret: process.env.GOOGLE_CLIENT_SECRET!, + }), + ], + callbacks: { + async signIn({ user, account, profile }) { + if (!user.email) return false + + // Create or update user in Supabase + const { data, error } = await supabaseAdmin + .from('users') + .upsert( + { + auth_provider_id: account.providerAccountId, + email: user.email, + name: user.name, + avatar_url: user.image, + last_login_at: new Date().toISOString(), + }, + { onConflict: 'auth_provider_id' } + ) + .select() + .single() + + if (error) { + console.error('Error upserting user:', error) + return false + } + + return true + }, + async jwt({ token, account, profile }) { + if (account) { + token.accessToken = account.access_token + token.providerId = account.providerAccountId + } + return token + }, + async session({ session, token }) { + // Fetch user ID from Supabase + const { data } = await supabaseAdmin + .from('users') + .select('id') + .eq('auth_provider_id', token.providerId) + .single() + + if (data) { + session.user.id = data.id + } + + return session + }, + }, + pages: { + signIn: '/', + }, +}) + +export { handler as GET, handler as POST } +``` + +**Acceptance Criteria:** +- [ ] Google OAuth consent screen appears +- [ ] User can authorize application +- [ ] User record created/updated in Supabase +- [ ] Session token issued +- [ ] User ID accessible in session + +--- + +#### TASK-1.6: Implement Auth Middleware +**Priority:** P0 +**Estimated Time:** 1 hour + +**File:** `src/middleware.ts` + +```typescript +import { withAuth } from 'next-auth/middleware' + +export default withAuth({ + pages: { + signIn: '/', + }, +}) + +export const 
config = { + matcher: ['/dashboard/:path*', '/templates/:path*', '/forms/:path*'], +} +``` + +**File:** `src/lib/auth.ts` + +```typescript +import { getServerSession } from 'next-auth/next' +import { redirect } from 'next/navigation' + +export async function requireAuth() { + const session = await getServerSession() + if (!session) { + redirect('/') + } + return session +} + +export async function getCurrentUserId(): Promise<string> { + const session = await requireAuth() + return session.user.id +} +``` + +**Acceptance Criteria:** +- [ ] Unauthenticated users redirected to landing page +- [ ] Protected routes require authentication +- [ ] `requireAuth()` helper works in server components +- [ ] User ID accessible in API routes + +--- + +#### TASK-1.7: Create Landing Page +**Priority:** P1 +**Estimated Time:** 2 hours + +**File:** `src/app/(public)/page.tsx` + +```typescript +'use client' + +import { signIn } from 'next-auth/react' +import { Button } from '@/components/shared/Button' + +export default function LandingPage() { + return ( +
+
+

VoicedForm

+

+ Complete forms using your voice. Fast, accurate, and effortless. +

+ +
+ +
+ +
+
+
🎙️
+

Voice Input

+

+ Speak naturally to fill out forms +

+
+
+
+

Smart Validation

+

+ AI-powered field validation +

+
+
+
📄
+

PDF Export

+

+ Generate and email PDFs automatically +

+
+
+
+
+ ) +} +``` + +**Acceptance Criteria:** +- [ ] Landing page renders without errors +- [ ] "Sign in with Google" button triggers OAuth flow +- [ ] Responsive design works on desktop and tablet +- [ ] Visual design is clean and professional + +--- + +## 4. Phase 2: Template Management + +**Goal:** Build template CRUD functionality and editor UI + +### Tasks + +#### TASK-2.1: Create Templates API Routes +**Priority:** P0 +**Estimated Time:** 3 hours + +**File:** `src/app/api/templates/route.ts` + +```typescript +import { NextRequest, NextResponse } from 'next/server' +import { supabaseAdmin } from '@/lib/supabase' +import { getCurrentUserId } from '@/lib/auth' +import { validateTemplateSchema } from '@/lib/validation' + +// GET /api/templates - List user's templates +export async function GET(request: NextRequest) { + try { + const userId = await getCurrentUserId() + const { searchParams } = new URL(request.url) + const limit = parseInt(searchParams.get('limit') || '20') + const offset = parseInt(searchParams.get('offset') || '0') + + const { data, error } = await supabaseAdmin + .from('templates') + .select('*') + .eq('owner_user_id', userId) + .is('deleted_at', null) + .order('updated_at', { ascending: false }) + .range(offset, offset + limit - 1) + + if (error) throw error + + return NextResponse.json(data) + } catch (error) { + return NextResponse.json( + { error: { code: 'INTERNAL_ERROR', message: error.message } }, + { status: 500 } + ) + } +} + +// POST /api/templates - Create new template +export async function POST(request: NextRequest) { + try { + const userId = await getCurrentUserId() + const body = await request.json() + + // Validate schema + const validationResult = validateTemplateSchema(body.schema) + if (!validationResult.valid) { + return NextResponse.json( + { + error: { + code: 'VALIDATION_ERROR', + message: 'Invalid template schema', + details: validationResult.errors + } + }, + { status: 400 } + ) + } + + const { data, error } = await 
supabaseAdmin + .from('templates') + .insert({ + owner_user_id: userId, + name: body.name, + description: body.description || null, + schema: body.schema, + }) + .select() + .single() + + if (error) throw error + + return NextResponse.json(data, { status: 201 }) + } catch (error) { + return NextResponse.json( + { error: { code: 'INTERNAL_ERROR', message: error.message } }, + { status: 500 } + ) + } +} +``` + +**File:** `src/app/api/templates/[id]/route.ts` + +```typescript +// GET /api/templates/:id +// PUT /api/templates/:id +// DELETE /api/templates/:id +// (Implementation similar to above) +``` + +**Acceptance Criteria:** +- [ ] GET /api/templates returns user's templates +- [ ] POST /api/templates creates template +- [ ] PUT /api/templates/:id updates template +- [ ] DELETE /api/templates/:id soft-deletes template +- [ ] Schema validation works +- [ ] Access control enforced (users can't access others' templates) +- [ ] Error responses follow standard format + +--- + +#### TASK-2.2: Implement Template Schema Validation +**Priority:** P0 +**Estimated Time:** 2 hours + +**File:** `src/lib/validation.ts` + +```typescript +import { z } from 'zod' + +const FieldConstraintsSchema = z.object({ + required: z.boolean().optional(), + min_length: z.number().optional(), + max_length: z.number().optional(), + min: z.number().optional(), + max: z.number().optional(), + enum_values: z.array(z.string()).optional(), + date_format: z.string().optional(), +}) + +const FieldSchema = z.object({ + label: z.string().min(1).max(100), + field_key: z.string().regex(/^[a-z_][a-z0-9_]*$/), + type: z.enum(['string', 'paragraph', 'number', 'date', 'enum']), + constraints: FieldConstraintsSchema, + hint: z.string().optional(), +}) + +const SectionSchema = z.object({ + section_name: z.string().min(1).max(100), + fields: z.array(FieldSchema).min(1), +}) + +const TemplateSchemaValidator = z.object({ + sections: z.array(SectionSchema).min(1), +}) + +export function validateTemplateSchema(schema: 
any): { + valid: boolean + errors?: string[] +} { + try { + TemplateSchemaValidator.parse(schema) + + // Check for duplicate field_keys + const fieldKeys = new Set() + for (const section of schema.sections) { + for (const field of section.fields) { + if (fieldKeys.has(field.field_key)) { + return { + valid: false, + errors: [`Duplicate field_key: ${field.field_key}`], + } + } + fieldKeys.add(field.field_key) + } + } + + return { valid: true } + } catch (error) { + if (error instanceof z.ZodError) { + return { + valid: false, + errors: error.errors.map((e) => `${e.path.join('.')}: ${e.message}`), + } + } + return { valid: false, errors: [error.message] } + } +} +``` + +**Acceptance Criteria:** +- [ ] Valid schemas pass validation +- [ ] Invalid field types rejected +- [ ] Duplicate field_keys detected +- [ ] Missing required fields detected +- [ ] Helpful error messages returned + +--- + +#### TASK-2.3: Create Dashboard Page +**Priority:** P0 +**Estimated Time:** 3 hours + +**File:** `src/app/(protected)/dashboard/page.tsx` + +```typescript +import { requireAuth, getCurrentUserId } from '@/lib/auth' +import { supabaseAdmin } from '@/lib/supabase' +import { TemplateCard } from '@/components/dashboard/TemplateCard' +import { RecentSessions } from '@/components/dashboard/RecentSessions' +import Link from 'next/link' + +export default async function DashboardPage() { + await requireAuth() + const userId = await getCurrentUserId() + + // Fetch templates + const { data: templates } = await supabaseAdmin + .from('templates') + .select('*') + .eq('owner_user_id', userId) + .is('deleted_at', null) + .order('updated_at', { ascending: false }) + + // Fetch recent sessions + const { data: recentSessions } = await supabaseAdmin + .from('form_sessions') + .select('*, templates(name)') + .eq('user_id', userId) + .order('updated_at', { ascending: false }) + .limit(5) + + return ( +
+ + +
+ {/* Templates Section */} +
+
+

Templates

+ + Create Template + +
+ + {templates && templates.length > 0 ? ( +
+ {templates.map((template) => ( + + ))} +
+ ) : ( +
+

+ No templates yet. Create your first template to get started! +

+ + Create Template + +
+ )} +
+ + {/* Recent Sessions */} + {recentSessions && recentSessions.length > 0 && ( +
+

Recent Sessions

+ +
+ )} +
+
+ ) +} +``` + +**Acceptance Criteria:** +- [ ] Dashboard renders after authentication +- [ ] Templates displayed in grid +- [ ] "Create Template" button navigates correctly +- [ ] Recent sessions shown (if any) +- [ ] Empty state shown when no templates + +--- + +#### TASK-2.4: Build Template Editor Component +**Priority:** P0 +**Estimated Time:** 5 hours + +**File:** `src/components/templates/TemplateEditor.tsx` + +```typescript +'use client' + +import { useState } from 'react' +import { useRouter } from 'next/navigation' +import { Template, Section, Field } from '@/lib/supabase' +import { SectionEditor } from './SectionEditor' +import { Button } from '@/components/shared/Button' + +interface TemplateEditorProps { + initialTemplate?: Template +} + +export function TemplateEditor({ initialTemplate }: TemplateEditorProps) { + const router = useRouter() + const [name, setName] = useState(initialTemplate?.name || '') + const [description, setDescription] = useState( + initialTemplate?.description || '' + ) + const [sections, setSections] = useState( + initialTemplate?.schema.sections || [] + ) + const [errors, setErrors] = useState([]) + const [isSaving, setIsSaving] = useState(false) + + const addSection = () => { + setSections([ + ...sections, + { section_name: '', fields: [] }, + ]) + } + + const updateSection = (index: number, section: Section) => { + const newSections = [...sections] + newSections[index] = section + setSections(newSections) + } + + const removeSection = (index: number) => { + setSections(sections.filter((_, i) => i !== index)) + } + + const saveTemplate = async () => { + setIsSaving(true) + setErrors([]) + + try { + const templateData = { + name, + description, + schema: { sections }, + } + + const url = initialTemplate + ? `/api/templates/${initialTemplate.id}` + : '/api/templates' + const method = initialTemplate ? 
'PUT' : 'POST' + + const response = await fetch(url, { + method, + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(templateData), + }) + + if (!response.ok) { + const error = await response.json() + setErrors(error.error.details || [error.error.message]) + return + } + + router.push('/dashboard') + } catch (error) { + setErrors([error.message]) + } finally { + setIsSaving(false) + } + } + + return ( +
+

+ {initialTemplate ? 'Edit Template' : 'Create Template'} +

+ + {errors.length > 0 && ( +
+

Errors:

+
    + {errors.map((error, i) => ( +
  • {error}
  • + ))} +
+
+ )} + +
+
+ + setName(e.target.value)} + className="w-full border rounded px-3 py-2" + placeholder="e.g., Accident Report" + /> +
+ +
+ +