fix: Add Vertex AI credentials to backend deployment (#443)

Gkrumbach07 · web-flow · commit f6639ac83384 · 2025-12-04T17:53:44.000-06:00
## Summary The display name auto-generation feature (PR #432) requires backend access to Vertex AI when `CLAUDE_CODE_USE_VERTEX=1`. This PR adds the missing configuration: ### Changes 1. **Add Vertex AI env vars** to backend deployment: - `CLOUD_ML_REGION` - `ANTHROPIC_VERTEX_PROJECT_ID` - `GOOGLE_APPLICATION_CREDENTIALS` All sourced from `operator-config` ConfigMap (same as operator). 2. **Mount `ambient-vertex` secret** as volume at `/app/vertex/`: - Contains `ambient-code-key.json` (GCP service account key) - Marked as `optional: true` so deployments without Vertex still work ### Why This Is Needed When `CLAUDE_CODE_USE_VERTEX=1`, the backend must use Vertex AI for Claude API calls (display name generation). Previously, only `CLAUDE_CODE_USE_VERTEX` was set on the backend deployment; the other Vertex env vars were missing and the credentials file wasn't mounted, causing: ``` ANTHROPIC_VERTEX_PROJECT_ID is required when CLAUDE_CODE_USE_VERTEX=1 ``` ### Testing Tested on dev cluster with Vertex AI enabled - display names now generate correctly via Vertex.
diff --git a/components/backend/handlers/display_name.go b/components/backend/handlers/display_name.go
@@ -24,9 +24,10 @@ const (
 	// anthropicAPIKeyField is the secret field containing the Anthropic API key
 	anthropicAPIKeyField = "ANTHROPIC_API_KEY"
 	// haiku model for quick, cheap name generation (standard API)
-	haiku3Model = "claude-3-5-haiku-20241022"
-	// haiku model for Vertex AI (requires version suffix)
-	haiku3ModelVertex = "claude-3-5-haiku-v2@20241022"
+	haiku3Model = "claude-haiku-4-5-20251001"
+	// haiku model for Vertex AI - Vertex model IDs put an @ before the version date
+	// See: https://platform.claude.com/docs/en/build-with-claude/claude-on-vertex-ai
+	haiku3ModelVertex = "claude-haiku-4-5@20251001"
 	// Maximum display name length
 	maxDisplayNameLength = 50
 	// Timeout for API call
@@ -139,21 +140,24 @@ func getAnthropicClient(ctx context.Context, projectName string) (anthropic.Clie
 		region := os.Getenv("CLOUD_ML_REGION")
 		gcpProjectID := os.Getenv("ANTHROPIC_VERTEX_PROJECT_ID")
 
-		if region == "" {
-			region = "us-central1" // Default region
+		// Default to us-east5 - claude-haiku-4-5 is not available in global region
+		// See: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude
+		if region == "" || region == "global" {
+			region = "us-east5"
 		}
 		if gcpProjectID == "" {
-			return anthropic.Client{}, false, fmt.Errorf("ANTHROPIC_VERTEX_PROJECT_ID is required when CLAUDE_CODE_USE_VERTEX=1")
+			return anthropic.Client{}, false, fmt.Errorf("ANTHROPIC_VERTEX_PROJECT_ID is required when CLAUDE_CODE_USE_VERTEX=1 (check backend deployment env vars)")
 		}
 
 		log.Printf("DisplayNameGen: Using Vertex AI for %s (region: %s, project: %s)", projectName, region, gcpProjectID)
+		// Must pass OAuth scope for Vertex AI - without it, auth fails with "invalid_scope" error
 		client := anthropic.NewClient(
-			vertex.WithGoogleAuth(ctx, region, gcpProjectID),
+			vertex.WithGoogleAuth(ctx, region, gcpProjectID, "https://www.googleapis.com/auth/cloud-platform"),
 		)
 		return client, true, nil
 	}
 
-	// Fall back to API key from project secret
+	// Vertex not enabled - use API key from project secret
 	apiKey, err := getAPIKeyFromSecret(ctx, projectName)
 	if err != nil {
 		return anthropic.Client{}, false, fmt.Errorf("failed to get API key: %w", err)
diff --git a/components/backend/websocket/handlers.go b/components/backend/websocket/handlers.go
@@ -227,12 +227,15 @@ func PostSessionMessageWS(c *gin.Context) {
 	c.JSON(http.StatusAccepted, gin.H{"status": "queued"})
 }
 
+// maxUserMessageChars is the maximum length (in bytes, as measured by len) of user message text passed to display name generation
+const maxUserMessageChars = 1000
+
 // triggerDisplayNameGenerationIfNeeded checks if display name generation should be triggered
 // and initiates it asynchronously. This runs in a goroutine to not block the response.
 func triggerDisplayNameGenerationIfNeeded(projectName, sessionID string, messageBody map[string]interface{}) {
-	// Extract user message content
-	content, ok := messageBody["content"].(string)
-	if !ok || strings.TrimSpace(content) == "" {
+	// Extract current user message content
+	currentContent, ok := messageBody["content"].(string)
+	if !ok || strings.TrimSpace(currentContent) == "" {
 		return
 	}
 
@@ -248,39 +251,64 @@ func triggerDisplayNameGenerationIfNeeded(projectName, sessionID string, message
 		return
 	}
 
-	// Check if display name should be generated
+	// Check if display name should be generated (only if empty/unset)
 	if !handlers.ShouldGenerateDisplayName(spec) {
 		return
 	}
 
-	// Count existing user messages to check if this is the first
-	messages, err := retrieveMessagesFromS3(sessionID)
+	log.Printf("DisplayNameGen: Triggering generation for %s/%s", projectName, sessionID)
+
+	// Collect all user messages (existing + current) for better context
+	combinedContent := collectUserMessages(sessionID, currentContent)
+
+	// Extract session context for better name generation
+	sessionCtx := handlers.ExtractSessionContext(spec)
+
+	// Trigger async display name generation
+	handlers.GenerateDisplayNameAsync(projectName, sessionID, combinedContent, sessionCtx)
+}
+
+// collectUserMessages fetches existing user messages from storage and combines with current message
+// Returns a truncated string of all user messages (max maxUserMessageChars)
+func collectUserMessages(sessionID, currentMessage string) string {
+	// Fetch existing messages from storage
+	existingMessages, err := retrieveMessagesFromS3(sessionID)
 	if err != nil {
-		log.Printf("DisplayNameGen: Failed to get messages for %s: %v", sessionID, err)
-		return
+		log.Printf("DisplayNameGen: Failed to retrieve messages for %s: %v", sessionID, err)
+		// Fall back to just the current message
+		return truncateString(currentMessage, maxUserMessageChars)
 	}
 
-	userMessageCount := 0
-	for _, m := range messages {
-		if m.Type == "user_message" {
-			userMessageCount++
+	// Collect user message contents
+	var userMessages []string
+	for _, msg := range existingMessages {
+		if msg.Type == "user_message" {
+			// Extract content from payload (Payload is already map[string]interface{})
+			if content, ok := msg.Payload["content"].(string); ok && strings.TrimSpace(content) != "" {
+				userMessages = append(userMessages, strings.TrimSpace(content))
+			}
 		}
 	}
 
-	// We already broadcast the current message, so count includes it
-	// If this is the first user message (count == 1 after broadcast), generate name
-	// Since we're checking before persist fully completes, check for <= 1
-	if userMessageCount > 1 {
-		log.Printf("DisplayNameGen: Skipping - not first user message (count: %d)", userMessageCount)
-		return
-	}
+	// Add current message
+	userMessages = append(userMessages, strings.TrimSpace(currentMessage))
 
-	// Extract session context for better name generation
-	sessionCtx := handlers.ExtractSessionContext(spec)
+	// Combine with separator
+	combined := strings.Join(userMessages, " | ")
 
-	// Trigger async display name generation
-	log.Printf("DisplayNameGen: Triggering generation for %s/%s", projectName, sessionID)
-	handlers.GenerateDisplayNameAsync(projectName, sessionID, content, sessionCtx)
+	// Truncate if too long
+	return truncateString(combined, maxUserMessageChars)
+}
+
+// truncateString truncates a string to at most maxLen bytes (not runes; a multi-byte UTF-8 character may be cut), adding "..." if truncated
+func truncateString(s string, maxLen int) string {
+	if len(s) <= maxLen {
+		return s
+	}
+	if maxLen <= 3 {
+		return s[:maxLen]
+	}
+	return s[:maxLen-3] + "..."
 }
 
 // getSessionForDisplayName retrieves session data for display name generation
diff --git a/components/manifests/base/backend-deployment.yaml b/components/manifests/base/backend-deployment.yaml
@@ -82,14 +82,30 @@ spec:
           value: "main"
         - name: OOTB_WORKFLOWS_PATH
           value: "workflows"
-        # Backend needs CLAUDE_CODE_USE_VERTEX to expose vertexEnabled flag via /api/cluster-info
-        # This allows the frontend to show warnings when ANTHROPIC_API_KEY is configured with Vertex enabled
-        # Shares the same config value as the operator for consistency
+        # Vertex AI configuration from operator-config ConfigMap
+        # Backend needs these for:
+        # 1. CLAUDE_CODE_USE_VERTEX: Expose vertexEnabled flag via /api/cluster-info
+        # 2. Other Vertex vars: Display name auto-generation uses Claude Haiku via Vertex
         - name: CLAUDE_CODE_USE_VERTEX
           valueFrom:
             configMapKeyRef:
               name: operator-config
               key: CLAUDE_CODE_USE_VERTEX
+        - name: CLOUD_ML_REGION
+          valueFrom:
+            configMapKeyRef:
+              name: operator-config
+              key: CLOUD_ML_REGION
+        - name: ANTHROPIC_VERTEX_PROJECT_ID
+          valueFrom:
+            configMapKeyRef:
+              name: operator-config
+              key: ANTHROPIC_VERTEX_PROJECT_ID
+        - name: GOOGLE_APPLICATION_CREDENTIALS
+          valueFrom:
+            configMapKeyRef:
+              name: operator-config
+              key: GOOGLE_APPLICATION_CREDENTIALS
         resources:
           requests:
             cpu: 100m
@@ -112,10 +128,21 @@ spec:
         volumeMounts:
         - name: backend-state
           mountPath: /workspace
+        # Vertex AI credentials (optional - only needed when CLAUDE_CODE_USE_VERTEX=1)
+        - name: vertex-credentials
+          mountPath: /app/vertex
+          readOnly: true
       volumes:
       - name: backend-state
         persistentVolumeClaim:
           claimName: backend-state-pvc
+      # ambient-vertex secret contains GCP service account key for Vertex AI
+      # This secret must exist in the same namespace as the backend (ambient-code)
+      # Created manually: kubectl create secret generic ambient-vertex --from-file=ambient-code-key.json=<path-to-key> -n ambient-code
+      - name: vertex-credentials
+        secret:
+          secretName: ambient-vertex
+          optional: true  # Don't fail if Vertex not configured
       
 ---
 apiVersion: v1