Skip to content

Commit f6639ac

Browse files
authored
fix: Add Vertex AI credentials to backend deployment (#443)
## Summary

The display name auto-generation feature (PR #432) requires backend access to Vertex AI when `CLAUDE_CODE_USE_VERTEX=1`. This PR adds the missing configuration:

### Changes

1. **Add Vertex AI env vars** to backend deployment:
   - `CLOUD_ML_REGION`
   - `ANTHROPIC_VERTEX_PROJECT_ID`
   - `GOOGLE_APPLICATION_CREDENTIALS`

   All sourced from `operator-config` ConfigMap (same as operator).

2. **Mount `ambient-vertex` secret** as volume at `/app/vertex/`:
   - Contains `ambient-code-key.json` (GCP service account key)
   - Marked as `optional: true` so deployments without Vertex still work

### Why This Is Needed

When `CLAUDE_CODE_USE_VERTEX=1`, the backend must use Vertex AI for Claude API calls (display name generation). Previously, only `CLAUDE_CODE_USE_VERTEX` was set on the backend; the other Vertex env vars were missing and the credentials file wasn't mounted, causing:

```
ANTHROPIC_VERTEX_PROJECT_ID is required when CLAUDE_CODE_USE_VERTEX=1
```

### Testing

Tested on dev cluster with Vertex AI enabled - display names now generate correctly via Vertex.
1 parent f621834 commit f6639ac

File tree

3 files changed

+94
-35
lines changed

3 files changed

+94
-35
lines changed

components/backend/handlers/display_name.go

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,10 @@ const (
2424
// anthropicAPIKeyField is the secret field containing the Anthropic API key
2525
anthropicAPIKeyField = "ANTHROPIC_API_KEY"
2626
// haiku model for quick, cheap name generation (standard API)
27-
haiku3Model = "claude-3-5-haiku-20241022"
28-
// haiku model for Vertex AI (requires version suffix)
29-
haiku3ModelVertex = "claude-3-5-haiku-v2@20241022"
27+
haiku3Model = "claude-haiku-4-5-20251001"
28+
// haiku model for Vertex AI - claude-haiku-4-5 (Vertex uses "@" before the date); requested from a regional endpoint, since getAnthropicClient replaces "global" with us-east5
29+
// See: https://platform.claude.com/docs/en/build-with-claude/claude-on-vertex-ai
30+
haiku3ModelVertex = "claude-haiku-4-5@20251001"
3031
// Maximum display name length
3132
maxDisplayNameLength = 50
3233
// Timeout for API call
@@ -139,21 +140,24 @@ func getAnthropicClient(ctx context.Context, projectName string) (anthropic.Clie
139140
region := os.Getenv("CLOUD_ML_REGION")
140141
gcpProjectID := os.Getenv("ANTHROPIC_VERTEX_PROJECT_ID")
141142

142-
if region == "" {
143-
region = "us-central1" // Default region
143+
// Default to us-east5 - claude-haiku-4-5 is not available in global region
144+
// See: https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude
145+
if region == "" || region == "global" {
146+
region = "us-east5"
144147
}
145148
if gcpProjectID == "" {
146-
return anthropic.Client{}, false, fmt.Errorf("ANTHROPIC_VERTEX_PROJECT_ID is required when CLAUDE_CODE_USE_VERTEX=1")
149+
return anthropic.Client{}, false, fmt.Errorf("ANTHROPIC_VERTEX_PROJECT_ID is required when CLAUDE_CODE_USE_VERTEX=1 (check backend deployment env vars)")
147150
}
148151

149152
log.Printf("DisplayNameGen: Using Vertex AI for %s (region: %s, project: %s)", projectName, region, gcpProjectID)
153+
// Must pass OAuth scope for Vertex AI - without it, auth fails with "invalid_scope" error
150154
client := anthropic.NewClient(
151-
vertex.WithGoogleAuth(ctx, region, gcpProjectID),
155+
vertex.WithGoogleAuth(ctx, region, gcpProjectID, "https://www.googleapis.com/auth/cloud-platform"),
152156
)
153157
return client, true, nil
154158
}
155159

156-
// Fall back to API key from project secret
160+
// Vertex not enabled - use API key from project secret
157161
apiKey, err := getAPIKeyFromSecret(ctx, projectName)
158162
if err != nil {
159163
return anthropic.Client{}, false, fmt.Errorf("failed to get API key: %w", err)

components/backend/websocket/handlers.go

Lines changed: 52 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -227,12 +227,15 @@ func PostSessionMessageWS(c *gin.Context) {
227227
c.JSON(http.StatusAccepted, gin.H{"status": "queued"})
228228
}
229229

230+
// maxUserMessageChars caps the combined user-message text passed to display name generation (measured in bytes, as enforced via len() in truncateString)
231+
const maxUserMessageChars = 1000
232+
230233
// triggerDisplayNameGenerationIfNeeded checks if display name generation should be triggered
231234
// and initiates it asynchronously. This runs in a goroutine to not block the response.
232235
func triggerDisplayNameGenerationIfNeeded(projectName, sessionID string, messageBody map[string]interface{}) {
233-
// Extract user message content
234-
content, ok := messageBody["content"].(string)
235-
if !ok || strings.TrimSpace(content) == "" {
236+
// Extract current user message content
237+
currentContent, ok := messageBody["content"].(string)
238+
if !ok || strings.TrimSpace(currentContent) == "" {
236239
return
237240
}
238241

@@ -248,39 +251,64 @@ func triggerDisplayNameGenerationIfNeeded(projectName, sessionID string, message
248251
return
249252
}
250253

251-
// Check if display name should be generated
254+
// Check if display name should be generated (only if empty/unset)
252255
if !handlers.ShouldGenerateDisplayName(spec) {
253256
return
254257
}
255258

256-
// Count existing user messages to check if this is the first
257-
messages, err := retrieveMessagesFromS3(sessionID)
259+
log.Printf("DisplayNameGen: Triggering generation for %s/%s", projectName, sessionID)
260+
261+
// Collect all user messages (existing + current) for better context
262+
combinedContent := collectUserMessages(sessionID, currentContent)
263+
264+
// Extract session context for better name generation
265+
sessionCtx := handlers.ExtractSessionContext(spec)
266+
267+
// Trigger async display name generation
268+
handlers.GenerateDisplayNameAsync(projectName, sessionID, combinedContent, sessionCtx)
269+
}
270+
271+
// collectUserMessages fetches existing user messages from storage and combines with current message
272+
// Returns a truncated string of all user messages (max maxUserMessageChars)
273+
func collectUserMessages(sessionID, currentMessage string) string {
274+
// Fetch existing messages from storage
275+
existingMessages, err := retrieveMessagesFromS3(sessionID)
258276
if err != nil {
259-
log.Printf("DisplayNameGen: Failed to get messages for %s: %v", sessionID, err)
260-
return
277+
log.Printf("DisplayNameGen: Failed to retrieve messages for %s: %v", sessionID, err)
278+
// Fall back to just the current message
279+
return truncateString(currentMessage, maxUserMessageChars)
261280
}
262281

263-
userMessageCount := 0
264-
for _, m := range messages {
265-
if m.Type == "user_message" {
266-
userMessageCount++
282+
// Collect user message contents
283+
var userMessages []string
284+
for _, msg := range existingMessages {
285+
if msg.Type == "user_message" {
286+
// Extract content from payload (Payload is already map[string]interface{})
287+
if content, ok := msg.Payload["content"].(string); ok && strings.TrimSpace(content) != "" {
288+
userMessages = append(userMessages, strings.TrimSpace(content))
289+
}
267290
}
268291
}
269292

270-
// We already broadcast the current message, so count includes it
271-
// If this is the first user message (count == 1 after broadcast), generate name
272-
// Since we're checking before persist fully completes, check for <= 1
273-
if userMessageCount > 1 {
274-
log.Printf("DisplayNameGen: Skipping - not first user message (count: %d)", userMessageCount)
275-
return
276-
}
293+
// Add current message
294+
userMessages = append(userMessages, strings.TrimSpace(currentMessage))
277295

278-
// Extract session context for better name generation
279-
sessionCtx := handlers.ExtractSessionContext(spec)
296+
// Combine with separator
297+
combined := strings.Join(userMessages, " | ")
280298

281-
// Trigger async display name generation
282-
log.Printf("DisplayNameGen: Triggering generation for %s/%s", projectName, sessionID)
283-
handlers.GenerateDisplayNameAsync(projectName, sessionID, content, sessionCtx)
299+
// Truncate if too long
300+
return truncateString(combined, maxUserMessageChars)
301+
}
302+
303+
// truncateString shortens s so that the result is at most maxLen bytes long.
//
// Strings that already fit are returned unchanged. Otherwise the string is cut
// and "..." is appended; when maxLen is 3 or less there is no room for the
// ellipsis, so the string is simply cut. The cut point is moved back to the
// start of a UTF-8 sequence so a multi-byte character is never split in half
// (the result may therefore be slightly shorter than maxLen). A negative
// maxLen is treated as 0.
func truncateString(s string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(s) <= maxLen {
		return s
	}

	const ellipsis = "..."
	cut, suffix := maxLen, ""
	if maxLen > len(ellipsis) {
		cut, suffix = maxLen-len(ellipsis), ellipsis
	}

	// cut < len(s) here, so s[cut] is in range. Bytes of the form 10xxxxxx are
	// UTF-8 continuation bytes; back up until cut sits on a character boundary.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + suffix
}
285313

286314
// getSessionForDisplayName retrieves session data for display name generation

components/manifests/base/backend-deployment.yaml

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,14 +82,30 @@ spec:
8282
value: "main"
8383
- name: OOTB_WORKFLOWS_PATH
8484
value: "workflows"
85-
# Backend needs CLAUDE_CODE_USE_VERTEX to expose vertexEnabled flag via /api/cluster-info
86-
# This allows the frontend to show warnings when ANTHROPIC_API_KEY is configured with Vertex enabled
87-
# Shares the same config value as the operator for consistency
85+
# Vertex AI configuration from operator-config ConfigMap
86+
# Backend needs these for:
87+
# 1. CLAUDE_CODE_USE_VERTEX: Expose vertexEnabled flag via /api/cluster-info
88+
# 2. Other Vertex vars: Display name auto-generation uses Claude Haiku via Vertex
8889
- name: CLAUDE_CODE_USE_VERTEX
8990
valueFrom:
9091
configMapKeyRef:
9192
name: operator-config
9293
key: CLAUDE_CODE_USE_VERTEX
94+
- name: CLOUD_ML_REGION
95+
valueFrom:
96+
configMapKeyRef:
97+
name: operator-config
98+
key: CLOUD_ML_REGION
99+
- name: ANTHROPIC_VERTEX_PROJECT_ID
100+
valueFrom:
101+
configMapKeyRef:
102+
name: operator-config
103+
key: ANTHROPIC_VERTEX_PROJECT_ID
104+
- name: GOOGLE_APPLICATION_CREDENTIALS
105+
valueFrom:
106+
configMapKeyRef:
107+
name: operator-config
108+
key: GOOGLE_APPLICATION_CREDENTIALS
93109
resources:
94110
requests:
95111
cpu: 100m
@@ -112,10 +128,21 @@ spec:
112128
volumeMounts:
113129
- name: backend-state
114130
mountPath: /workspace
131+
# Vertex AI credentials (optional - only needed when CLAUDE_CODE_USE_VERTEX=1)
132+
- name: vertex-credentials
133+
mountPath: /app/vertex
134+
readOnly: true
115135
volumes:
116136
- name: backend-state
117137
persistentVolumeClaim:
118138
claimName: backend-state-pvc
139+
# ambient-vertex secret contains GCP service account key for Vertex AI
140+
# When Vertex is enabled, this secret must exist in the same namespace as the backend (ambient-code)
141+
# Created manually: kubectl create secret generic ambient-vertex --from-file=ambient-code-key.json=<path-to-key> -n ambient-code
142+
- name: vertex-credentials
143+
secret:
144+
secretName: ambient-vertex
145+
optional: true # Don't fail if Vertex not configured
119146

120147
---
121148
apiVersion: v1

0 commit comments

Comments
 (0)