Skip to content

Commit cc6a82c

Browse files
sallyomclaudeAmbient Code Bot
authored
Observability: Langfuse (#329)
This PR * adds Langfuse instrumentation for usage and cost tracking * adds script and manifests to deploy Langfuse on K8s & OpenShift --------- Signed-off-by: sallyom <somalley@redhat.com> Co-authored-by: Claude <noreply@anthropic.com> Co-authored-by: Ambient Code Bot <bot@ambient-code.local>
1 parent 7a437c9 commit cc6a82c

23 files changed

+5282
-726
lines changed

.github/workflows/runner-tests.yml

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# CI workflow for the Claude Code runner: installs the package, runs the
# standalone unit tests, and uploads a coverage report to Codecov.
name: Claude Code Runner Tests

# Run only when the runner component or this workflow file changes.
on:
  pull_request:
    paths:
      - 'components/runners/claude-code-runner/**'
      - '.github/workflows/runner-tests.yml'
  push:
    branches: [main]
    paths:
      - 'components/runners/claude-code-runner/**'
      - '.github/workflows/runner-tests.yml'

jobs:
  test:
    runs-on: ubuntu-latest
    defaults:
      run:
        # Every `run` step below executes from the runner component directory.
        working-directory: components/runners/claude-code-runner

    steps:
      - name: Checkout code
        uses: actions/checkout@v5

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e .
          pip install pytest pytest-asyncio pytest-cov

      - name: Run unit tests for observability and security_utils
        run: |
          # Only run standalone unit tests that don't require runner_shell runtime
          # (test_model_mapping.py and test_wrapper_vertex.py require full runtime environment)
          pytest tests/test_observability.py tests/test_security_utils.py -v --tb=short --color=yes

      - name: Run tests with coverage
        run: |
          pytest tests/test_observability.py tests/test_security_utils.py --cov=observability --cov=security_utils --cov-report=term-missing --cov-report=xml

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v4
        with:
          # Path is relative to the repository root, not the working-directory default.
          files: ./components/runners/claude-code-runner/coverage.xml
          flags: runner
          name: claude-code-runner
          # Coverage upload problems must not fail the build.
          fail_ci_if_error: false

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,7 @@ e2e/.env.test
124124
e2e/node_modules/
125125
e2e/cypress/screenshots/
126126
e2e/cypress/videos/
127+
128+
# Langfuse secrets and deployment credentials
129+
e2e/.env.langfuse
130+
e2e/langfuse/.env.langfuse-keys

Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.PHONY: help setup-env build-all build-frontend build-backend build-operator build-runner deploy clean dev-frontend dev-backend lint test registry-login push-all dev-start dev-stop dev-test dev-logs-operator dev-restart-operator dev-operator-status dev-test-operator e2e-test e2e-setup e2e-clean setup-hooks remove-hooks
1+
.PHONY: help setup-env build-all build-frontend build-backend build-operator build-runner deploy clean dev-frontend dev-backend lint test registry-login push-all dev-start dev-stop dev-test dev-logs-operator dev-restart-operator dev-operator-status dev-test-operator e2e-test e2e-setup e2e-clean setup-hooks remove-hooks deploy-langfuse-openshift
22

33
# Default target
44
help: ## Show this help message
@@ -175,3 +175,7 @@ e2e-setup: ## Install e2e test dependencies
175175
e2e-clean: ## Clean up e2e test environment
176176
@echo "Cleaning up e2e environment..."
177177
cd e2e && CONTAINER_ENGINE=$(CONTAINER_ENGINE) ./scripts/cleanup.sh
178+
179+
deploy-langfuse-openshift: ## Deploy Langfuse to OpenShift/ROSA cluster
180+
@echo "Deploying Langfuse to OpenShift cluster..."
181+
@cd e2e && ./scripts/deploy-langfuse.sh --openshift

components/backend/handlers/middleware.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"encoding/json"
66
"log"
77
"net/http"
8+
"regexp"
89
"strings"
910
"time"
1011

@@ -30,6 +31,23 @@ var (
3031
StringPtr = func(s string) *string { return &s }
3132
)
3233

34+
// kubernetesNameRegex matches the character rules of a Kubernetes DNS-1123
// label (namespace and service account names): lowercase alphanumerics and
// '-', beginning and ending with an alphanumeric. The length limit is
// enforced separately by isValidKubernetesName.
var kubernetesNameRegex = regexp.MustCompile(`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$`)

// isValidKubernetesName reports whether name is a valid Kubernetes DNS-1123 label.
//
// It returns false if:
//   - name is empty (prevents empty string injection)
//   - name exceeds 63 characters
//   - name contains invalid characters (not lowercase alphanumeric or '-')
//   - name starts or ends with '-' (enforced by the regex)
func isValidKubernetesName(name string) bool {
	// Maximum length of a DNS-1123 label.
	const maxLabelLength = 63

	// Explicit length check: reject empty strings and over-long names before
	// running the regex.
	if name == "" || len(name) > maxLabelLength {
		return false
	}
	return kubernetesNameRegex.MatchString(name)
}
50+
3351
// ContentListItem represents a content list item for file browsing
3452
type ContentListItem struct {
3553
Name string `json:"name"`
@@ -102,6 +120,14 @@ func GetK8sClientsForRequest(c *gin.Context) (*kubernetes.Clientset, dynamic.Int
102120
// updateAccessKeyLastUsedAnnotation attempts to update the ServiceAccount's last-used annotation
103121
// when the incoming token is a ServiceAccount JWT. Uses the backend service account client strictly
104122
// for this telemetry update and only for SAs labeled app=ambient-access-key. Best-effort; errors ignored.
123+
//
124+
// RBAC:
125+
// This function intentionally uses the backend service account (K8sClientMw) instead of user credentials
126+
// because it updates platform-managed telemetry metadata (last-used timestamp) that users should not control.
127+
//
128+
// - Only updates ServiceAccounts with label app=ambient-access-key (label check below)
129+
// - Only updates the last-used-at annotation (no other metadata changes)
130+
// - Best-effort operation with all errors ignored (cannot disrupt user requests)
105131
func updateAccessKeyLastUsedAnnotation(c *gin.Context) {
106132
// Parse Authorization header
107133
rawAuth := c.GetHeader("Authorization")
@@ -252,6 +278,13 @@ func ValidateProjectContext() gin.HandlerFunc {
252278
return
253279
}
254280

281+
// Validate namespace name to prevent injection attacks
282+
if !isValidKubernetesName(projectHeader) {
283+
c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid project name format"})
284+
c.Abort()
285+
return
286+
}
287+
255288
// Ensure the caller has at least list permission on agenticsessions in the namespace
256289
ssar := &authv1.SelfSubjectAccessReview{
257290
Spec: authv1.SelfSubjectAccessReviewSpec{
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Example: Platform Admin Langfuse Secret
#
# This secret stores ALL Langfuse observability configuration for the platform.
# All LANGFUSE_* environment variables are configured here in one place.
#
# IMPORTANT:
# - This is a PLATFORM-ADMIN managed secret, not configured per-workspace
# - Should be created in the same namespace as the operator (typically 'ambient-code')
# - Contains both credentials (PUBLIC_KEY, SECRET_KEY) and configuration (HOST, ENABLED)
#
# How to create this secret:
#
# Option 1: Using kubectl (recommended for production):
#   kubectl create secret generic ambient-admin-langfuse-secret \
#     --from-literal=LANGFUSE_PUBLIC_KEY=pk-lf-YOUR-PUBLIC-KEY-HERE \
#     --from-literal=LANGFUSE_SECRET_KEY=sk-lf-YOUR-SECRET-KEY-HERE \
#     --from-literal=LANGFUSE_HOST=http://langfuse-web.langfuse.svc.cluster.local:3000 \
#     --from-literal=LANGFUSE_ENABLED=true \
#     -n ambient-code
#
# Option 2: Using this YAML file (less secure - keys visible in manifest):
#   1. Copy this file: cp ambient-admin-langfuse-secret.yaml.example ambient-admin-langfuse-secret.yaml
#   2. Replace placeholder values with your actual configuration
#   3. Apply: kubectl apply -f ambient-admin-langfuse-secret.yaml
#   4. Delete the file: rm ambient-admin-langfuse-secret.yaml  # Don't commit secrets!

apiVersion: v1
kind: Secret
metadata:
  name: ambient-admin-langfuse-secret
  labels:
    app: agentic-operator
    ambient-code.io/component: observability
type: Opaque
stringData:
  # Get these keys from your Langfuse instance:
  # https://your-langfuse-host.com/settings (Project Settings -> API Keys)
  LANGFUSE_PUBLIC_KEY: "pk-lf-YOUR-PUBLIC-KEY-HERE"
  LANGFUSE_SECRET_KEY: "sk-lf-YOUR-SECRET-KEY-HERE"

  # Langfuse instance URL (cluster-internal or external)
  LANGFUSE_HOST: "http://langfuse-web.langfuse.svc.cluster.local:3000"

  # Enable Langfuse observability for all sessions
  LANGFUSE_ENABLED: "true"

components/manifests/base/operator-deployment.yaml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,32 @@ spec:
5757
configMapKeyRef:
5858
name: operator-config
5959
key: GOOGLE_APPLICATION_CREDENTIALS
60+
# Platform-wide Langfuse observability configuration
61+
# All LANGFUSE_* config stored in ambient-admin-langfuse-secret (platform-admin managed)
62+
- name: LANGFUSE_ENABLED
63+
valueFrom:
64+
secretKeyRef:
65+
name: ambient-admin-langfuse-secret
66+
key: LANGFUSE_ENABLED
67+
optional: true # Optional: defaults to false if secret doesn't exist
68+
- name: LANGFUSE_HOST
69+
valueFrom:
70+
secretKeyRef:
71+
name: ambient-admin-langfuse-secret
72+
key: LANGFUSE_HOST
73+
optional: true # Optional: only needed if Langfuse enabled
74+
- name: LANGFUSE_PUBLIC_KEY
75+
valueFrom:
76+
secretKeyRef:
77+
name: ambient-admin-langfuse-secret
78+
key: LANGFUSE_PUBLIC_KEY
79+
optional: true # Optional: only needed if Langfuse enabled
80+
- name: LANGFUSE_SECRET_KEY
81+
valueFrom:
82+
secretKeyRef:
83+
name: ambient-admin-langfuse-secret
84+
key: LANGFUSE_SECRET_KEY
85+
optional: true # Optional: only needed if Langfuse enabled
6086
resources:
6187
requests:
6288
cpu: 50m

0 commit comments

Comments
 (0)