From 6cff5fd8168a7453ea4c5fd57f7605c5220619e3 Mon Sep 17 00:00:00 2001 From: Ivan Porta Date: Thu, 4 Dec 2025 20:40:14 +0900 Subject: [PATCH 01/17] Add linkerd Signed-off-by: Ivan Porta --- .devcontainer/Dockerfile | 6 + .devcontainer/devcontainer.json | 1 + Makefile | 6 +- contrib/security/security-self-assessment.md | 1 + go/cli/internal/profiles/demo.yaml | 2 + go/cli/internal/profiles/minimal.yaml | 2 + helm/agents/linkerd/Chart-template.yaml | 5 + helm/agents/linkerd/templates/agent.yaml | 273 +++++++++++++++++++ helm/agents/linkerd/templates/rbac.yaml | 146 ++++++++++ helm/agents/linkerd/values.yaml | 9 + helm/kagent/Chart-template.yaml | 4 + helm/kagent/values.yaml | 29 +- 12 files changed, 472 insertions(+), 12 deletions(-) create mode 100644 helm/agents/linkerd/Chart-template.yaml create mode 100644 helm/agents/linkerd/templates/agent.yaml create mode 100644 helm/agents/linkerd/templates/rbac.yaml create mode 100644 helm/agents/linkerd/values.yaml diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 455e310b3..f870d17e4 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -91,6 +91,7 @@ ARG TOOLS_GO_VERSION ARG TOOLS_K9S_VERSION ARG TOOLS_KIND_VERSION ARG TOOLS_ISTIO_VERSION +ARG TOOLS_LINKERD_VERSION ARG TOOLS_ARGO_CD_VERSION ARG TOOLS_KUBECTL_VERSION ARG ARCH=${TARGETARCH:-amd64} @@ -124,6 +125,11 @@ RUN curl -L https://istio.io/downloadIstio | ISTIO_VERSION=${TOOLS_ISTIO_VERSION && mv istio-*/bin/istioctl /usr/local/bin/istioctl \ && rm -rf istio-* +# Install Linkerd Edge +RUN curl -sL https://run.linkerd.io/install-edge | LINKERD2_VERSION=${TOOLS_LINKERD_VERSION} TARGET_ARCH=${ARCH} sh \ + && mv ~/.linkerd2/bin/linkerd /usr/local/bin/linkerd \ + && rm -rf ~/.linkerd2 + # Install kind RUN curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.27.0/kind-$(uname)-${ARCH} \ && chmod +x ./kind \ diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 54c1b1b2f..3cd62dd4d 100644 --- 
a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -9,6 +9,7 @@ "TOOLS_K9S_VERSION": "0.50.4", "TOOLS_KIND_VERSION": "0.27.0", "TOOLS_ISTIO_VERSION": "1.26.0", + "TOOLS_LINKERD_VERSION": "edge-25.11.3", "TOOLS_KUBECTL_VERSION": "1.33.4" } }, diff --git a/Makefile b/Makefile index 053a74f09..e27c0ffcc 100644 --- a/Makefile +++ b/Makefile @@ -264,6 +264,8 @@ helm-agents: helm package -d $(HELM_DIST_FOLDER) helm/agents/kgateway VERSION=$(VERSION) envsubst < helm/agents/istio/Chart-template.yaml > helm/agents/istio/Chart.yaml helm package -d $(HELM_DIST_FOLDER) helm/agents/istio + VERSION=$(VERSION) envsubst < helm/agents/linkerd/Chart-template.yaml > helm/agents/linkerd/Chart.yaml + helm package -d $(HELM_DIST_FOLDER) helm/agents/linkerd VERSION=$(VERSION) envsubst < helm/agents/promql/Chart-template.yaml > helm/agents/promql/Chart.yaml helm package -d $(HELM_DIST_FOLDER) helm/agents/promql VERSION=$(VERSION) envsubst < helm/agents/observability/Chart-template.yaml > helm/agents/observability/Chart.yaml @@ -300,7 +302,7 @@ helm-install-provider: helm-version check-api-key helm $(HELM_ACTION) kagent-crds helm/kagent-crds \ --namespace kagent \ --create-namespace \ - --history-max 2 \ + --timeout 5m \ --kube-context kind-$(KIND_CLUSTER_NAME) \ --wait \ @@ -308,7 +310,6 @@ helm-install-provider: helm-version check-api-key helm $(HELM_ACTION) kagent helm/kagent \ --namespace kagent \ --create-namespace \ - --history-max 2 \ --timeout 5m \ --kube-context kind-$(KIND_CLUSTER_NAME) \ --wait \ @@ -353,6 +354,7 @@ helm-publish: helm-version helm push ./$(HELM_DIST_FOLDER)/istio-agent-$(VERSION).tgz $(HELM_REPO)/kagent/agents helm push ./$(HELM_DIST_FOLDER)/promql-agent-$(VERSION).tgz $(HELM_REPO)/kagent/agents helm push ./$(HELM_DIST_FOLDER)/observability-agent-$(VERSION).tgz $(HELM_REPO)/kagent/agents + helm push ./$(HELM_DIST_FOLDER)/linkerd-agent-$(VERSION).tgz $(HELM_REPO)/kagent/agents helm push 
./$(HELM_DIST_FOLDER)/argo-rollouts-agent-$(VERSION).tgz $(HELM_REPO)/kagent/agents helm push ./$(HELM_DIST_FOLDER)/cilium-policy-agent-$(VERSION).tgz $(HELM_REPO)/kagent/agents helm push ./$(HELM_DIST_FOLDER)/cilium-manager-agent-$(VERSION).tgz $(HELM_REPO)/kagent/agents diff --git a/contrib/security/security-self-assessment.md b/contrib/security/security-self-assessment.md index 08690dc00..46ecc4c80 100644 --- a/contrib/security/security-self-assessment.md +++ b/contrib/security/security-self-assessment.md @@ -194,6 +194,7 @@ Optional tooling: - **kgateway**: Gateway and Kubernetes Gateway API integration - **Grafana**: Observability and monitoring integration - **Istio**: Integration with Istio Service Mesh APIs +- **Linkerd**: Integration with Linkerd Service Mesh APIs - **Argo**: Integration with Argo Rollouts - **Cilium**: Integration through specialized agents for eBPF-based networking diff --git a/go/cli/internal/profiles/demo.yaml b/go/cli/internal/profiles/demo.yaml index d2b266007..3dd509bf5 100644 --- a/go/cli/internal/profiles/demo.yaml +++ b/go/cli/internal/profiles/demo.yaml @@ -7,6 +7,8 @@ agents: enabled: true istio-agent: enabled: true + linkerd-agent: + enabled: true promql-agent: enabled: true observability-agent: diff --git a/go/cli/internal/profiles/minimal.yaml b/go/cli/internal/profiles/minimal.yaml index 59cc778d5..88cbbdc2c 100644 --- a/go/cli/internal/profiles/minimal.yaml +++ b/go/cli/internal/profiles/minimal.yaml @@ -7,6 +7,8 @@ agents: enabled: false istio-agent: enabled: false + linkerd-agent: + enabled: false promql-agent: enabled: false observability-agent: diff --git a/helm/agents/linkerd/Chart-template.yaml b/helm/agents/linkerd/Chart-template.yaml new file mode 100644 index 000000000..9359beb68 --- /dev/null +++ b/helm/agents/linkerd/Chart-template.yaml @@ -0,0 +1,5 @@ +apiVersion: v2 +name: linkerd-agent +description: A Linkerd Agent for kagent +type: application +version: ${VERSION} \ No newline at end of file diff --git 
a/helm/agents/linkerd/templates/agent.yaml b/helm/agents/linkerd/templates/agent.yaml new file mode 100644 index 000000000..5fd76b914 --- /dev/null +++ b/helm/agents/linkerd/templates/agent.yaml @@ -0,0 +1,273 @@ +apiVersion: kagent.dev/v1alpha2 +kind: Agent +metadata: + name: linkerd-agent + namespace: {{ include "kagent.namespace" . }} + labels: + {{- include "kagent.labels" . | nindent 4 }} +spec: + description: A Linkerd (edge) Expert AI Agent specializing in Linkerd operations, troubleshooting, and maintenance. + type: Declarative + declarative: + systemMessage: |- + You are a Kubernetes and Linkerd (edge) Expert AI Agent with comprehensive knowledge of container orchestration, service mesh architecture, and cloud-native systems. You have access to a wide range of specialized tools that enable you to interact with Kubernetes clusters and Linkerd service mesh implementations to perform diagnostics, configuration, management, and troubleshooting. + + Core Expertise: + + 1. Kubernetes Capabilities + - Cluster architecture and components + - Resource management and scheduling + - Networking, services, and ingress + - EndpointSlices and service discovery + - Storage systems and volumes + - Security and RBAC + - Configuration and secrets + - Deployment strategies + - Monitoring and logging + - High availability and scaling + - Troubleshooting methodologies + + 2. Linkerd Capabilities + - Linkerd control plane and data plane architecture + - Proxy injection and sidecar lifecycle + - mTLS, identity, and trust anchors + - Authorization policy (policy.linkerd.io) + - Service profiles and route-level configuration + - Traffic splitting and canary-style rollouts + - Linkerd-viz telemetry (stat, routes, tap, top, dashboard) + - Diagnostics via `linkerd check` and `linkerd diagnostics` + - Multicluster connectivity (multicluster.linkerd.io) + - CNI and transparent proxying + - Edge APIs (e.g., policy.linkerd.io, HTTPRoute integration) + + Available Tools: + + 1. 
Kubernetes Resource Management: + - `k8s_get_resources`: Retrieve Kubernetes resources by type, namespace, and filters + - `k8s_describe_resource`: Get detailed information about a specific resource + - `k8s_create_resource`: Create a new Kubernetes resource from YAML + - `k8s_create_resource_from_url`: Create a resource from a URL-hosted manifest + - `k8s_delete_resource`: Delete a Kubernetes resource + - `k8s_patch_resource`: Apply a partial update to a resource + + 2. Kubernetes Resource Manipulation: + - `k8s_generate_resource`: Generate custom Kubernetes resources (Deployments, Services, CRDs, etc.) + - `k8s_patch_resource`: Apply safe, partial updates to existing resources + + 3. Linkerd Service Mesh Management: + - `linkerd_install`: Install or upgrade the Linkerd control plane (maps to `linkerd install` / `linkerd upgrade`) + - `linkerd_install_cni`: Manage Linkerd CNI installation (`linkerd install-cni`) + - `linkerd_uninstall`: Generate resources to uninstall Linkerd (`linkerd uninstall`) + - `linkerd_check`: Run pre-install, control-plane, or proxy health checks (`linkerd check`) + - `linkerd_diagnostics`: Run diagnostics commands (`linkerd diagnostics`, e.g. endpoints, policy, proxy-metrics) + - `linkerd_authz`: Inspect Linkerd authorization state (`linkerd authz`, `linkerd viz authz`) + - `linkerd_identity`: Inspect workload certificates (`linkerd identity`) + - `linkerd_inject` / `linkerd_uninject`: Mutate Kubernetes configs to add/remove the Linkerd proxy (`linkerd inject`, `linkerd uninject`) + - `linkerd_profile`: Manage and generate service profiles (`linkerd profile`) + - `linkerd_multicluster`: Manage multicluster setup (`linkerd multicluster`) + - `linkerd_prune`: Output extraneous control-plane resources (`linkerd prune`) + - `linkerd_viz`: Manage the Linkerd-viz extension and observability (`linkerd viz` – stat, routes, tap, top, dashboard) + - `linkerd_version`: Get Linkerd CLI and control-plane version information (`linkerd version`) + + 4. 
Documentation and Information: + - `query_documentation`: Query documentation and best practices across Kubernetes and Linkerd edge + + Operational Protocol: + + 1. Initial Assessment + - Gather information about the cluster and relevant resources + - Identify the scope and nature of the task or issue + - Determine required permissions and access levels + - Plan the approach with safety and minimal disruption + + 2. Execution Strategy + - Use read-only operations first for information gathering + - Prefer `linkerd check` and `linkerd diagnostics` for safe validation + - Validate planned changes before execution + - Implement changes incrementally when possible + - Verify results after each significant change + - Document all actions and outcomes + + 3. Troubleshooting Methodology + - Systematically narrow down problem sources + - Analyze logs, events, metrics, and Linkerd-viz output + - Check resource configurations and relationships (Deployments, Services, Endpoints/EndpointSlices, CRDs) + - Verify network connectivity, policies, and service discovery + - Review recent changes and deployments + - Isolate service mesh configuration issues (injection, identity, policy, routes, tap) + + Safety Guidelines: + + 1. Cluster Operations + - Prioritize non-disruptive operations + - Verify contexts before executing changes + - Understand blast radius of all operations + - Backup critical configurations before modifications + - Consider scaling and failure-domain implications of all changes + + 2. Linkerd Service Mesh Management + - Use `linkerd check --pre` before installing or upgrading + - Validate identity and trust anchors before modifying mTLS/CA + - Apply policy.linkerd.io changes incrementally and test in non-critical namespaces first + - Gradually roll out traffic-splitting and canary configurations + - Prefer `linkerd viz` commands to observe impact before and after changes + - Maintain fallback configurations and be ready to rollback + + Best Practices: + + 1. 
Resource Management + - Use namespaces for logical separation + - Implement resource quotas and limits + - Use labels and annotations for organization + - Follow the principle of least privilege for RBAC + - Implement network policies for segmentation + + 2. Linkerd Configuration + - Ensure all meshed workloads have the Linkerd proxy injected and healthy + - Enable and validate mTLS by default across the mesh + - Use service profiles for per-route success-rate and latency metrics + - Use policy.linkerd.io resources (e.g. AuthorizationPolicy, MeshTLSAuthentication) for fine-grained access control + - Use multicluster extensions and gateways for cross-cluster communication when needed + - Keep control-plane and extensions (like linkerd-viz) aligned with supported edge/stable versions + + 3. Monitoring and Observability + - Use `linkerd viz stat`, `routes`, and `top` for live traffic insights + - Use `linkerd viz tap` for request-level debugging + - Use `linkerd diagnostics proxy-metrics` and controller metrics for low-level analysis + - Configure proper log levels for Linkerd proxies and control-plane components + - Set up alerts based on success-rate, latency, and TLS status + - Monitor proxy resource usage and p95/p99 latencies + + Common Scenarios: + + 1. Kubernetes Troubleshooting + - Pod scheduling failures + - Service discovery issues and DNS problems + - Resource constraints (CPU/Mem/Storage) + - ConfigMap and Secret misconfigurations + - Persistent volume issues + - Network policy conflicts + - Endpoint / EndpointSlice misconfiguration + + 2. 
Linkerd Troubleshooting + - Proxy injection failures (e.g., missing annotations, unsupported workloads) + - Linkerd control-plane health problems (e.g., `linkerd check` failures) + - mTLS and identity issues (cert expiry, trust anchor rotation) + - Authorization failures with policy.linkerd.io + - Traffic splitting and canary routing not behaving as expected + - Performance degradation visible in `linkerd viz stat` or `routes` + - Multicluster connectivity issues (service mirroring, gateways) + - Observability gaps in linkerd-viz (missing metrics, tap/edges/stat anomalies) + + Your primary goal is to provide expert assistance with Kubernetes and Linkerd (edge) environments by leveraging your specialized tools while following best practices for safety, reliability, and performance. Always aim to not just solve immediate issues but to improve the overall system architecture and operational practices. + + modelConfig: {{ .Values.modelConfigRef | default (printf "%s" (include "kagent.defaultModelConfigName" .)) }} + tools: + - type: McpServer + mcpServer: + name: kagent-tool-server + kind: RemoteMCPServer + apiGroup: kagent.dev + toolNames: + - k8s_create_resource + - k8s_create_resource_from_url + - k8s_delete_resource + - k8s_describe_resource + - k8s_get_resources + - k8s_patch_resource + - k8s_generate_resource + - linkerd_install + - linkerd_install_cni + - linkerd_upgrade + - linkerd_uninstall + - linkerd_check + - linkerd_diagnostics + - linkerd_authz + - linkerd_identity + - linkerd_inject + - linkerd_uninject + - linkerd_profile + - linkerd_multicluster + - linkerd_prune + - linkerd_viz + - linkerd_version + - query_documentation + a2aConfig: + skills: + - id: linkerd-service-mesh-configuration + name: Linkerd Service Mesh Configuration + description: Manages Linkerd control plane, data plane, CNI, multicluster, and extensions (such as linkerd-viz). Handles installation, upgrades, uninstalls, and validation via linkerd check and diagnostics. 
+ tags: + - linkerd + - service-mesh + - configuration + - install + - upgrade + - cni + - multicluster + - profile + - diagnostics + examples: + - "Install Linkerd edge in my cluster and run pre-checks." + - "Run linkerd check to validate my control plane in the 'linkerd' namespace." + - "Upgrade my Linkerd installation to the latest edge version." + - "Install the linkerd-viz extension and verify it with linkerd viz check." + - "Set up multicluster connectivity between 'cluster-a' and 'cluster-b'." + - "Generate a service profile for the 'backend' service and apply it." + - id: linkerd-traffic-management + name: Linkerd Traffic Management + description: Configures and inspects Linkerd traffic behavior using service profiles, SMI traffic splits, and observability via linkerd-viz commands. + tags: + - linkerd + - traffic + - routing + - service-profile + - routes + - canary + - multicluster + examples: + - "Create a service profile for the 'api' service in the 'default' namespace." + - "Set up a traffic split so 10% of traffic for 'my-app' goes to the 'canary' deployment." + - "Show live route stats for 'frontend' using linkerd viz routes." + - "Help me validate that my canary deployment is receiving traffic and performing well." + - "Inspect outbound traffic from the 'orders' deployment and identify any failing routes." + - id: linkerd-security-policies + name: Linkerd Security & Policy + description: Implements and manages Linkerd security features, including mTLS, identity, and policy.linkerd.io resources for fine-grained access control. + tags: + - linkerd + - security + - mtls + - identity + - authorization + - authentication + - policy + examples: + - "Verify that mTLS is enabled for all meshed workloads and identify any plaintext connections." + - "Create a Linkerd authorization policy to only allow 'service-a' to call 'service-b'." + - "Help design MeshTLSAuthentication and AuthorizationPolicy resources for my 'payments' namespace." 
+ - "Use linkerd authz to list all authorizations affecting the 'web' deployment." + - "Rotate the Linkerd trust anchor and verify the mesh health afterward." + - id: linkerd-observability-troubleshooting + name: Linkerd Observability & Troubleshooting + description: Diagnoses issues within the Linkerd service mesh, inspects telemetry and metrics through linkerd-viz, and correlates them with Kubernetes resources to find and resolve problems. + tags: + - linkerd + - observability + - troubleshooting + - telemetry + - metrics + - logs + - debug + - diagnose + - k8s + examples: + - "My requests to 'service-x' show high latency; use linkerd viz stat and routes to help troubleshoot." + - "Tap traffic for the 'checkout' deployment and identify 5xx responses." + - "Run linkerd diagnostics to check proxy metrics for the 'web' deployment." + - "Describe the Linkerd control plane pods in the 'linkerd' namespace and verify their status." + - "Use linkerd viz top to identify noisy neighbors in the 'production' namespace." + - "query_documentation for best practices on tuning Linkerd edge performance." + deployment: + resources: + {{- toYaml .Values.resources | nindent 8 }} diff --git a/helm/agents/linkerd/templates/rbac.yaml b/helm/agents/linkerd/templates/rbac.yaml new file mode 100644 index 000000000..ac8ab7989 --- /dev/null +++ b/helm/agents/linkerd/templates/rbac.yaml @@ -0,0 +1,146 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "kagent.fullname" . }}-linkerd-role + labels: + {{- include "kagent.labels" . 
| nindent 4 }} +rules: +- apiGroups: + - '' + resources: + - namespaces + - services + - endpoints + - pods + - persistentvolumeclaims + verbs: + - "*" +# EndpointSlices +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - "*" +- apiGroups: + - apps + resources: + - deployments + - daemonsets + - replicasets + - statefulsets + verbs: + - "*" +- apiGroups: + - policy + resources: + - poddisruptionbudgets + verbs: + - "*" +- apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - "*" +- apiGroups: + - networking.k8s.io + resources: + - networkpolicies + - ingresses + verbs: + - "*" +- apiGroups: + - rbac.authorization.k8s.io + resources: + - clusterroles + - clusterrolebindings + - roles + - rolebindings + verbs: + - "*" +- apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - "*" +# API server extension objects +- apiGroups: + - apiregistration.k8s.io + resources: + - apiservices + verbs: + - "*" +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + - subjectaccessreviews + verbs: + - "*" +- apiGroups: + - authorization.k8s.io + resources: + - selfsubjectaccessreviews + - selfsubjectrulesreviews + - subjectaccessreviews + verbs: + - "*" +- apiGroups: + - policy + resources: + - podsecuritypolicies + verbs: + - use + resourceNames: + - example +- apiGroups: + - admissionregistration.k8s.io + resources: + - validatingwebhookconfigurations + - mutatingwebhookconfigurations + verbs: + - "*" +- apiGroups: + - '' + resources: + - secrets + - configmaps + - serviceaccounts + verbs: + - "*" +# Linkerd & related CRDs +- apiGroups: + - linkerd.io + - policy.linkerd.io + - viz.linkerd.io + - multicluster.linkerd.io + - split.smi-spec.io + - tap.linkerd.io + - gateway.networking.k8s.io + resources: + - "*" + verbs: + - "*" +- apiGroups: + - '' + resources: + - pods/portforward + verbs: + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding 
+metadata: + name: {{ include "kagent.fullname" . }}-linkerd-rolebinding + labels: + {{- include "kagent.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "kagent.fullname" . }}-linkerd-role +subjects: + - kind: ServiceAccount + name: {{ include "kagent.fullname" . }} + namespace: {{ include "kagent.namespace" . }} \ No newline at end of file diff --git a/helm/agents/linkerd/values.yaml b/helm/agents/linkerd/values.yaml new file mode 100644 index 000000000..47cde6ba9 --- /dev/null +++ b/helm/agents/linkerd/values.yaml @@ -0,0 +1,9 @@ +modelConfigRef: "" + +resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 1000m + memory: 1Gi \ No newline at end of file diff --git a/helm/kagent/Chart-template.yaml b/helm/kagent/Chart-template.yaml index 6c88bc933..82608cb9b 100644 --- a/helm/kagent/Chart-template.yaml +++ b/helm/kagent/Chart-template.yaml @@ -32,6 +32,10 @@ dependencies: version: ${VERSION} repository: file://../agents/istio condition: agents.istio-agent.enabled + - name: linkerd-agent + version: ${VERSION} + repository: file://../agents/linkerd + condition: agents.linkerd-agent.enabled - name: promql-agent version: ${VERSION} repository: file://../agents/promql diff --git a/helm/kagent/values.yaml b/helm/kagent/values.yaml index bc28e3e98..7f5257962 100644 --- a/helm/kagent/values.yaml +++ b/helm/kagent/values.yaml @@ -228,7 +228,7 @@ kagent-tools: agents: k8s-agent: - enabled: true + enabled: false resources: requests: cpu: 100m @@ -237,7 +237,7 @@ agents: cpu: 1000m memory: 1Gi kgateway-agent: - enabled: true + enabled: false resources: requests: cpu: 100m @@ -254,7 +254,7 @@ agents: limits: cpu: 1000m memory: 1Gi - promql-agent: + linkerd-agent: enabled: true resources: requests: @@ -263,8 +263,17 @@ agents: limits: cpu: 1000m memory: 1Gi + promql-agent: + enabled: false + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 1000m + memory: 1Gi observability-agent: - 
enabled: true + enabled: false resources: requests: cpu: 100m @@ -273,7 +282,7 @@ agents: cpu: 1000m memory: 1Gi argo-rollouts-agent: - enabled: true + enabled: false resources: requests: cpu: 100m @@ -291,7 +300,7 @@ agents: cpu: 1000m memory: 1Gi cilium-policy-agent: - enabled: true + enabled: false resources: requests: cpu: 100m @@ -300,7 +309,7 @@ agents: cpu: 1000m memory: 1Gi cilium-manager-agent: - enabled: true + enabled: false resources: requests: cpu: 100m @@ -309,7 +318,7 @@ agents: cpu: 1000m memory: 1Gi cilium-debug-agent: - enabled: true + enabled: false resources: requests: cpu: 100m @@ -324,9 +333,9 @@ agents: tools: grafana-mcp: - enabled: true + enabled: false querydoc: - enabled: true + enabled: false grafana-mcp: grafana: From 25193c0c6dcd33f5b091386762285f2fc99cfd48 Mon Sep 17 00:00:00 2001 From: Ivan Porta Date: Thu, 4 Dec 2025 23:20:44 +0900 Subject: [PATCH 02/17] Update linkerd Signed-off-by: Ivan Porta --- helm/agents/linkerd/templates/agent.yaml | 61 ++++++++++++++---------- 1 file changed, 36 insertions(+), 25 deletions(-) diff --git a/helm/agents/linkerd/templates/agent.yaml b/helm/agents/linkerd/templates/agent.yaml index 5fd76b914..5fd8b98b5 100644 --- a/helm/agents/linkerd/templates/agent.yaml +++ b/helm/agents/linkerd/templates/agent.yaml @@ -35,7 +35,7 @@ spec: - Service profiles and route-level configuration - Traffic splitting and canary-style rollouts - Linkerd-viz telemetry (stat, routes, tap, top, dashboard) - - Diagnostics via `linkerd check` and `linkerd diagnostics` + - Diagnostics via `linkerd check` and `linkerd diagnostics` subcommands (e.g., `proxy-metrics`, `controller-metrics`, `endpoints`, `policy`, `profile`) - Multicluster connectivity (multicluster.linkerd.io) - CNI and transparent proxying - Edge APIs (e.g., policy.linkerd.io, HTTPRoute integration) @@ -55,19 +55,22 @@ spec: - `k8s_patch_resource`: Apply safe, partial updates to existing resources 3. 
Linkerd Service Mesh Management: - - `linkerd_install`: Install or upgrade the Linkerd control plane (maps to `linkerd install` / `linkerd upgrade`) + - `linkerd_install`: Install the Linkerd control plane (maps to `linkerd install`, including CRDs and control-plane manifests) - `linkerd_install_cni`: Manage Linkerd CNI installation (`linkerd install-cni`) - - `linkerd_uninstall`: Generate resources to uninstall Linkerd (`linkerd uninstall`) + - `linkerd_upgrade`: Upgrade an existing Linkerd control plane (`linkerd upgrade`) + - `linkerd_uninstall`: Generate and apply manifests to uninstall Linkerd (`linkerd uninstall`) - `linkerd_check`: Run pre-install, control-plane, or proxy health checks (`linkerd check`) - - `linkerd_diagnostics`: Run diagnostics commands (`linkerd diagnostics`, e.g. endpoints, policy, proxy-metrics) - - `linkerd_authz`: Inspect Linkerd authorization state (`linkerd authz`, `linkerd viz authz`) - - `linkerd_identity`: Inspect workload certificates (`linkerd identity`) - - `linkerd_inject` / `linkerd_uninject`: Mutate Kubernetes configs to add/remove the Linkerd proxy (`linkerd inject`, `linkerd uninject`) + - `linkerd_version`: Get Linkerd CLI, control-plane, and proxy versions (`linkerd version`) + - `linkerd_authz`: Inspect Linkerd authorization state for a resource (`linkerd authz`) - `linkerd_profile`: Manage and generate service profiles (`linkerd profile`) - - `linkerd_multicluster`: Manage multicluster setup (`linkerd multicluster`) - - `linkerd_prune`: Output extraneous control-plane resources (`linkerd prune`) - - `linkerd_viz`: Manage the Linkerd-viz extension and observability (`linkerd viz` – stat, routes, tap, top, dashboard) - - `linkerd_version`: Get Linkerd CLI and control-plane version information (`linkerd version`) + - `linkerd_policy`: Manage Linkerd policy operations such as `linkerd policy generate` + - `linkerd_fips_audit`: Audit Linkerd proxies for FIPS compliance (`linkerd fips audit`) + - 
`linkerd_patch_workload_injection`: Patch Kubernetes workloads to enable, disable, or remove Linkerd proxy auto-injection by manipulating the `linkerd.io/inject` annotation + - `linkerd_diagnostics_proxy_metrics`: Fetch metrics directly from Linkerd proxies (`linkerd diagnostics proxy-metrics`) + - `linkerd_diagnostics_controller_metrics`: Fetch metrics from Linkerd control-plane components (`linkerd diagnostics controller-metrics`) + - `linkerd_diagnostics_endpoints`: Inspect Linkerd’s service discovery endpoints (`linkerd diagnostics endpoints`) + - `linkerd_diagnostics_policy`: Inspect Linkerd’s policy state for a given resource/port (`linkerd diagnostics policy`) + - `linkerd_diagnostics_profile`: Inspect Linkerd’s service discovery profile for an authority (`linkerd diagnostics profile`) 4. Documentation and Information: - `query_documentation`: Query documentation and best practices across Kubernetes and Linkerd edge @@ -82,7 +85,7 @@ spec: 2. Execution Strategy - Use read-only operations first for information gathering - - Prefer `linkerd check` and `linkerd diagnostics` for safe validation + - Prefer `linkerd check` and diagnostics subcommands (`linkerd diagnostics proxy-metrics`, `controller-metrics`, `endpoints`, `policy`, `profile`) for safe validation - Validate planned changes before execution - Implement changes incrementally when possible - Verify results after each significant change @@ -110,7 +113,7 @@ spec: - Validate identity and trust anchors before modifying mTLS/CA - Apply policy.linkerd.io changes incrementally and test in non-critical namespaces first - Gradually roll out traffic-splitting and canary configurations - - Prefer `linkerd viz` commands to observe impact before and after changes + - Prefer diagnostics (`linkerd diagnostics proxy-metrics`, `endpoints`, `policy`, `profile`) and `linkerd authz` to observe impact before and after changes - Maintain fallback configurations and be ready to rollback Best Practices: @@ -124,6 +127,7 @@ 
spec: 2. Linkerd Configuration - Ensure all meshed workloads have the Linkerd proxy injected and healthy + - Use `linkerd_patch_workload_injection` to standardize proxy auto-injection via annotations - Enable and validate mTLS by default across the mesh - Use service profiles for per-route success-rate and latency metrics - Use policy.linkerd.io resources (e.g. AuthorizationPolicy, MeshTLSAuthentication) for fine-grained access control @@ -158,6 +162,7 @@ spec: - Performance degradation visible in `linkerd viz stat` or `routes` - Multicluster connectivity issues (service mirroring, gateways) - Observability gaps in linkerd-viz (missing metrics, tap/edges/stat anomalies) + - Diagnostics using `linkerd diagnostics proxy-metrics`, `endpoints`, `policy`, and `profile` for deep inspection Your primary goal is to provide expert assistance with Kubernetes and Linkerd (edge) environments by leveraging your specialized tools while following best practices for safety, reliability, and performance. Always aim to not just solve immediate issues but to improve the overall system architecture and operational practices. @@ -181,22 +186,23 @@ spec: - linkerd_upgrade - linkerd_uninstall - linkerd_check - - linkerd_diagnostics + - linkerd_version - linkerd_authz - - linkerd_identity - - linkerd_inject - - linkerd_uninject - linkerd_profile - - linkerd_multicluster - - linkerd_prune - - linkerd_viz - - linkerd_version + - linkerd_policy + - linkerd_fips_audit + - linkerd_patch_workload_injection + - linkerd_diagnostics_proxy_metrics + - linkerd_diagnostics_controller_metrics + - linkerd_diagnostics_endpoints + - linkerd_diagnostics_policy + - linkerd_diagnostics_profile - query_documentation a2aConfig: skills: - id: linkerd-service-mesh-configuration name: Linkerd Service Mesh Configuration - description: Manages Linkerd control plane, data plane, CNI, multicluster, and extensions (such as linkerd-viz). 
Handles installation, upgrades, uninstalls, and validation via linkerd check and diagnostics. + description: Manages Linkerd control plane, data plane, CNI, FIPS-enabled setups, and extensions (such as linkerd-viz). Handles installation, upgrades, uninstalls, auto-injection configuration, and validation via linkerd check and diagnostics subcommands (proxy-metrics, controller-metrics, endpoints, policy, profile). tags: - linkerd - service-mesh @@ -214,6 +220,8 @@ spec: - "Install the linkerd-viz extension and verify it with linkerd viz check." - "Set up multicluster connectivity between 'cluster-a' and 'cluster-b'." - "Generate a service profile for the 'backend' service and apply it." + - "Toggle auto-injection for the 'backend' deployment using linkerd_patch_workload_injection." + - "Run linkerd fips audit in the 'production' namespace and interpret the results." - id: linkerd-traffic-management name: Linkerd Traffic Management description: Configures and inspects Linkerd traffic behavior using service profiles, SMI traffic splits, and observability via linkerd-viz commands. @@ -233,7 +241,7 @@ spec: - "Inspect outbound traffic from the 'orders' deployment and identify any failing routes." - id: linkerd-security-policies name: Linkerd Security & Policy - description: Implements and manages Linkerd security features, including mTLS, identity, and policy.linkerd.io resources for fine-grained access control. + description: Implements and manages Linkerd security features, including mTLS, identity, and policy.linkerd.io resources for fine-grained access control. Leverages linkerd_authz, linkerd_policy, linkerd_diagnostics_policy, and linkerd_fips_audit to inspect and enforce security posture. tags: - linkerd - security @@ -248,9 +256,11 @@ spec: - "Help design MeshTLSAuthentication and AuthorizationPolicy resources for my 'payments' namespace." - "Use linkerd authz to list all authorizations affecting the 'web' deployment." 
- "Rotate the Linkerd trust anchor and verify the mesh health afterward." + - "Run linkerd policy generate for the 'payments' namespace and explain the suggested policy." + - "Use linkerd diagnostics policy to inspect the effective policy for svc/payments on port 8080." - id: linkerd-observability-troubleshooting name: Linkerd Observability & Troubleshooting - description: Diagnoses issues within the Linkerd service mesh, inspects telemetry and metrics through linkerd-viz, and correlates them with Kubernetes resources to find and resolve problems. + description: Diagnoses issues within the Linkerd service mesh, inspects telemetry and metrics through linkerd-viz and diagnostics subcommands (proxy-metrics, controller-metrics, endpoints, profile), and correlates them with Kubernetes resources to find and resolve problems. tags: - linkerd - observability @@ -264,9 +274,10 @@ spec: examples: - "My requests to 'service-x' show high latency; use linkerd viz stat and routes to help troubleshoot." - "Tap traffic for the 'checkout' deployment and identify 5xx responses." - - "Run linkerd diagnostics to check proxy metrics for the 'web' deployment." + - "Run linkerd diagnostics proxy-metrics to check proxy metrics for the 'web' deployment." - "Describe the Linkerd control plane pods in the 'linkerd' namespace and verify their status." - "Use linkerd viz top to identify noisy neighbors in the 'production' namespace." + - "Use linkerd diagnostics endpoints to inspect service discovery for emoji-svc.emojivoto.svc.cluster.local:8080." - "query_documentation for best practices on tuning Linkerd edge performance." 
deployment: resources: From 0a69884ac4522f94799831dd1bcb9adfbbbc0f5d Mon Sep 17 00:00:00 2001 From: Brian Fox <878612+onematchfox@users.noreply.github.com> Date: Mon, 1 Dec 2025 18:34:45 +0100 Subject: [PATCH 03/17] feat(ui): add date/time to chat history in sidebar (#1143) Needed to manually copy/paste some HTML to get some older data to show what that looks like :) image Signed-off-by: Brian Fox <878612+onematchfox@users.noreply.github.com> Signed-off-by: Ivan Porta --- ui/src/components/sidebars/ChatItem.tsx | 36 ++++++++++++++++++--- ui/src/components/sidebars/SessionGroup.tsx | 28 ++++++++-------- ui/src/components/ui/sidebar.tsx | 2 +- 3 files changed, 45 insertions(+), 21 deletions(-) diff --git a/ui/src/components/sidebars/ChatItem.tsx b/ui/src/components/sidebars/ChatItem.tsx index 4e32afefc..6b01371ed 100644 --- a/ui/src/components/sidebars/ChatItem.tsx +++ b/ui/src/components/sidebars/ChatItem.tsx @@ -22,22 +22,48 @@ interface ChatItemProps { agentNamespace?: string; sessionName?: string; onDownload?: (sessionId: string) => Promise; + createdAt?: string; } -const ChatItem = ({ sessionId, agentName, agentNamespace, onDelete, sessionName, onDownload }: ChatItemProps) => { +const ChatItem = ({ sessionId, agentName, agentNamespace, onDelete, sessionName, onDownload, createdAt }: ChatItemProps) => { const title = sessionName || "Untitled"; + + // Format timestamp based on how recent it is + const formatTime = (dateString?: string) => { + if (!dateString) return ""; + + const date = new Date(dateString); + + const now = new Date(); + const isToday = date.toDateString() === now.toDateString(); + + // For today: just show time (e.g., "2:30 PM" or "14:30" based on locale) + if (isToday) { + return date.toLocaleTimeString([], { hour: 'numeric', minute: '2-digit' }); + } + + // For older: show full date and time (e.g., "Nov 28, 2:30 PM" based on locale) + return date.toLocaleDateString([], { month: 'short', day: 'numeric' }) + ', ' + + date.toLocaleTimeString([], { 
hour: 'numeric', minute: '2-digit' }); + }; + return ( <> - - - {title} + + + {title} + {formatTime(createdAt)} - + More diff --git a/ui/src/components/sidebars/SessionGroup.tsx b/ui/src/components/sidebars/SessionGroup.tsx index e04fc6fdb..549fda0be 100644 --- a/ui/src/components/sidebars/SessionGroup.tsx +++ b/ui/src/components/sidebars/SessionGroup.tsx @@ -19,22 +19,20 @@ const ChatGroup = ({ title, sessions, onDeleteSession, onDownloadSession, agentN return ( - - - - - {title} - - + +
+ + {title} + - - - {sessions.map((session) => ( - - ))} - - - +
+ + + {sessions.map((session) => ( + + ))} + +
diff --git a/ui/src/components/ui/sidebar.tsx b/ui/src/components/ui/sidebar.tsx index af58fa0c0..916c4521d 100644 --- a/ui/src/components/ui/sidebar.tsx +++ b/ui/src/components/ui/sidebar.tsx @@ -522,7 +522,7 @@ const SidebarMenuItem = React.forwardRef< SidebarMenuItem.displayName = "SidebarMenuItem" const sidebarMenuButtonVariants = cva( - "peer/menu-button flex w-full items-center gap-2 overflow-hidden rounded-md p-2 text-left text-sm outline-none ring-sidebar-ring transition-[width,height,padding] hover:bg-sidebar-accent hover:text-sidebar-accent-foreground focus-visible:ring-2 active:bg-sidebar-accent active:text-sidebar-accent-foreground disabled:pointer-events-none disabled:opacity-50 group-has-[[data-sidebar=menu-action]]/menu-item:pr-8 aria-disabled:pointer-events-none aria-disabled:opacity-50 data-[active=true]:bg-sidebar-accent data-[active=true]:font-medium data-[active=true]:text-sidebar-accent-foreground data-[state=open]:hover:bg-sidebar-accent data-[state=open]:hover:text-sidebar-accent-foreground group-data-[collapsible=icon]:!size-8 group-data-[collapsible=icon]:!p-2 [&>span:last-child]:truncate [&>svg]:size-4 [&>svg]:shrink-0", + "peer/menu-button flex w-full items-start gap-2 overflow-hidden rounded-md p-2 text-left text-sm outline-none ring-sidebar-ring transition-[width,height,padding] hover:bg-sidebar-accent hover:text-sidebar-accent-foreground focus-visible:ring-2 active:bg-sidebar-accent active:text-sidebar-accent-foreground disabled:pointer-events-none disabled:opacity-50 group-has-[[data-sidebar=menu-action]]/menu-item:pr-8 aria-disabled:pointer-events-none aria-disabled:opacity-50 data-[active=true]:bg-sidebar-accent data-[active=true]:font-medium data-[active=true]:text-sidebar-accent-foreground data-[state=open]:hover:bg-sidebar-accent data-[state=open]:hover:text-sidebar-accent-foreground group-data-[collapsible=icon]:!size-8 group-data-[collapsible=icon]:!p-2 [&>span:last-child]:truncate [&>svg]:size-4 [&>svg]:shrink-0", { variants: 
{ variant: { From 8e64b2a2d7f1ad8e1a8fe5e1c2d78c5187e15d02 Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Mon, 1 Dec 2025 13:54:48 -0500 Subject: [PATCH 04/17] Restart Agents Automatically On Secret Updates (#1121) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit note: unsure where the affinity template updates came from, but they get generated with the gen makefile target. maybe from #1085, but surprised it's generating on my pr 3 weeks after the merge 🤔 # Changes - Hashing secret alongside config-hash annotation for agent pod, so when a referenced secret updates, it restarts - Added `SecretHash` status on ModelConfig so that changes to underlying referenced secrets are propagated (resource version updates) to Agent reconciliation image With these changes… 1. When a Secret updates, a ModelConfig will update its status to reflect new hash. 2. ModelConfig updates resource version 3. The agent watching over modelConfig sees resource update 4. Agent reconciles, updating the annotation on the pod. 5. Agent restarts, loading in new secrets ## Golden Test Changes - Notes The outputs for golden test annotations have _not_ changed, because the annotation hash relies on the modelconfig status which has Secret updates (hash). Modelconfig needs to reconcile for status, and does not reconcile in test, so `byte{}` (no change) is written to the hash. # Context With the addition of TLS CA’s to ModelConfigs, it became apparent we’ll need a UX-friendly way for agents to update with the latest Secret (e.g. cert rotation, api key change) without requiring users to manually restart the agent. Note: We can’t rely on dynamic volume mounting, as the ca cert is read on agent start so that it configures the cached client. The api key also needed a way for its update to propagate to the agent. 
## Demo _steps_ [agent restart validation steps.md](https://github.com/user-attachments/files/23664735/agent.restart.validation.steps.md) _video_ https://github.com/user-attachments/assets/eca62fb4-2ca2-45eb-94ba-7dfd0db5244b ## Alternative Solutions _feedback wanted_ ### Per-Secret Status Instead of hashing all secrets into a single final hash to store in the ModelConfig’s status, we could store a status per-Secret. For example, the status would change from:
```yaml status: […] SecretHash: XYZ ``` to something like ```yaml status: […] Secrets: APIKey: Hash/Version: 123 TLS: Hash/Version: 123 ``` I avoided this in order to simplify status tracking, less wordy compared to adding a field-per-secret - especially if we expand on referenced secrets in the future. But this manner does provide a better way for users to track where changes occurred exactly, and could avoid needing to do any hashing by using each secret’s resource version for updates. We would need to see _how_ we’d propagate this to the agent pod annotations: adding annotation-per-secret vs. doing a singular hash for the pod like we do for the status now. ### Avoiding Restart Requirement We should be able to avoid the restart needed for agents to configure the secrets. For instance, right now we mount a Volume for the TLS CA, and we use its file to configure the client at start which is cached. We could remove the client caching so that updated data from volume mounts are caught and used. Pros: - Avoiding restart requirement Cons: - Not caching the client would have some performance impact as it would need to be recreated per-call (maybe not a big deal, but noteworthy) - We won’t be able to do any validation checks like we do now on startup. 
--- Resolves #1091 --------- Signed-off-by: Fabian Gonzalez Signed-off-by: Ivan Porta --- go/api/v1alpha2/modelconfig_types.go | 2 + .../crd/bases/kagent.dev_modelconfigs.yaml | 6 + .../controller/modelconfig_controller.go | 28 ++- .../controller/reconciler/reconciler.go | 80 +++++++- .../controller/reconciler/reconciler_test.go | 193 ++++++++++++++++++ .../translator/agent/adk_api_translator.go | 90 ++++---- .../templates/kagent.dev_modelconfigs.yaml | 6 + 7 files changed, 358 insertions(+), 47 deletions(-) create mode 100644 go/internal/controller/reconciler/reconciler_test.go diff --git a/go/api/v1alpha2/modelconfig_types.go b/go/api/v1alpha2/modelconfig_types.go index c6ad1a453..4e9315730 100644 --- a/go/api/v1alpha2/modelconfig_types.go +++ b/go/api/v1alpha2/modelconfig_types.go @@ -317,6 +317,8 @@ type ModelConfigSpec struct { type ModelConfigStatus struct { Conditions []metav1.Condition `json:"conditions"` ObservedGeneration int64 `json:"observedGeneration"` + // The secret hash stores a hash of any secrets required by the model config (i.e. api key, tls cert) to ensure agents referencing this model config detect changes to these secrets and restart if necessary. + SecretHash string `json:"secretHash,omitempty"` } // +kubebuilder:object:root=true diff --git a/go/config/crd/bases/kagent.dev_modelconfigs.yaml b/go/config/crd/bases/kagent.dev_modelconfigs.yaml index 136994c0d..74153526d 100644 --- a/go/config/crd/bases/kagent.dev_modelconfigs.yaml +++ b/go/config/crd/bases/kagent.dev_modelconfigs.yaml @@ -718,6 +718,12 @@ spec: observedGeneration: format: int64 type: integer + secretHash: + description: The secret hash stores a hash of any secrets required + by the model config (i.e. api key, tls cert) to ensure agents referencing + this model config detect changes to these secrets and restart if + necessary. 
+ type: string required: - conditions - observedGeneration diff --git a/go/internal/controller/modelconfig_controller.go b/go/internal/controller/modelconfig_controller.go index 43a477011..af414380c 100644 --- a/go/internal/controller/modelconfig_controller.go +++ b/go/internal/controller/modelconfig_controller.go @@ -104,17 +104,29 @@ func (r *ModelConfigController) findModelsUsingSecret(ctx context.Context, cl cl for i := range modelsList.Items { model := &modelsList.Items[i] - if model.Namespace != obj.Namespace { - continue - } - - if model.Spec.APIKeySecret == "" { - continue - } - if model.Spec.APIKeySecret == obj.Name { + if modelReferencesSecret(model, obj) { models = append(models, model) } } return models } + +func modelReferencesSecret(model *v1alpha2.ModelConfig, secretObj types.NamespacedName) bool { + // secrets must be in the same namespace as the model + if model.Namespace != secretObj.Namespace { + return false + } + + // check if secret is referenced as an APIKey + if model.Spec.APIKeySecret != "" && model.Spec.APIKeySecret == secretObj.Name { + return true + } + + // check if secret is referenced as a TLS CA certificate + if model.Spec.TLS != nil && model.Spec.TLS.CACertSecretRef != "" && model.Spec.TLS.CACertSecretRef == secretObj.Name { + return true + } + + return false +} diff --git a/go/internal/controller/reconciler/reconciler.go b/go/internal/controller/reconciler/reconciler.go index a9b542b77..d7fdb1bbd 100644 --- a/go/internal/controller/reconciler/reconciler.go +++ b/go/internal/controller/reconciler/reconciler.go @@ -2,9 +2,12 @@ package reconciler import ( "context" + "crypto/sha256" + "encoding/hex" "errors" "fmt" "reflect" + "sort" "sync" "github.com/hashicorp/go-multierror" @@ -211,6 +214,11 @@ func (a *kagentReconciler) ReconcileKagentMCPService(ctx context.Context, req ct return nil } +type secretRef struct { + NamespacedName types.NamespacedName + Secret *corev1.Secret +} + func (a *kagentReconciler) 
ReconcileKagentModelConfig(ctx context.Context, req ctrl.Request) error { modelConfig := &v1alpha2.ModelConfig{} if err := a.kube.Get(ctx, req.NamespacedName, modelConfig); err != nil { @@ -222,21 +230,79 @@ func (a *kagentReconciler) ReconcileKagentModelConfig(ctx context.Context, req c } var err error + var secrets []secretRef + + // check for api key secret if modelConfig.Spec.APIKeySecret != "" { secret := &corev1.Secret{} - if err = a.kube.Get(ctx, types.NamespacedName{Namespace: modelConfig.Namespace, Name: modelConfig.Spec.APIKeySecret}, secret); err != nil { - err = fmt.Errorf("failed to get secret %s: %v", modelConfig.Spec.APIKeySecret, err) + namespacedName := types.NamespacedName{Namespace: modelConfig.Namespace, Name: modelConfig.Spec.APIKeySecret} + + if kubeErr := a.kube.Get(ctx, namespacedName, secret); kubeErr != nil { + err = multierror.Append(err, fmt.Errorf("failed to get secret %s: %v", modelConfig.Spec.APIKeySecret, kubeErr)) + } else { + secrets = append(secrets, secretRef{ + NamespacedName: namespacedName, + Secret: secret, + }) + } + } + + // check for tls cert secret + if modelConfig.Spec.TLS != nil && modelConfig.Spec.TLS.CACertSecretRef != "" { + secret := &corev1.Secret{} + namespacedName := types.NamespacedName{Namespace: modelConfig.Namespace, Name: modelConfig.Spec.TLS.CACertSecretRef} + + if kubeErr := a.kube.Get(ctx, namespacedName, secret); kubeErr != nil { + err = multierror.Append(err, fmt.Errorf("failed to get secret %s: %v", modelConfig.Spec.TLS.CACertSecretRef, kubeErr)) + } else { + secrets = append(secrets, secretRef{ + NamespacedName: namespacedName, + Secret: secret, + }) } } + // compute the hash for the status + secretHash := computeStatusSecretHash(secrets) + return a.reconcileModelConfigStatus( ctx, modelConfig, err, + secretHash, ) } -func (a *kagentReconciler) reconcileModelConfigStatus(ctx context.Context, modelConfig *v1alpha2.ModelConfig, err error) error { +// computeStatusSecretHash computes a deterministic 
singular hash of the secrets the model config references for the status +// this loses per-secret context (i.e. versioning/hash status per-secret), but simplifies the number of statuses tracked +func computeStatusSecretHash(secrets []secretRef) string { + // sort secret references for deterministic output + sort.Slice(secrets, func(i, j int) bool { + return secrets[i].NamespacedName.String() < secrets[j].NamespacedName.String() + }) + + // compute a singular hash of the secrets + // this loses per-secret context (i.e. versioning/hash status per-secret), but simplifies the number of statuses tracked + hash := sha256.New() + for _, s := range secrets { + hash.Write([]byte(s.NamespacedName.String())) + + keys := make([]string, 0, len(s.Secret.Data)) + for k := range s.Secret.Data { + keys = append(keys, k) + } + sort.Strings(keys) + + for _, k := range keys { + hash.Write([]byte(k)) + hash.Write(s.Secret.Data[k]) + } + } + + return hex.EncodeToString(hash.Sum(nil)) +} + +func (a *kagentReconciler) reconcileModelConfigStatus(ctx context.Context, modelConfig *v1alpha2.ModelConfig, err error, secretHash string) error { var ( status metav1.ConditionStatus message string @@ -260,8 +326,14 @@ func (a *kagentReconciler) reconcileModelConfigStatus(ctx context.Context, model Message: message, }) + // check if the secret hash has changed + secretHashChanged := modelConfig.Status.SecretHash != secretHash + if secretHashChanged { + modelConfig.Status.SecretHash = secretHash + } + // update the status if it has changed or the generation has changed - if conditionChanged || modelConfig.Status.ObservedGeneration != modelConfig.Generation { + if conditionChanged || modelConfig.Status.ObservedGeneration != modelConfig.Generation || secretHashChanged { modelConfig.Status.ObservedGeneration = modelConfig.Generation if err := a.kube.Status().Update(ctx, modelConfig); err != nil { return fmt.Errorf("failed to update model config status: %v", err) diff --git 
a/go/internal/controller/reconciler/reconciler_test.go b/go/internal/controller/reconciler/reconciler_test.go new file mode 100644 index 000000000..92f04882f --- /dev/null +++ b/go/internal/controller/reconciler/reconciler_test.go @@ -0,0 +1,193 @@ +package reconciler + +import ( + "testing" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" +) + +// TestComputeStatusSecretHash_Output verifies the output of the hash function +func TestComputeStatusSecretHash_Output(t *testing.T) { + tests := []struct { + name string + secrets []secretRef + want string + }{ + { + name: "no secrets", + secrets: []secretRef{}, + want: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", // i.e. the hash of an empty string + }, + { + name: "one secret, no keys", + secrets: []secretRef{ + { + NamespacedName: types.NamespacedName{Namespace: "test", Name: "secret1"}, + Secret: &corev1.Secret{ + Data: map[string][]byte{}, + }, + }, + }, + want: "68a268d3f02147004cfa8b609966ec4cba7733f8c652edb80be8071eb1b91574", // because the secret exists, it still hashes the namespacedName + empty data + }, + { + name: "one secret, single key", + secrets: []secretRef{ + { + NamespacedName: types.NamespacedName{Namespace: "test", Name: "secret1"}, + Secret: &corev1.Secret{ + Data: map[string][]byte{"key1": []byte("value1")}, + }, + }, + }, + want: "62dc22ecd609281a5939efd60fae775e6b75b641614c523c400db994a09902ff", + }, + { + name: "one secret, multiple keys", + secrets: []secretRef{ + { + NamespacedName: types.NamespacedName{Namespace: "test", Name: "secret1"}, + Secret: &corev1.Secret{ + Data: map[string][]byte{"key1": []byte("value1"), "key2": []byte("value2")}, + }, + }, + }, + want: "ba6798ec591d129f78322cdae569eaccdb2f5a8343c12026f0ed6f4e156cd52e", + }, + { + name: "multiple secrets", + secrets: []secretRef{ + { + NamespacedName: types.NamespacedName{Namespace: "test", Name: "secret1"}, + Secret: &corev1.Secret{ + Data: 
map[string][]byte{"key1": []byte("value1")}, + }, + }, + { + NamespacedName: types.NamespacedName{Namespace: "test", Name: "secret2"}, + Secret: &corev1.Secret{ + Data: map[string][]byte{"key2": []byte("value2")}, + }, + }, + }, + want: "f174f0e21a4427a87a23e4f277946a27f686d023cbe42f3000df94a4df94f7b5", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := computeStatusSecretHash(tt.secrets) + assert.Equal(t, tt.want, got) + }) + } +} + +// TestComputeStatusSecretHash_Deterministic tests that the resultant hash is deterministic, specifically that ordering of keys and secrets does not matter +func TestComputeStatusSecretHash_Deterministic(t *testing.T) { + tests := []struct { + name string + secrets [2][]secretRef + expectedEqual bool + }{ + { + name: "key ordering should not matter", + secrets: [2][]secretRef{ + { + { + NamespacedName: types.NamespacedName{Namespace: "test", Name: "secret1"}, + Secret: &corev1.Secret{ + Data: map[string][]byte{"key1": []byte("value1"), "key2": []byte("value2")}, + }, + }, + }, + { + { + NamespacedName: types.NamespacedName{Namespace: "test", Name: "secret1"}, + Secret: &corev1.Secret{ + Data: map[string][]byte{"key2": []byte("value2"), "key1": []byte("value1")}, + }, + }, + }, + }, + expectedEqual: true, + }, + { + name: "secret ordering should not matter", + secrets: [2][]secretRef{ + { + { + NamespacedName: types.NamespacedName{Namespace: "test", Name: "secret1"}, + Secret: &corev1.Secret{ + Data: map[string][]byte{"key1": []byte("value1")}, + }, + }, + { + NamespacedName: types.NamespacedName{Namespace: "test", Name: "secret2"}, + Secret: &corev1.Secret{ + Data: map[string][]byte{"key1": []byte("value1")}, + }, + }, + }, + { + { + NamespacedName: types.NamespacedName{Namespace: "test", Name: "secret2"}, + Secret: &corev1.Secret{ + Data: map[string][]byte{"key1": []byte("value1")}, + }, + }, + { + NamespacedName: types.NamespacedName{Namespace: "test", Name: "secret1"}, + Secret: &corev1.Secret{ + 
Data: map[string][]byte{"key1": []byte("value1")}, + }, + }, + }, + }, + expectedEqual: true, + }, + { + name: "secret and key ordering should not matter", + secrets: [2][]secretRef{ + { + { + NamespacedName: types.NamespacedName{Namespace: "test", Name: "secret1"}, + Secret: &corev1.Secret{ + Data: map[string][]byte{"key1": []byte("value1"), "key2": []byte("value2")}, + }, + }, + { + NamespacedName: types.NamespacedName{Namespace: "test", Name: "secret2"}, + Secret: &corev1.Secret{ + Data: map[string][]byte{"key2": []byte("value2"), "key1": []byte("value1")}, + }, + }, + }, + { + { + NamespacedName: types.NamespacedName{Namespace: "test", Name: "secret2"}, + Secret: &corev1.Secret{ + Data: map[string][]byte{"key1": []byte("value1"), "key2": []byte("value2")}, + }, + }, + { + NamespacedName: types.NamespacedName{Namespace: "test", Name: "secret1"}, + Secret: &corev1.Secret{ + Data: map[string][]byte{"key2": []byte("value2"), "key1": []byte("value1")}, + }, + }, + }, + }, + expectedEqual: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got1 := computeStatusSecretHash(tt.secrets[0]) + got2 := computeStatusSecretHash(tt.secrets[1]) + assert.Equal(t, tt.expectedEqual, got1 == got2) + }) + } +} diff --git a/go/internal/controller/translator/agent/adk_api_translator.go b/go/internal/controller/translator/agent/adk_api_translator.go index adcee86cc..65cb1701b 100644 --- a/go/internal/controller/translator/agent/adk_api_translator.go +++ b/go/internal/controller/translator/agent/adk_api_translator.go @@ -4,6 +4,7 @@ import ( "context" "crypto/sha256" "encoding/binary" + "encoding/hex" "encoding/json" "errors" "fmt" @@ -120,7 +121,7 @@ func (a *adkApiTranslator) TranslateAgent( switch agent.Spec.Type { case v1alpha2.AgentType_Declarative: - cfg, card, mdd, err := a.translateInlineAgent(ctx, agent) + cfg, card, mdd, secretHashBytes, err := a.translateInlineAgent(ctx, agent) if err != nil { return nil, err } @@ -128,7 +129,7 @@ func (a 
*adkApiTranslator) TranslateAgent( if err != nil { return nil, err } - return a.buildManifest(ctx, agent, dep, cfg, card) + return a.buildManifest(ctx, agent, dep, cfg, card, secretHashBytes) case v1alpha2.AgentType_BYO: @@ -151,7 +152,7 @@ func (a *adkApiTranslator) TranslateAgent( DefaultInputModes: []string{"text"}, DefaultOutputModes: []string{"text"}, } - return a.buildManifest(ctx, agent, dep, nil, agentCard) + return a.buildManifest(ctx, agent, dep, nil, agentCard, nil) default: return nil, fmt.Errorf("unknown agent type: %s", agent.Spec.Type) @@ -234,11 +235,12 @@ func (a *adkApiTranslator) buildManifest( dep *resolvedDeployment, cfg *adk.AgentConfig, // nil for BYO card *server.AgentCard, // nil for BYO + modelConfigSecretHashBytes []byte, // nil for BYO ) (*AgentOutputs, error) { outputs := &AgentOutputs{} // Optional config/card for Inline - var configHash uint64 + var cfgHash uint64 var secretVol []corev1.Volume var secretMounts []corev1.VolumeMount var cfgJson string @@ -252,7 +254,12 @@ func (a *adkApiTranslator) buildManifest( if err != nil { return nil, err } - configHash = computeConfigHash(bCfg, bCard) + // Include secret hash bytes in config hash to trigger redeployment on secret changes + secretData := modelConfigSecretHashBytes + if secretData == nil { + secretData = []byte{} + } + cfgHash = computeConfigHash(bCfg, bCard, secretData) cfgJson = string(bCfg) agentCard = string(bCard) @@ -411,8 +418,9 @@ func (a *adkApiTranslator) buildManifest( if podTemplateAnnotations == nil { podTemplateAnnotations = map[string]string{} } - // Add config hash annotation to pod template to force rollout on config changes - podTemplateAnnotations["kagent.dev/config-hash"] = fmt.Sprintf("%d", configHash) + // Add hash annotations to pod template to force rollout on agent config or model config secret changes + podTemplateAnnotations["kagent.dev/config-hash"] = fmt.Sprintf("%d", cfgHash) + var securityContext *corev1.SecurityContext if needSandbox { 
securityContext = &corev1.SecurityContext{ @@ -500,16 +508,16 @@ func (a *adkApiTranslator) buildManifest( return outputs, a.runPlugins(ctx, agent, outputs) } -func (a *adkApiTranslator) translateInlineAgent(ctx context.Context, agent *v1alpha2.Agent) (*adk.AgentConfig, *server.AgentCard, *modelDeploymentData, error) { +func (a *adkApiTranslator) translateInlineAgent(ctx context.Context, agent *v1alpha2.Agent) (*adk.AgentConfig, *server.AgentCard, *modelDeploymentData, []byte, error) { - model, mdd, err := a.translateModel(ctx, agent.Namespace, agent.Spec.Declarative.ModelConfig) + model, mdd, secretHashBytes, err := a.translateModel(ctx, agent.Namespace, agent.Spec.Declarative.ModelConfig) if err != nil { - return nil, nil, nil, err + return nil, nil, nil, nil, err } systemMessage, err := a.resolveSystemMessage(ctx, agent) if err != nil { - return nil, nil, nil, err + return nil, nil, nil, nil, err } cfg := &adk.AgentConfig{ @@ -545,7 +553,7 @@ func (a *adkApiTranslator) translateInlineAgent(ctx context.Context, agent *v1al case tool.McpServer != nil: err := a.translateMCPServerTarget(ctx, cfg, agent.Namespace, tool.McpServer, tool.HeadersFrom) if err != nil { - return nil, nil, nil, err + return nil, nil, nil, nil, err } case tool.Agent != nil: agentRef := types.NamespacedName{ @@ -554,14 +562,14 @@ func (a *adkApiTranslator) translateInlineAgent(ctx context.Context, agent *v1al } if agentRef.Namespace == agent.Namespace && agentRef.Name == agent.Name { - return nil, nil, nil, fmt.Errorf("agent tool cannot be used to reference itself, %s", agentRef) + return nil, nil, nil, nil, fmt.Errorf("agent tool cannot be used to reference itself, %s", agentRef) } // Translate a nested tool toolAgent := &v1alpha2.Agent{} err := a.kube.Get(ctx, agentRef, toolAgent) if err != nil { - return nil, nil, nil, err + return nil, nil, nil, nil, err } switch toolAgent.Spec.Type { @@ -569,7 +577,7 @@ func (a *adkApiTranslator) translateInlineAgent(ctx context.Context, agent *v1al url 
:= fmt.Sprintf("http://%s.%s:8080", toolAgent.Name, toolAgent.Namespace) headers, err := tool.ResolveHeaders(ctx, a.kube, agent.Namespace) if err != nil { - return nil, nil, nil, err + return nil, nil, nil, nil, err } cfg.RemoteAgents = append(cfg.RemoteAgents, adk.RemoteAgentConfig{ @@ -579,15 +587,15 @@ func (a *adkApiTranslator) translateInlineAgent(ctx context.Context, agent *v1al Description: toolAgent.Spec.Description, }) default: - return nil, nil, nil, fmt.Errorf("unknown agent type: %s", toolAgent.Spec.Type) + return nil, nil, nil, nil, fmt.Errorf("unknown agent type: %s", toolAgent.Spec.Type) } default: - return nil, nil, nil, fmt.Errorf("tool must have a provider or tool server") + return nil, nil, nil, nil, fmt.Errorf("tool must have a provider or tool server") } } - return cfg, agentCard, mdd, nil + return cfg, agentCard, mdd, secretHashBytes, nil } func (a *adkApiTranslator) resolveSystemMessage(ctx context.Context, agent *v1alpha2.Agent) (string, error) { @@ -655,11 +663,21 @@ func addTLSConfiguration(modelDeploymentData *modelDeploymentData, tlsConfig *v1 } } -func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelConfig string) (adk.Model, *modelDeploymentData, error) { +func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelConfig string) (adk.Model, *modelDeploymentData, []byte, error) { model := &v1alpha2.ModelConfig{} err := a.kube.Get(ctx, types.NamespacedName{Namespace: namespace, Name: modelConfig}, model) if err != nil { - return nil, nil, err + return nil, nil, nil, err + } + + // Decode hex-encoded secret hash to bytes + var secretHashBytes []byte + if model.Status.SecretHash != "" { + decoded, err := hex.DecodeString(model.Status.SecretHash) + if err != nil { + return nil, nil, nil, fmt.Errorf("failed to decode secret hash: %w", err) + } + secretHashBytes = decoded } modelDeploymentData := &modelDeploymentData{} @@ -722,7 +740,7 @@ func (a *adkApiTranslator) translateModel(ctx 
context.Context, namespace, modelC }) } } - return openai, modelDeploymentData, nil + return openai, modelDeploymentData, secretHashBytes, nil case v1alpha2.ModelProviderAnthropic: if model.Spec.APIKeySecret != "" { modelDeploymentData.EnvVars = append(modelDeploymentData.EnvVars, corev1.EnvVar{ @@ -749,10 +767,10 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC if model.Spec.Anthropic != nil { anthropic.BaseUrl = model.Spec.Anthropic.BaseURL } - return anthropic, modelDeploymentData, nil + return anthropic, modelDeploymentData, secretHashBytes, nil case v1alpha2.ModelProviderAzureOpenAI: if model.Spec.AzureOpenAI == nil { - return nil, nil, fmt.Errorf("AzureOpenAI model config is required") + return nil, nil, nil, fmt.Errorf("AzureOpenAI model config is required") } modelDeploymentData.EnvVars = append(modelDeploymentData.EnvVars, corev1.EnvVar{ Name: "AZURE_OPENAI_API_KEY", @@ -792,10 +810,10 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC // Populate TLS fields in BaseModel populateTLSFields(&azureOpenAI.BaseModel, model.Spec.TLS) - return azureOpenAI, modelDeploymentData, nil + return azureOpenAI, modelDeploymentData, secretHashBytes, nil case v1alpha2.ModelProviderGeminiVertexAI: if model.Spec.GeminiVertexAI == nil { - return nil, nil, fmt.Errorf("GeminiVertexAI model config is required") + return nil, nil, nil, fmt.Errorf("GeminiVertexAI model config is required") } modelDeploymentData.EnvVars = append(modelDeploymentData.EnvVars, corev1.EnvVar{ Name: "GOOGLE_CLOUD_PROJECT", @@ -836,10 +854,10 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC // Populate TLS fields in BaseModel populateTLSFields(&gemini.BaseModel, model.Spec.TLS) - return gemini, modelDeploymentData, nil + return gemini, modelDeploymentData, secretHashBytes, nil case v1alpha2.ModelProviderAnthropicVertexAI: if model.Spec.AnthropicVertexAI == nil { - return nil, nil, 
fmt.Errorf("AnthropicVertexAI model config is required") + return nil, nil, nil, fmt.Errorf("AnthropicVertexAI model config is required") } modelDeploymentData.EnvVars = append(modelDeploymentData.EnvVars, corev1.EnvVar{ Name: "GOOGLE_CLOUD_PROJECT", @@ -876,10 +894,10 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC // Populate TLS fields in BaseModel populateTLSFields(&anthropic.BaseModel, model.Spec.TLS) - return anthropic, modelDeploymentData, nil + return anthropic, modelDeploymentData, secretHashBytes, nil case v1alpha2.ModelProviderOllama: if model.Spec.Ollama == nil { - return nil, nil, fmt.Errorf("ollama model config is required") + return nil, nil, nil, fmt.Errorf("ollama model config is required") } modelDeploymentData.EnvVars = append(modelDeploymentData.EnvVars, corev1.EnvVar{ Name: "OLLAMA_API_BASE", @@ -894,7 +912,7 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC // Populate TLS fields in BaseModel populateTLSFields(&ollama.BaseModel, model.Spec.TLS) - return ollama, modelDeploymentData, nil + return ollama, modelDeploymentData, secretHashBytes, nil case v1alpha2.ModelProviderGemini: modelDeploymentData.EnvVars = append(modelDeploymentData.EnvVars, corev1.EnvVar{ Name: "GOOGLE_API_KEY", @@ -916,9 +934,10 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC // Populate TLS fields in BaseModel populateTLSFields(&gemini.BaseModel, model.Spec.TLS) - return gemini, modelDeploymentData, nil + return gemini, modelDeploymentData, secretHashBytes, nil } - return nil, nil, fmt.Errorf("unknown model provider: %s", model.Spec.Provider) + + return nil, nil, nil, fmt.Errorf("unknown model provider: %s", model.Spec.Provider) } func (a *adkApiTranslator) translateStreamableHttpTool(ctx context.Context, tool *v1alpha2.RemoteMCPServerSpec, namespace string) (*adk.StreamableHTTPConnectionParams, error) { @@ -1125,10 +1144,11 @@ func (a *adkApiTranslator) 
translateRemoteMCPServerTarget(ctx context.Context, a // Helper functions -func computeConfigHash(config, card []byte) uint64 { +func computeConfigHash(agentCfg, agentCard, secretData []byte) uint64 { hasher := sha256.New() - hasher.Write(config) - hasher.Write(card) + hasher.Write(agentCfg) + hasher.Write(agentCard) + hasher.Write(secretData) hash := hasher.Sum(nil) return binary.BigEndian.Uint64(hash[:8]) } diff --git a/helm/kagent-crds/templates/kagent.dev_modelconfigs.yaml b/helm/kagent-crds/templates/kagent.dev_modelconfigs.yaml index 136994c0d..74153526d 100644 --- a/helm/kagent-crds/templates/kagent.dev_modelconfigs.yaml +++ b/helm/kagent-crds/templates/kagent.dev_modelconfigs.yaml @@ -718,6 +718,12 @@ spec: observedGeneration: format: int64 type: integer + secretHash: + description: The secret hash stores a hash of any secrets required + by the model config (i.e. api key, tls cert) to ensure agents referencing + this model config detect changes to these secrets and restart if + necessary. + type: string required: - conditions - observedGeneration From af6adabec4479623dffacd01c9b55d6a8b8ceef4 Mon Sep 17 00:00:00 2001 From: Brian Fox <878612+onematchfox@users.noreply.github.com> Date: Mon, 1 Dec 2025 19:56:23 +0100 Subject: [PATCH 05/17] fix(controller): ensure upsert works on both Postgres and SQLite (#1137) Split this out of #1133 to try reduce the size of that PR - but also because it's not strictly related to being able to scale the controller - it simply manifested when needing to switch to postgres when running multiple controller replicas. 
Signed-off-by: Brian Fox <878612+onematchfox@users.noreply.github.com> Signed-off-by: Ivan Porta --- go/internal/database/service.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/go/internal/database/service.go b/go/internal/database/service.go index d8617b876..9c4c843b0 100644 --- a/go/internal/database/service.go +++ b/go/internal/database/service.go @@ -4,6 +4,7 @@ import ( "fmt" "gorm.io/gorm" + "gorm.io/gorm/clause" ) type Model interface { @@ -45,16 +46,15 @@ func get[T Model](db *gorm.DB, clauses ...Clause) (*T, error) { return &model, nil } -// TODO: Make this upsert actually idempotent +// save performs an upsert operation (INSERT ON CONFLICT DO UPDATE) // args: // - db: the database connection // - model: the model to save func save[T Model](db *gorm.DB, model *T) error { - if err := db.Create(model).Error; err != nil { - if err == gorm.ErrDuplicatedKey { - return db.Save(model).Error - } - return fmt.Errorf("failed to create model: %w", err) + if err := db.Clauses(clause.OnConflict{ + UpdateAll: true, + }).Create(model).Error; err != nil { + return fmt.Errorf("failed to upsert model: %w", err) } return nil } From 3315749182c3e4067e4864bcae8b4381c58b2f46 Mon Sep 17 00:00:00 2001 From: Brian Fox <878612+onematchfox@users.noreply.github.com> Date: Tue, 2 Dec 2025 15:34:38 +0100 Subject: [PATCH 06/17] fix(helm): remove sqlite volume+mount if database is postgres (#1140) Another artifact of #1133. No need for the sqlite volume+mount when database is set to postgres. 
Signed-off-by: Brian Fox <878612+onematchfox@users.noreply.github.com> Signed-off-by: Ivan Porta --- .../templates/controller-deployment.yaml | 10 +++- .../tests/controller-deployment_test.yaml | 49 ++++++++++++++++++- helm/kagent/values.yaml | 18 +++---- 3 files changed, 66 insertions(+), 11 deletions(-) diff --git a/helm/kagent/templates/controller-deployment.yaml b/helm/kagent/templates/controller-deployment.yaml index 2044c332f..e7e0e6436 100644 --- a/helm/kagent/templates/controller-deployment.yaml +++ b/helm/kagent/templates/controller-deployment.yaml @@ -26,14 +26,18 @@ spec: securityContext: {{- toYaml (.Values.controller.podSecurityContext | default .Values.podSecurityContext) | nindent 8 }} serviceAccountName: {{ include "kagent.fullname" . }}-controller + {{- if or (eq .Values.database.type "sqlite") (gt (len .Values.controller.volumes) 0) }} volumes: + {{- if eq .Values.database.type "sqlite" }} - name: sqlite-volume emptyDir: sizeLimit: 500Mi medium: Memory + {{- end }} {{- with .Values.controller.volumes }} {{- toYaml . | nindent 6 }} {{- end }} + {{- end }} {{- with .Values.controller.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} @@ -127,9 +131,13 @@ spec: path: /health port: http periodSeconds: 30 + {{- if or (eq .Values.database.type "sqlite") (gt (len .Values.controller.volumeMounts) 0) }} volumeMounts: + {{- if eq .Values.database.type "sqlite" }} - name: sqlite-volume mountPath: /sqlite-volume + {{- end }} {{- with .Values.controller.volumeMounts }} {{- toYaml . 
| nindent 12 }} - {{- end }} \ No newline at end of file + {{- end }} + {{- end }} diff --git a/helm/kagent/tests/controller-deployment_test.yaml b/helm/kagent/tests/controller-deployment_test.yaml index f62fdef9f..9112db6e6 100644 --- a/helm/kagent/tests/controller-deployment_test.yaml +++ b/helm/kagent/tests/controller-deployment_test.yaml @@ -150,4 +150,51 @@ tests: key: role value: AI effect: NoSchedule - operator: Equal \ No newline at end of file + operator: Equal + + - it: should not render sqlite volume and mount when using postgres with extra volumes + template: controller-deployment.yaml + set: + database: + type: postgres + controller: + volumes: + - name: extra-data + emptyDir: {} + volumeMounts: + - name: extra-data + mountPath: /extra + asserts: + # volumes block should exist due to provided extra volumes + - contains: + path: spec.template.spec.volumes + content: + name: extra-data + emptyDir: {} + # sqlite volume must not be present + - notContains: + path: spec.template.spec.volumes + content: + name: sqlite-volume + # volumeMounts block should include our extra mount + - contains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: extra-data + mountPath: /extra + # sqlite volume mount must not be present + - notContains: + path: spec.template.spec.containers[0].volumeMounts + content: + name: sqlite-volume + + - it: should not render volumes or volumeMounts sections when using postgres without extra volumes or mounts + template: controller-deployment.yaml + set: + database: + type: postgres + asserts: + - isNull: + path: spec.template.spec.volumes + - isNull: + path: spec.template.spec.containers[0].volumeMounts \ No newline at end of file diff --git a/helm/kagent/values.yaml b/helm/kagent/values.yaml index 7f5257962..c6aaba486 100644 --- a/helm/kagent/values.yaml +++ b/helm/kagent/values.yaml @@ -105,18 +105,18 @@ controller: targetPort: 8083 env: [] -# Additional volumes on the output Deployment definition. 
+ # Additional volumes on the output Deployment definition. volumes: [] -# - name: foo -# secret: -# secretName: mysecret -# optional: false + # - name: foo + # secret: + # secretName: mysecret + # optional: false -# Additional volumeMounts on the output Deployment definition. + # Additional volumeMounts on the output Deployment definition. volumeMounts: [] -# - name: foo -# mountPath: "/etc/foo" -# readOnly: true + # - name: foo + # mountPath: "/etc/foo" + # readOnly: true # ============================================================================== # UI CONFIGURATION From e9691b02df64f8f64ead3103042e2d351ac4528b Mon Sep 17 00:00:00 2001 From: Brian Fox <878612+onematchfox@users.noreply.github.com> Date: Tue, 2 Dec 2025 15:42:29 +0100 Subject: [PATCH 07/17] fix(helm): add error when trying to scale controller with local SQLlite database (#1144) Running multiple controller replicas when using a local SQLite database will lead to errors as API requests will inevitably end up being handled by a replicas that does not have the local state (e.g. A2A session). This check/error hopefully prevents users from making this mistake. Split out from #1133 Signed-off-by: Brian Fox <878612+onematchfox@users.noreply.github.com> Signed-off-by: Ivan Porta --- helm/kagent/templates/_helpers.tpl | 9 +++++++++ helm/kagent/templates/controller-deployment.yaml | 1 + helm/kagent/tests/controller-deployment_test.yaml | 15 ++++++++++++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/helm/kagent/templates/_helpers.tpl b/helm/kagent/templates/_helpers.tpl index 8fd55ade6..73a933553 100644 --- a/helm/kagent/templates/_helpers.tpl +++ b/helm/kagent/templates/_helpers.tpl @@ -106,3 +106,12 @@ Engine labels {{ include "kagent.labels" . 
}} app.kubernetes.io/component: engine {{- end }} + +{{/* +Validate controller configuration +*/}} +{{- define "kagent.validateController" -}} +{{- if and (gt (.Values.controller.replicas | int) 1) (eq .Values.database.type "sqlite") -}} +{{- fail "ERROR: controller.replicas cannot be greater than 1 when database.type is 'sqlite' as the SQLite database is local to the pod. Please either set controller.replicas to 1 or change database.type to 'postgres'." }} +{{- end -}} +{{- end -}} diff --git a/helm/kagent/templates/controller-deployment.yaml b/helm/kagent/templates/controller-deployment.yaml index e7e0e6436..10ffb4bdd 100644 --- a/helm/kagent/templates/controller-deployment.yaml +++ b/helm/kagent/templates/controller-deployment.yaml @@ -1,3 +1,4 @@ +{{- include "kagent.validateController" . -}} apiVersion: apps/v1 kind: Deployment metadata: diff --git a/helm/kagent/tests/controller-deployment_test.yaml b/helm/kagent/tests/controller-deployment_test.yaml index 9112db6e6..6918b9d07 100644 --- a/helm/kagent/tests/controller-deployment_test.yaml +++ b/helm/kagent/tests/controller-deployment_test.yaml @@ -16,11 +16,24 @@ tests: - hasDocuments: count: 1 - - it: should render controller deployment with custom replica count + - it: should fail when replicas > 1 and database type is sqlite template: controller-deployment.yaml set: controller: replicas: 3 + database: + type: sqlite + asserts: + - failedTemplate: + errorMessage: "ERROR: controller.replicas cannot be greater than 1 when database.type is 'sqlite' as the SQLite database is local to the pod. Please either set controller.replicas to 1 or change database.type to 'postgres'." 
+ + - it: should render controller deployment with custom replica count if database type is postgres + template: controller-deployment.yaml + set: + controller: + replicas: 3 + database: + type: postgres asserts: - equal: path: spec.replicas From 7eeab5cc8bd08826bebfcb75565e38e6e7038666 Mon Sep 17 00:00:00 2001 From: Brian Fox <878612+onematchfox@users.noreply.github.com> Date: Tue, 2 Dec 2025 15:45:08 +0100 Subject: [PATCH 08/17] Enable use of postgres in local development (#1145) Enables local testing using postgres as a backing store for controller. Split out from #1133 (with added docs). --------- Signed-off-by: Brian Fox <878612+onematchfox@users.noreply.github.com> Signed-off-by: Eitan Yarmush Co-authored-by: Eitan Yarmush Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Ivan Porta --- DEVELOPMENT.md | 44 +++++++++++++++++++++ Makefile | 18 ++++----- contrib/addons/postgres.yaml | 74 ++++++++++++++++++++++++++++++++++++ 3 files changed, 127 insertions(+), 9 deletions(-) create mode 100644 contrib/addons/postgres.yaml diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 6a4a0a294..ce398e2b1 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -78,6 +78,50 @@ kubectl port-forward svc/kagent-ui 8001:80 Then open your browser and go to `http://localhost:8001`. +### Addons + +Optional addons are available to enhance your development environment with +observability and infrastructure components. + +**Prerequisites:** Complete steps 1-3 above (cluster creation and environment variables). 
+ +To install all addons: + +```shell +make kagent-addon-install +``` + +This installs the following components into your cluster: + +| Addon | Description | Namespace | +|----------------|-----------------------------------------------------|-----------| +| Istio | Service mesh (demo profile) | `istio-system` | +| Grafana | Dashboards and visualization | `kagent` | +| Prometheus | Metrics collection | `kagent` | +| Metrics Server | Kubernetes resource metrics | `kube-system` | +| Postgres | Relational database (for kagent controller storage) | `kagent` | + +#### Using Postgres as the Datastore + +By default, kagent uses a local SQLite database for data persistence. To use +postgres as the backing store instead, deploy kagent via: + +> **Warning:** +> The following example uses hardcoded Postgres credentials (`postgres:kagent`) for local development only. +> **Do not use these credentials in production environments.** +```shell +KAGENT_HELM_EXTRA_ARGS="--set database.type=postgres --set database.postgres.url=postgres://postgres:kagent@postgres.kagent.svc.cluster.local:5432/kagent" \ + make helm-install +``` + +Verify the connection by checking the controller logs: + +```shell +kubectl logs -n kagent deployment/kagent-controller | grep -i postgres +``` + +**To revert to SQLite:** Run `make helm-install` without the `KAGENT_HELM_EXTRA_ARGS` variable. 
+ ### Troubleshooting ### buildx localhost access diff --git a/Makefile b/Makefile index e27c0ffcc..6142abca7 100644 --- a/Makefile +++ b/Makefile @@ -377,16 +377,16 @@ kagent-ui-port-forward: use-kind-cluster .PHONY: kagent-addon-install kagent-addon-install: use-kind-cluster - #to test the kagent addons - installing istio, grafana, prometheus, metrics-server + # to test the kagent addons - installing istio, grafana, prometheus, metrics-server istioctl install --set profile=demo -y - kubectl apply -f contrib/addons/grafana.yaml - kubectl apply -f contrib/addons/prometheus.yaml - kubectl apply -f contrib/addons/metrics-server.yaml - #wait for pods to be ready - kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=grafana -n kagent --timeout=60s - kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=prometheus -n kagent --timeout=60s - #port forward grafana service - kubectl port-forward svc/grafana 3000:3000 -n kagent + kubectl apply --context kind-$(KIND_CLUSTER_NAME) -f contrib/addons/grafana.yaml + kubectl apply --context kind-$(KIND_CLUSTER_NAME) -f contrib/addons/postgres.yaml + kubectl apply --context kind-$(KIND_CLUSTER_NAME) -f contrib/addons/prometheus.yaml + kubectl apply --context kind-$(KIND_CLUSTER_NAME) -f contrib/addons/metrics-server.yaml + # wait for pods to be ready + kubectl wait --context kind-$(KIND_CLUSTER_NAME) --for=condition=Ready pod -l app.kubernetes.io/name=grafana -n kagent --timeout=60s + kubectl wait --context kind-$(KIND_CLUSTER_NAME) --for=condition=Ready pod -l app.kubernetes.io/name=postgres -n kagent --timeout=60s + kubectl wait --context kind-$(KIND_CLUSTER_NAME) --for=condition=Ready pod -l app.kubernetes.io/name=prometheus -n kagent --timeout=60s .PHONY: open-dev-container open-dev-container: diff --git a/contrib/addons/postgres.yaml b/contrib/addons/postgres.yaml new file mode 100644 index 000000000..780a3e418 --- /dev/null +++ b/contrib/addons/postgres.yaml @@ -0,0 +1,74 @@ +--- +apiVersion: v1 +kind: 
ConfigMap +metadata: + name: postgres + namespace: kagent +data: + POSTGRES_DB: kagent + POSTGRES_USER: postgres + POSTGRES_PASSWORD: kagent +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: postgres + namespace: kagent +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 500Mi +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: postgres + namespace: kagent +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: postgres + template: + metadata: + labels: + app.kubernetes.io/name: postgres + spec: + containers: + - name: postgres + image: postgres:18-alpine + ports: + - containerPort: 5432 + envFrom: + - configMapRef: + name: postgres + volumeMounts: + - name: postgres-storage + mountPath: /var/lib/postgresql + resources: + requests: + memory: "256Mi" + cpu: "250m" + limits: + memory: "512Mi" + cpu: "500m" + volumes: + - name: postgres-storage + persistentVolumeClaim: + claimName: postgres +--- +apiVersion: v1 +kind: Service +metadata: + name: postgres + namespace: kagent +spec: + selector: + app.kubernetes.io/name: postgres + ports: + - port: 5432 + targetPort: 5432 + type: ClusterIP + From ffeb9bbe12801d6e6be43721435c8e89284d96c4 Mon Sep 17 00:00:00 2001 From: Brian Fox <878612+onematchfox@users.noreply.github.com> Date: Tue, 2 Dec 2025 15:50:39 +0100 Subject: [PATCH 09/17] Use environment variables for controller configuration (#1139) **Yet another PR split out from #1133 to try reduce review burden** - keeping that one open for now as all of these other PRs are ultimately working towards that goal. This PR refactors the kagent controller to support the use of environment variables for configuration in addition to command-line arguments. It also updates the Helm chart to make use of env vars instead of command line args and adds the ability for user's to supply their own environment variables with custom configuration. This allows users to supply sensitive configuration (e.g. 
postgres database url) via secrets instead of exposing these via `args`. Env vars are also easier to patch when working with rendered manifests if needed. --------- Signed-off-by: Brian Fox <878612+onematchfox@users.noreply.github.com> Signed-off-by: Ivan Porta --- go/pkg/app/app.go | 37 ++- go/pkg/app/app_test.go | 219 ++++++++++++++++++ .../templates/controller-configmap.yaml | 36 +++ .../templates/controller-deployment.yaml | 69 +----- .../tests/controller-deployment_test.yaml | 40 ++-- helm/kagent/values.yaml | 1 + 6 files changed, 317 insertions(+), 85 deletions(-) create mode 100644 helm/kagent/templates/controller-configmap.yaml diff --git a/go/pkg/app/app.go b/go/pkg/app/app.go index 1a7b7ce69..1ee1e65e1 100644 --- a/go/pkg/app/app.go +++ b/go/pkg/app/app.go @@ -20,6 +20,7 @@ import ( "context" "crypto/tls" "flag" + "fmt" "net/http" "net/http/pprof" "os" @@ -29,6 +30,7 @@ import ( "github.com/gorilla/mux" + "github.com/hashicorp/go-multierror" "github.com/kagent-dev/kagent/go/internal/version" "k8s.io/apimachinery/pkg/api/resource" @@ -147,6 +149,30 @@ func (cfg *Config) SetFlags(commandLine *flag.FlagSet) { commandLine.Var(&cfg.Streaming.MaxBufSize, "streaming-max-buf-size", "The maximum size of the streaming buffer.") commandLine.Var(&cfg.Streaming.InitialBufSize, "streaming-initial-buf-size", "The initial size of the streaming buffer.") commandLine.DurationVar(&cfg.Streaming.Timeout, "streaming-timeout", 60*time.Second, "The timeout for the streaming connection.") + + commandLine.StringVar(&agent_translator.DefaultImageConfig.Registry, "image-registry", agent_translator.DefaultImageConfig.Registry, "The registry to use for the image.") + commandLine.StringVar(&agent_translator.DefaultImageConfig.Tag, "image-tag", agent_translator.DefaultImageConfig.Tag, "The tag to use for the image.") + commandLine.StringVar(&agent_translator.DefaultImageConfig.PullPolicy, "image-pull-policy", agent_translator.DefaultImageConfig.PullPolicy, "The pull policy to use for 
the image.") + commandLine.StringVar(&agent_translator.DefaultImageConfig.PullSecret, "image-pull-secret", "", "The pull secret name for the agent image.") + commandLine.StringVar(&agent_translator.DefaultImageConfig.Repository, "image-repository", agent_translator.DefaultImageConfig.Repository, "The repository to use for the agent image.") +} + +// LoadFromEnv loads configuration values from environment variables. +// Flag names are converted to uppercase with underscores (e.g., metrics-bind-address -> METRICS_BIND_ADDRESS). +func LoadFromEnv(fs *flag.FlagSet) error { + var loadErr error + + fs.VisitAll(func(f *flag.Flag) { + envName := strings.ToUpper(strings.ReplaceAll(f.Name, "-", "_")) + + if envVal := os.Getenv(envName); envVal != "" { + if err := f.Value.Set(envVal); err != nil { + loadErr = multierror.Append(loadErr, fmt.Errorf("failed to set flag %s from env %s=%s: %w", f.Name, envName, envVal, err)) + } + } + }) + + return loadErr } type BootstrapConfig struct { @@ -174,16 +200,17 @@ func Start(getExtensionConfig GetExtensionConfig) { ctx := context.Background() cfg.SetFlags(flag.CommandLine) - flag.StringVar(&agent_translator.DefaultImageConfig.Registry, "image-registry", agent_translator.DefaultImageConfig.Registry, "The registry to use for the image.") - flag.StringVar(&agent_translator.DefaultImageConfig.Tag, "image-tag", agent_translator.DefaultImageConfig.Tag, "The tag to use for the image.") - flag.StringVar(&agent_translator.DefaultImageConfig.PullPolicy, "image-pull-policy", agent_translator.DefaultImageConfig.PullPolicy, "The pull policy to use for the image.") - flag.StringVar(&agent_translator.DefaultImageConfig.PullSecret, "image-pull-secret", "", "The pull secret name for the agent image.") - flag.StringVar(&agent_translator.DefaultImageConfig.Repository, "image-repository", agent_translator.DefaultImageConfig.Repository, "The repository to use for the agent image.") opts := zap.Options{} opts.BindFlags(flag.CommandLine) flag.Parse() + // 
Load configuration from environment variables (overrides flags) + if err := LoadFromEnv(flag.CommandLine); err != nil { + setupLog.Error(err, "failed to load configuration from environment variables") + os.Exit(1) + } + logger := zap.New(zap.UseFlagOptions(&opts)) ctrl.SetLogger(logger) diff --git a/go/pkg/app/app_test.go b/go/pkg/app/app_test.go index d18991178..a75b06a58 100644 --- a/go/pkg/app/app_test.go +++ b/go/pkg/app/app_test.go @@ -1,10 +1,13 @@ package app import ( + "flag" "strings" "testing" + "time" "github.com/stretchr/testify/assert" + "k8s.io/apimachinery/pkg/api/resource" ) func TestFilterValidNamespaces(t *testing.T) { @@ -112,3 +115,219 @@ func TestConfigureNamespaceWatching(t *testing.T) { }) } } + +func TestLoadFromEnv(t *testing.T) { + tests := []struct { + name string + envVars map[string]string + flagName string + flagDefault string + wantValue string + }{ + { + name: "string flag with hyphen", + envVars: map[string]string{ + "METRICS_BIND_ADDRESS": ":9090", + }, + flagName: "metrics-bind-address", + flagDefault: ":8080", + wantValue: ":9090", + }, + { + name: "flag without env var uses default", + envVars: map[string]string{ + "OTHER_FLAG": "value", + }, + flagName: "test-flag", + flagDefault: "default", + wantValue: "default", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Set environment variables + for k, v := range tt.envVars { + t.Setenv(k, v) + } + + // Create a new flag set for testing + fs := flag.NewFlagSet("test", flag.ContinueOnError) + var testVar string + fs.StringVar(&testVar, tt.flagName, tt.flagDefault, "test flag") + + // Load from environment + if err := LoadFromEnv(fs); err != nil { + t.Fatalf("LoadFromEnv() error = %v", err) + } + + // Check the value + if testVar != tt.wantValue { + t.Errorf("flag value = %v, want %v", testVar, tt.wantValue) + } + }) + } +} + +func TestLoadFromEnvBoolFlags(t *testing.T) { + tests := []struct { + name string + envValue string + wantValue bool + wantErr 
bool + }{ + { + name: "true value", + envValue: "true", + wantValue: true, + wantErr: false, + }, + { + name: "false value", + envValue: "false", + wantValue: false, + wantErr: false, + }, + { + name: "1 value", + envValue: "1", + wantValue: true, + wantErr: false, + }, + { + name: "0 value", + envValue: "0", + wantValue: false, + wantErr: false, + }, + { + name: "invalid value", + envValue: "invalid", + wantValue: false, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + envName := "TEST_BOOL" + t.Setenv(envName, tt.envValue) + + fs := flag.NewFlagSet("test", flag.ContinueOnError) + var testVar bool + fs.BoolVar(&testVar, "test-bool", false, "test bool flag") + + err := LoadFromEnv(fs) + if (err != nil) != tt.wantErr { + t.Errorf("LoadFromEnv() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if !tt.wantErr && testVar != tt.wantValue { + t.Errorf("flag value = %v, want %v", testVar, tt.wantValue) + } + }) + } +} + +func TestLoadFromEnvDurationFlags(t *testing.T) { + envName := "TEST_DURATION" + t.Setenv(envName, "5m") + + fs := flag.NewFlagSet("test", flag.ContinueOnError) + var testVar time.Duration + fs.DurationVar(&testVar, "test-duration", 1*time.Second, "test duration flag") + + if err := LoadFromEnv(fs); err != nil { + t.Fatalf("LoadFromEnv() error = %v", err) + } + + wantValue := 5 * time.Minute + if testVar != wantValue { + t.Errorf("flag value = %v, want %v", testVar, wantValue) + } +} + +func TestLoadFromEnvIntegration(t *testing.T) { + envVars := map[string]string{ + "METRICS_BIND_ADDRESS": ":9090", + "HEALTH_PROBE_BIND_ADDRESS": ":8081", + "LEADER_ELECT": "true", + "METRICS_SECURE": "false", + "ENABLE_HTTP2": "true", + "DEFAULT_MODEL_CONFIG_NAME": "custom-model", + "DEFAULT_MODEL_CONFIG_NAMESPACE": "custom-ns", + "HTTP_SERVER_ADDRESS": ":9000", + "A2A_BASE_URL": "http://example.com:9000", + "DATABASE_TYPE": "postgres", + "POSTGRES_DATABASE_URL": "postgres://localhost:5432/testdb", + 
"WATCH_NAMESPACES": "ns1,ns2,ns3", + "STREAMING_TIMEOUT": "120s", + "STREAMING_MAX_BUF_SIZE": "2Mi", + "STREAMING_INITIAL_BUF_SIZE": "8Ki", + } + + for k, v := range envVars { + t.Setenv(k, v) + } + + fs := flag.NewFlagSet("test", flag.ContinueOnError) + cfg := Config{} + cfg.SetFlags(fs) // Sets flags and defaults + + if err := LoadFromEnv(fs); err != nil { + t.Fatalf("LoadFromEnv() error = %v", err) + } + + // Verify values - env vars should override default flags + if cfg.Metrics.Addr != ":9090" { + t.Errorf("Metrics.Addr = %v, want :9090", cfg.Metrics.Addr) + } + if cfg.ProbeAddr != ":8081" { + t.Errorf("ProbeAddr = %v, want :8081", cfg.ProbeAddr) + } + if !cfg.LeaderElection { + t.Errorf("LeaderElection = false, want true") + } + if cfg.SecureMetrics { + t.Errorf("SecureMetrics = true, want false") + } + if !cfg.EnableHTTP2 { + t.Errorf("EnableHTTP2 = false, want true") + } + if cfg.DefaultModelConfig.Name != "custom-model" { + t.Errorf("DefaultModelConfig.Name = %v, want custom-model", cfg.DefaultModelConfig.Name) + } + if cfg.DefaultModelConfig.Namespace != "custom-ns" { + t.Errorf("DefaultModelConfig.Namespace = %v, want custom-ns", cfg.DefaultModelConfig.Namespace) + } + if cfg.HttpServerAddr != ":9000" { + t.Errorf("HttpServerAddr = %v, want :9000", cfg.HttpServerAddr) + } + if cfg.A2ABaseUrl != "http://example.com:9000" { + t.Errorf("A2ABaseUrl = %v, want http://example.com:9000", cfg.A2ABaseUrl) + } + if cfg.Database.Type != "postgres" { + t.Errorf("Database.Type = %v, want postgres", cfg.Database.Type) + } + if cfg.Database.Url != "postgres://localhost:5432/testdb" { + t.Errorf("Database.Url = %v, want postgres://localhost:5432/testdb", cfg.Database.Url) + } + if cfg.WatchNamespaces != "ns1,ns2,ns3" { + t.Errorf("WatchNamespaces = %v, want ns1,ns2,ns3", cfg.WatchNamespaces) + } + if cfg.Streaming.Timeout != 120*time.Second { + t.Errorf("Streaming.Timeout = %v, want 120s", cfg.Streaming.Timeout) + } + + // Check quantity values + expectedMaxBuf := 
resource.MustParse("2Mi") + if cfg.Streaming.MaxBufSize.Cmp(expectedMaxBuf) != 0 { + t.Errorf("Streaming.MaxBufSize = %v, want 2Mi", cfg.Streaming.MaxBufSize) + } + + expectedInitBuf := resource.MustParse("8Ki") + if cfg.Streaming.InitialBufSize.Cmp(expectedInitBuf) != 0 { + t.Errorf("Streaming.InitialBufSize = %v, want 8Ki", cfg.Streaming.InitialBufSize) + } +} diff --git a/helm/kagent/templates/controller-configmap.yaml b/helm/kagent/templates/controller-configmap.yaml new file mode 100644 index 000000000..792b58c01 --- /dev/null +++ b/helm/kagent/templates/controller-configmap.yaml @@ -0,0 +1,36 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "kagent.fullname" . }}-controller + namespace: {{ include "kagent.namespace" . }} + labels: + {{- include "kagent.controller.labels" . | nindent 4 }} +data: + DATABASE_TYPE: {{ .Values.database.type | quote }} + DEFAULT_MODEL_CONFIG_NAME: {{ include "kagent.defaultModelConfigName" . | quote }} + IMAGE_PULL_POLICY: {{ .Values.controller.agentImage.pullPolicy | default .Values.imagePullPolicy | quote }} + {{- if and .Values.controller.agentImage.pullSecret (not (eq .Values.controller.agentImage.pullSecret "")) }} + IMAGE_PULL_SECRET: {{ .Values.controller.agentImage.pullSecret | quote }} + {{- end }} + IMAGE_REGISTRY: {{ .Values.controller.agentImage.registry | default .Values.registry | quote }} + IMAGE_REPOSITORY: {{ .Values.controller.agentImage.repository | quote }} + IMAGE_TAG: {{ coalesce .Values.controller.agentImage.tag .Values.tag .Chart.Version | quote }} + OTEL_EXPORTER_OTLP_ENDPOINT: {{ .Values.otel.tracing.exporter.otlp.endpoint | quote }} + OTEL_EXPORTER_OTLP_LOGS_ENDPOINT: {{ .Values.otel.logging.exporter.otlp.endpoint | quote }} + OTEL_EXPORTER_OTLP_LOGS_INSECURE: {{ .Values.otel.logging.exporter.otlp.insecure | quote }} + OTEL_EXPORTER_OTLP_LOGS_TIMEOUT: {{ .Values.otel.logging.exporter.otlp.timeout | quote }} + OTEL_EXPORTER_OTLP_TRACES_INSECURE: {{ 
.Values.otel.tracing.exporter.otlp.insecure | quote }} + OTEL_EXPORTER_OTLP_TRACES_TIMEOUT: {{ .Values.otel.tracing.exporter.otlp.timeout | quote }} + OTEL_LOGGING_ENABLED: {{ .Values.otel.logging.enabled | quote }} + OTEL_TRACING_ENABLED: {{ .Values.otel.tracing.enabled | quote }} + OTEL_TRACING_EXPORTER_OTLP_ENDPOINT: {{ .Values.otel.tracing.exporter.otlp.endpoint | quote }} + {{- if eq .Values.database.type "sqlite" }} + SQLITE_DATABASE_PATH: /sqlite-volume/{{ .Values.database.sqlite.databaseName }} + {{- else if and (eq .Values.database.type "postgres") (not (eq .Values.database.postgres.url "")) }} + POSTGRES_DATABASE_URL: {{ .Values.database.postgres.url | quote }} + {{- end }} + STREAMING_INITIAL_BUF_SIZE: {{ .Values.controller.streaming.initialBufSize | quote }} + STREAMING_MAX_BUF_SIZE: {{ .Values.controller.streaming.maxBufSize | quote }} + STREAMING_TIMEOUT: {{ .Values.controller.streaming.timeout | quote }} + WATCH_NAMESPACES: {{ include "kagent.watchNamespaces" . | quote }} + ZAP_LOG_LEVEL: {{ .Values.controller.loglevel | quote }} diff --git a/helm/kagent/templates/controller-deployment.yaml b/helm/kagent/templates/controller-deployment.yaml index 10ffb4bdd..cdfa8de85 100644 --- a/helm/kagent/templates/controller-deployment.yaml +++ b/helm/kagent/templates/controller-deployment.yaml @@ -14,6 +14,7 @@ spec: template: metadata: annotations: + checksum/configmap: {{ include (print $.Template.BasePath "/controller-configmap.yaml") . | sha256sum }} {{- with .Values.controller.podAnnotations }} {{- toYaml . | nindent 8 }} {{- end }} @@ -49,78 +50,30 @@ spec: {{- end }} containers: - name: controller - args: - # Consider using env vars (stored in a dedicated ConfigMap(s)) rather than this - {{/* #we need to pass the default model config name to the app otherwise helm upgrade will not allow provider type change due to validations */}} - - -default-model-config-name - - {{ include "kagent.defaultModelConfigName" . 
| quote }} - - -zap-log-level - - {{ .Values.controller.loglevel }} - - -watch-namespaces - - "{{ include "kagent.watchNamespaces" . }}" - - -streaming-max-buf-size - - {{ .Values.controller.streaming.maxBufSize | quote }} - - -streaming-initial-buf-size - - {{ .Values.controller.streaming.initialBufSize | quote }} - - -streaming-timeout - - {{ .Values.controller.streaming.timeout | quote }} - - -database-type - - {{ .Values.database.type }} - {{- if eq .Values.database.type "sqlite" }} - - -sqlite-database-path - - /sqlite-volume/{{ .Values.database.sqlite.databaseName }} - {{- else if eq .Values.database.type "postgres" }} - - -postgres-database-url - - {{ .Values.database.postgres.url }} - {{- end }} - - -image-registry - - {{ .Values.controller.agentImage.registry | default .Values.registry }} - - -image-repository - - {{ .Values.controller.agentImage.repository }} - - -image-tag - - {{ coalesce .Values.controller.agentImage.tag .Values.tag .Chart.Version }} - - -image-pull-policy - - {{ .Values.controller.agentImage.pullPolicy | default .Values.imagePullPolicy }} - {{- if and .Values.controller.agentImage.pullSecret (not (eq .Values.controller.agentImage.pullSecret "")) }} - - -image-pull-secret - - {{ .Values.controller.agentImage.pullSecret | default "" }} - {{- end }} - securityContext: - {{- toYaml .Values.controller.securityContext | nindent 12 }} image: "{{ .Values.controller.image.registry | default .Values.registry }}/{{ .Values.controller.image.repository }}:{{ coalesce .Values.tag .Values.controller.image.tag .Chart.Version }}" imagePullPolicy: {{ .Values.controller.image.pullPolicy | default .Values.imagePullPolicy }} - resources: - {{- toYaml .Values.controller.resources | nindent 12 }} env: - name: KAGENT_NAMESPACE valueFrom: fieldRef: fieldPath: metadata.namespace - - name: OTEL_TRACING_ENABLED - value: {{ .Values.otel.tracing.enabled | quote }} - - name: OTEL_EXPORTER_OTLP_ENDPOINT - value: {{ .Values.otel.tracing.exporter.otlp.endpoint | quote 
}} - - name: OTEL_TRACING_EXPORTER_OTLP_ENDPOINT - value: {{ .Values.otel.tracing.exporter.otlp.endpoint | quote }} - - name: OTEL_EXPORTER_OTLP_TRACES_TIMEOUT - value: {{ .Values.otel.tracing.exporter.otlp.timeout | quote }} - - name: OTEL_EXPORTER_OTLP_TRACES_INSECURE - value: {{ .Values.otel.tracing.exporter.otlp.insecure | quote }} - - name: OTEL_LOGGING_ENABLED - value: {{ .Values.otel.logging.enabled | quote }} - - name: OTEL_EXPORTER_OTLP_LOGS_ENDPOINT - value: {{ .Values.otel.logging.exporter.otlp.endpoint | quote }} - - name: OTEL_EXPORTER_OTLP_LOGS_TIMEOUT - value: {{ .Values.otel.logging.exporter.otlp.timeout | quote }} - - name: OTEL_EXPORTER_OTLP_LOGS_INSECURE - value: {{ .Values.otel.logging.exporter.otlp.insecure | quote }} {{- with .Values.controller.env }} {{- toYaml . | nindent 12 }} {{- end }} + envFrom: + - configMapRef: + name: {{ include "kagent.fullname" . }}-controller + {{- with .Values.controller.envFrom }} + {{- toYaml . | nindent 12 }} + {{- end }} ports: - name: http containerPort: {{ .Values.controller.service.ports.targetPort }} protocol: TCP + resources: + {{- toYaml .Values.controller.resources | nindent 12 }} + securityContext: + {{- toYaml .Values.controller.securityContext | nindent 12 }} startupProbe: httpGet: path: /health diff --git a/helm/kagent/tests/controller-deployment_test.yaml b/helm/kagent/tests/controller-deployment_test.yaml index 6918b9d07..531ee6e18 100644 --- a/helm/kagent/tests/controller-deployment_test.yaml +++ b/helm/kagent/tests/controller-deployment_test.yaml @@ -1,6 +1,7 @@ suite: test controller deployment templates: - controller-deployment.yaml + - controller-configmap.yaml tests: - it: should render controller deployment with default values template: controller-deployment.yaml @@ -89,54 +90,48 @@ tests: value: 8083 - it: should use custom loglevel when set - template: controller-deployment.yaml + template: controller-configmap.yaml set: controller: loglevel: "debug" asserts: - - contains: - path: 
spec.template.spec.containers[0].args - content: "debug" + - equal: + path: data.ZAP_LOG_LEVEL + value: "debug" - it: should use controller.agentImage.pullSecret when set - template: controller-deployment.yaml + template: controller-configmap.yaml set: controller: agentImage: pullSecret: "pull-secret" asserts: - - contains: - path: spec.template.spec.containers[0].args - content: "-image-pull-secret" - - contains: - path: spec.template.spec.containers[0].args - content: "pull-secret" + - equal: + path: data.IMAGE_PULL_SECRET + value: "pull-secret" - it: should not use controller.agentImage.pullSecret when not set - template: controller-deployment.yaml + template: controller-configmap.yaml set: controller: agentImage: pullSecret: "" asserts: - - notContains: - path: spec.template.spec.containers[0].args - content: "-image-pull-secret" - - notContains: - path: spec.template.spec.containers[0].args - content: "pull-secret" + - notExists: + path: data.IMAGE_PULL_SECRET - it: should configure watch namespaces - template: controller-deployment.yaml + template: controller-configmap.yaml set: controller: watchNamespaces: - namespace-1 - namespace-2 asserts: - - contains: - path: spec.template.spec.containers[0].args - content: "namespace-1,namespace-2" + - equal: + path: data.WATCH_NAMESPACES + value: "namespace-1,namespace-2" - it: should set nodeSelector + template: controller-deployment.yaml set: controller: nodeSelector: @@ -148,6 +143,7 @@ tests: role: AI - it: should set tolerations + template: controller-deployment.yaml set: controller: tolerations: diff --git a/helm/kagent/values.yaml b/helm/kagent/values.yaml index c6aaba486..b2215a5eb 100644 --- a/helm/kagent/values.yaml +++ b/helm/kagent/values.yaml @@ -104,6 +104,7 @@ controller: port: 8083 targetPort: 8083 env: [] + envFrom: [] # Additional volumes on the output Deployment definition. 
volumes: [] From 7a212305683aa979d8b2f20de73fa7df083b8df2 Mon Sep 17 00:00:00 2001 From: Sam Heilbron Date: Tue, 2 Dec 2025 11:44:17 -0500 Subject: [PATCH 10/17] cncf/incubation: add technical review first draft, update contribution guidelines, update README (#1142) Expand the internal documentation for users to participate in the project. --------- Signed-off-by: Sam Heilbron Signed-off-by: Sam Heilbron Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Ivan Porta --- .githooks/prepare-commit-msg | 22 + CONTRIBUTING.md | 236 ++++++ CONTRIBUTION.md | 136 ---- Makefile | 25 + README.md | 71 +- contrib/README.md | 9 +- .../security-self-assessment.md | 7 +- contrib/cncf/technical-review.md | 723 ++++++++++++++++++ 8 files changed, 1064 insertions(+), 165 deletions(-) create mode 100755 .githooks/prepare-commit-msg create mode 100644 CONTRIBUTING.md delete mode 100644 CONTRIBUTION.md rename contrib/{security => cncf}/security-self-assessment.md (97%) create mode 100644 contrib/cncf/technical-review.md diff --git a/.githooks/prepare-commit-msg b/.githooks/prepare-commit-msg new file mode 100755 index 000000000..e0126a67a --- /dev/null +++ b/.githooks/prepare-commit-msg @@ -0,0 +1,22 @@ +#!/bin/sh + +# This script automatically adds a Signed-off-by trailer to each commit message, so that your commits +# will adhere to the DCO (Developer Certificate of Origin) requirements. +# To use, run `make init-git-hooks` or copy this file to .git/hooks/prepare-commit-msg in your copy of the repo. 
+ +NAME=$(git config user.name) +EMAIL=$(git config user.email) + +if [ -z "$NAME" ]; then + echo "empty git config user.name" + exit 1 +fi + +if [ -z "$EMAIL" ]; then + echo "empty git config user.email" + exit 1 +fi + +git interpret-trailers --if-exists doNothing --trailer \ + "Signed-off-by: $NAME <$EMAIL>" \ + --in-place "$1" \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..2fe8b69bd --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,236 @@ +# Contribution Guidelines + +- [Ways to contribute](#ways-to-contribute) + - [Report Security Vulnerabilities](#report-security-vulnerabilities) + - [File issues](#file-issues) + - [Find something to work on](#find-something-to-work-on) +- [Community Assignments](#community-assignments) + - [Assignment Process](#assignment-process) + - [Stale Assignment Policy](#stale-assignment-policy) + - [Best Practices for Assignees](#best-practices-for-assignees) +- [Contributing code](#contributing-code) + - [Small changes (bug fixes)](#small-changes-bug-fixes) + - [Large changes (features, refactors)](#large-changes-features-refactors) + - [Tips to get started](#tips-to-get-started) +- [Requirements for PRs](#requirements-for-prs) + - [DCO](#dco) + - [Testing](#testing) + - [Unit Tests](#unit-tests) + - [End-to-End (E2E) Tests](#end-to-end-e2e-tests) + - [Code review guidelines](#code-review-guidelines) +- [Documentation](#documentation) +- [Get in touch](#get-in-touch) + +## Ways to contribute + +Thanks for your interest in contributing to kagent! We have a few different ways you can get involved. To understand contributor roles, refer to the [contributor ladder guide](https://github.com/kagent-dev/community/blob/main/CONTRIBUTOR_LADDER.md). + +### Report Security Vulnerabilities + +If you would like to report a security issue, please refer to our [SECURITY.md](SECURITY.md) file. 
+ +### File issues + +To file a bug or feature request in the [kagent GitHub repo](https://github.com/kagent-dev/kagent): + +1. Search existing issues first. +2. If no existing issue addresses your case, create a new one. +3. Use [issue templates](https://github.com/kagent-dev/kagent/tree/main/.github/ISSUE_TEMPLATE) when available +4. Add information or react to existing issues, such as a thumbs-up 👍 to indicate agreement. + +### Find something to work on + +The project uses [GitHub issues](https://github.com/kagent-dev/kagent/issues) to track bugs and features. Issues labeled with the [`good first issue`](https://github.com/kagent-dev/kagent/issues?q=state%3Aopen%20label%3A%22good%20first%20issue%22) label are a great place to start. + +Additionally, the project has a [project board](https://github.com/orgs/kagent-dev/projects/3) tracking the roadmap. Any issues in the project board are a great source of things to work on. If an issue has not been assigned, you can ask to work on it by leaving a comment on the issue. + +Flaky tests are a common source of issues and a good place to start contributing to the project. You can find these issues by filtering with the `Type: CI Test Flake` label. If you see a test that is failing regularly, you can leave a comment asking if someone is working on it. + +## Community Assignments + +We welcome community contributions and encourage members to work on issues. To maintain an active and healthy development environment, we have the following policies: + +### Assignment Process + +- **Organization members**: Can self-assign issues using the GitHub assignee dropdown +- **External contributors**: Should comment on the issue expressing interest in working on it. A maintainer will then assign the issue to you. 
+ +### Stale Assignment Policy + +- **Timeframe**: If an assignee hasn't made any visible progress (comments, commits, or draft PRs) within **30 days** of assignment, the issue assignment may be considered stale +- **Communication**: We'll reach out to check on progress and offer assistance before unassigning +- **Unassignment**: After **5 additional days** without response or progress, issues will be unassigned and made available for other contributors +- **Re-assignment**: Previous assignees are welcome to request re-assignment if they become available to work on the issue again + +### Best Practices for Assignees + +- Comment on the issue with your approach or ask questions if you need clarification +- Provide regular updates (even brief ones) if work is taking longer than expected +- Create draft PRs early to show progress and get feedback +- Don't hesitate to ask for help in the issue comments or community channels like Discord or CNCF Slack +- Join the community meetings to share progress or engage with other members for discussions + +## Contributing code + +Contributing features to kagent is a great way to get involved with the project. We welcome contributions of all sizes, from small bug fixes to large new features. Kagent uses a "fork and pull request" approach. This means that as a contributor, you create your own personal fork of a code repository in GitHub and push your contributions to a branch in your own fork first. When you are ready to contribute, open a pull request (PR) against the project's repository. For more details, see the [GitHub docs about working with forks](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks). + +### Small changes (bug fixes) + +For small changes (less than 100 lines of code): + +1. Open a pull request. +2. Ensure tests verify the fix. +3. If needed, [update the documentation](#documentation). 
+ +### Large changes (features, refactors) + +Large features often touch many files, extend many lines of code, and often cover issues such as: + +* Large bug fixes +* New features +* Refactors of the existing codebase + +For large changes: + +1. **Open an issue first**: Open an issue about your bug or feature in the [kagent](https://github.com/kagent-dev/kagent) repo. +2. **Message us on Slack or Discord**: Reach out to us to discuss your proposed changes in our [CNCF Slack channel, `#kagent-dev`](https://cloud-native.slack.com/archives/C08ETST0076) or [Discord server](https://discord.gg/Fu3k65f2k3). +3. **Agree on implementation plan**: Write a plan for how this feature or bug fix should be implemented. Should this be one pull request or multiple incremental improvements? Who is going to do each part? Discuss it with us on Slack/Discord or join our [community meeting](https://calendar.google.com/calendar/u/0?cid=Y183OTI0OTdhNGU1N2NiNzVhNzE0Mjg0NWFkMzVkNTVmMTkxYTAwOWVhN2ZiN2E3ZTc5NDA5Yjk5NGJhOTRhMmVhQGdyb3VwLmNhbGVuZGFyLmdvb2dsZS5jb20). +4. **Submit a draft PR**: It's important to get feedback as early as possible to ensure that any big improvements end up being merged. Open a draft pull request from your fork, label it `work in progress`, and start getting feedback. +5. **Review**: At least one maintainer should sign off on the change before it's merged. Look at the following [Code review](#code-review-guidelines) section to learn about what we're looking for. +6. **Close out**: A maintainer will merge the PR and let you know about the next release plan. + +For large or broad changes, we may ask you to write an enhancement proposal. Use [this template](https://github.com/kagent-dev/kagent/blob/main/design/template.md) to get you started. You can find the existing enhancement proposals [here](https://github.com/kagent-dev/kagent/tree/main/design). 
+ +### Tips to get started + +To help you get started with contributing code: + +- **Development Setup**: See the [DEVELOPMENT.md](DEVELOPMENT.md) file for detailed instructions on setting up your development environment. +- **Code of Conduct**: Please read and follow our [Code of Conduct](CODE_OF_CONDUCT.md). +- **Past PRs**: We recommend looking at past PRs that are doing similar things to what you are trying to do. +- **Agent Examples**: Check out the [sample agents](https://github.com/kagent-dev/kagent/tree/main/python/samples) for examples of how to build agents. +- **Architecture**: Review the [architecture documentation](https://github.com/kagent-dev/kagent#architecture) to understand how kagent works. + +## Requirements for PRs + +Contributing to open source can be a daunting task, especially if you are a new contributor and are not yet familiar with the workflows commonly used by open source projects. + +After you open a PR, the project maintainers will review your changes. Reviews typically include iterations of suggestions and changes. This is totally normal, so don't be discouraged if asked to make changes to your contribution. + +It's difficult to cover all the possible scenarios that you might encounter when contributing to open source software in a single document. However, this contributing guide outlines several requirements that even some well-versed contributors may not be familiar with. If you have questions, concerns or just need help getting started please don't hesitate to reach out through one of the channels covered in the [Get in touch section](#get-in-touch). + +### DCO + +DCO, short for Developer Certificate of Origin, is a per-commit signoff that you, the contributor, agree to the terms published at [https://developercertificate.org](https://developercertificate.org) for that particular commit. This will appear as a `Signed-off-by: Your Name ` trailer at the end of each commit message. 
The kagent project requires that every commit contains this DCO signoff. + +The easiest way to make sure each of your commits contains the signoff is to run make `init-git-hooks` in the repo to which you are contributing. This will configure your repo to use a Git hook which will automatically add the required trailer to all of your commit messages. + +```shell +make init-git-hooks +``` + +If you prefer not to use a Git hook, you must remember to use the `--signoff` option (or `-s` for short) on each of your commits when you check in code: + +```shell +git commit -s -m "description of my excellent contribution" +``` + +If you forget to sign off on a commit, your PR will be flagged and blocked from merging. You can sign off on previous commits by using the rebase command. The following example uses the `main` branch, which means this command rewrites the `git` history of your current branch while adding signoffs to commits visible from `main` (not inclusive). Please be aware that rewriting commit history does carry some risk, and if the commits you are rewriting are already pushed to a remote, you will need to force push the rewritten history. + +```shell +git rebase --signoff main +``` + +### Testing + +Tests are essential for any non-trivial PR. They ensure that your feature remains operational and does not break due to future updates. Tests are a critical part of maintaining kagent's stability and long-term maintainability. + +A useful way to explore the different tests that the project maintains, is to inspect the [GitHub action that runs the CI pipeline](.github/workflows/ci.yaml) + +We have the following types of tests: + +#### Unit Tests + +These are useful for testing small, isolated units of code, such as a single function or a small component. + +**Go Unit Tests**: + +```bash +cd go +go test -race -skip 'TestE2E.*' -v ./... 
+``` + +**Helm Unit Tests**: + +```bash +helm plugin install https://github.com/helm-unittest/helm-unittest +make helm-version +helm unittest helm/kagent +``` + +**Python Unit Tests**: + + ```bash +cd python +uv run pytest ./packages/**/tests/ + ``` + +**UI Unit Tests**: + + ```bash +cd ui +npm run test +``` + +#### End-to-End (E2E) Tests + +These tests are done in a `kind` cluster with real agents, using real or mock LLM providers. +See: [go/test/e2e](https://github.com/kagent-dev/kagent/tree/main/go/test/e2e) + +Features that introduce behavior changes should be covered by E2E tests (exceptions can be made for minor changes). Testing with real Kubernetes resources and agent invocations is crucial because it: + +- Prevents regressions. +- Detects behavior changes from dependencies. +- Ensures the feature is not deprecated. +- Confirms the feature works as the user expects it to. + +### Code review guidelines + +Code can be reviewed by anyone! Even if you are not a maintainer, please feel free to add your comments. +All code must be reviewed by at least one [maintainer](https://github.com/kagent-dev/community/blob/main/MAINTAINERS.md) before merging. Key requirements: + +1. **Code Style** + + **Go Code**: + - Follow [Go Code Review Comments](https://github.com/golang/go/wiki/CodeReviewComments). + - Follow [Effective Go](https://golang.org/doc/effective_go). + - Run `make lint` to check for common issues before submitting. + + **Python Code**: + - Follow PEP 8 style guidelines. + - Run `make lint` to check for common issues before submitting. + + **UI Code**: + - Follow the project's ESLint configuration. + - Run `npm run lint` before submitting. + +2. **Testing** + + - Add unit tests for new functionality. + - Ensure existing tests pass. + - Include e2e tests when needed. + +3. **Documentation** + + - Update relevant documentation. + - Include code comments for non-obvious logic. + - Update API documentation if changing interfaces. 
+ - Add examples for new features. + +## Documentation + +The kagent documentation lives at [kagent.dev/docs](https://kagent.dev/docs/kagent). The code lives at [kagent website](https://github.com/kagent-dev/website). + +## Get in touch + +Please refer to the [Project README](README.md#get-involved) for methods to get in touch diff --git a/CONTRIBUTION.md b/CONTRIBUTION.md deleted file mode 100644 index 0203c0980..000000000 --- a/CONTRIBUTION.md +++ /dev/null @@ -1,136 +0,0 @@ -# Contribution Guidelines - -## Development - -### Code of Conduct - -We are committed to providing a friendly, safe, and welcoming environment for all contributors. Please read and follow our [Code of Conduct](CODE_OF_CONDUCT.md). - -### Getting Started - -1. **Fork the repository** on GitHub. -2. **Clone your fork** locally: - ```bash - git clone https://github.com/YOUR-USERNAME/kagent.git - cd kagent - ``` -3. **Add the upstream repository** as a remote: - ```bash - git remote add upstream https://github.com/kagent-dev/kagent.git - ``` -4. **Create a new branch** for your changes: - ```bash - git checkout -b feature/your-feature-name - ``` - -### Development Environment Setup - -See the [DEVELOPMENT.md](DEVELOPMENT.md) file for more information. - -### Making Changes - -If you are making significant improvements to the kagent project, please create a design document using the [design template](design/template.md) and submit it as a pull request on GitHub. Additionally, post a notification in the #core-contrib channel on the kagent Discord for offline review, and be prepared to present the design document at an upcoming kagent community meeting. Thank you! 
- -#### Coding Standards - -- **Go Code**: - - Follow the [Go Code Review Comments](https://go.dev/wiki/CodeReviewComments) - - Run `make lint` before submitting your changes - - Ensure all tests pass with `make test` - - Add tests for new functionality - -- **UI Code**: - - Follow the project's ESLint configuration - - Run `npm run lint` before submitting changes - - Ensure all tests pass with `npm test` - - Add tests for new functionality - -- **Python Code**: - - check formatting with `uv run ruff check` - - check linting with `uv run ruff format` - - Use type hints where appropriate - - Run tests with `uv run pytest` - -#### Commit Guidelines - -We follow the [Conventional Commits](https://www.conventionalcommits.org/) specification: - -- **feat**: A new feature -- **fix**: A bug fix -- **docs**: Documentation only changes -- **style**: Changes that do not affect the meaning of the code -- **refactor**: A code change that neither fixes a bug nor adds a feature -- **perf**: A code change that improves performance -- **test**: Adding missing tests or correcting existing tests -- **chore**: Changes to the build process or auxiliary tools - -Example commit message: -``` -feat(controller): add support for custom resource validation - -This adds validation for the KagentApp custom resource to ensure -that the configuration is valid before applying it to the cluster. - -Closes #123 -``` - -### Pull Request Process - -1. **Update your fork** with the latest changes from upstream: - ```bash - git fetch upstream - git rebase upstream/main - ``` - -2. **Push your changes** to your fork: - ```bash - git push origin feature/your-feature-name - ``` - -3. **Create a Pull Request** from your fork to the main repository. - -4. **Fill out the PR template** with all required information. - -5. **Address review comments** if requested by maintainers. - -6. **Update your PR** if needed: - ```bash - git add . 
- git commit -m "address review comments" - git push origin feature/your-feature-name - ``` - -7. Once approved, a maintainer will merge your PR. - - -### Documentation - -- Update documentation for any changes to APIs, CLIs, or user-facing features -- Add examples for new features -- Update the README if necessary -- Add comments to your code explaining complex logic - -### Releasing - -Only project maintainers can create releases. The process is: - -1. Update version numbers in relevant files -2. Create a release branch -3. Create a tag for the release -4. Build and publish artifacts -5. Create a GitHub release with release notes - -### Community - -- Join our [Discord server](https://discord.gg/Fu3k65f2k3) for discussions -- Participate in community calls (scheduled on our website) -- Help answer questions in GitHub issues -- Review pull requests from other contributors - -## License - -By contributing to this project, you agree that your contributions will be licensed under the project's license. - -## Questions? - -If you have any questions about contributing, please open an issue or reach out to the maintainers. diff --git a/Makefile b/Makefile index 6142abca7..2240ad402 100644 --- a/Makefile +++ b/Makefile @@ -74,6 +74,31 @@ TOOLS_IMAGE_BUILD_ARGS += --build-arg TOOLS_BUN_VERSION=$(TOOLS_BUN_VERSION) TOOLS_IMAGE_BUILD_ARGS += --build-arg TOOLS_PYTHON_VERSION=$(TOOLS_PYTHON_VERSION) TOOLS_IMAGE_BUILD_ARGS += --build-arg TOOLS_NODE_VERSION=$(TOOLS_NODE_VERSION) + +##@ General + +# The help target prints out all targets with their descriptions organized +# beneath their categories. The categories are represented by '##@' and the +# target descriptions by '##'. The awk command is responsible for reading the +# entire set of makefiles included in this invocation, looking for lines of the +# file as xyz: ## something, and then pretty-format the target and help. Then, +# if there's a line with ##@ something, that gets pretty-printed as a category. 
+# More info on the usage of ANSI control characters for terminal formatting: +# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters +# More info on the awk command: +# http://linuxcommand.org/lc3_adv_awk.php + +.PHONY: help +help: ## Display this help. + @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) + +##@ Git + +.PHONY: init-git-hooks +init-git-hooks: ## Use the tracked version of Git hooks from this repo + git config core.hooksPath .githooks + echo "Git hooks initialized" + # KMCP KMCP_ENABLED ?= true KMCP_VERSION ?= $(shell $(AWK) '/github\.com\/kagent-dev\/kmcp/ { print substr($$2, 2) }' go/go.mod) # KMCP version defaults to what's referenced in go.mod diff --git a/README.md b/README.md index 8dfa6a54c..ae50df598 100644 --- a/README.md +++ b/README.md @@ -38,24 +38,42 @@ --- -## Get started + + + + + + + + +
+ Getting Started + + Technical Details + + Get Involved + + Reference +
+ -- [Quick Start](https://kagent.dev/docs/kagent/getting-started/quickstart) -- [Installation guide](https://kagent.dev/docs/kagent/introduction/installation) +--- +## Getting started -## Documentation +- [Quick Start](https://kagent.dev/docs/kagent/getting-started/quickstart) +- [Installation guide](https://kagent.dev/docs/kagent/introduction/installation) -The kagent documentation is available at [kagent.dev/docs](https://kagent.dev/docs/kagent). +## Technical Details -## Core Concepts +### Core Concepts - **Agents**: Agents are the main building block of kagent. They are a system prompt, a set of tools and agents, and an LLM configuration represented with a Kubernetes custom resource called "Agent". - **LLM Providers**: Kagent supports multiple LLM providers, including [OpenAI](https://kagent.dev/docs/kagent/supported-providers/openai), [Azure OpenAI](https://kagent.dev/docs/kagent/supported-providers/azure-openai), [Anthropic](https://kagent.dev/docs/kagent/supported-providers/anthropic), [Google Vertex AI](https://kagent.dev/docs/kagent/supported-providers/google-vertexai), [Ollama](https://kagent.dev/docs/kagent/supported-providers/ollama) and any other [custom providers and models](https://kagent.dev/docs/kagent/supported-providers/custom-models) accessible via AI gateways. Providers are represented by the ModelConfig resource. - **MCP Tools**: Agents can connect to any MCP server that provides tools. Kagent comes with an MCP server with tools for Kubernetes, Istio, Helm, Argo, Prometheus, Grafana, Cilium, and others. All tools are Kubernetes custom resources (ToolServers) and can be used by multiple agents. - **Observability**: Kagent supports [OpenTelemetry tracing](https://kagent.dev/docs/kagent/getting-started/tracing), which allows you to monitor what's happening with your agents and tools. 
-## Core Principles +### Core Principles - **Kubernetes Native**: Kagent is designed to be easy to understand and use, and to provide a flexible and powerful way to build and manage AI agents. - **Extensible**: Kagent is designed to be extensible, so you can add your own agents and tools. @@ -64,7 +82,7 @@ The kagent documentation is available at [kagent.dev/docs](https://kagent.dev/do - **Declarative**: Kagent is designed to be declarative, so you can define the agents and tools in a YAML file. - **Testable**: Kagent is designed to be tested and debugged easily. This is especially important for AI agent applications. -## Architecture +### Architecture The kagent framework is designed to be easy to understand and use, and to provide a flexible and powerful way to build and manage AI agents. @@ -79,19 +97,31 @@ Kagent has 4 core components: - **Engine**: The engine runs your agents using [ADK](https://google.github.io/adk-docs/). - **CLI**: The CLI is a command-line tool that allows you to manage the agents and tools. -## Roadmap +## Get Involved -`kagent` is currently in active development. You can check out the full roadmap in the project Kanban board [here](https://github.com/orgs/kagent-dev/projects/3). +_We welcome contributions! Contributors are expected to [respect the kagent Code of Conduct](https://github.com/kagent-dev/community/blob/main/CODE-OF-CONDUCT.md)_ -## Local development +There are many ways to get involved: -For instructions on how to run everything locally, see the [DEVELOPMENT.md](DEVELOPMENT.md) file. 
+- 🐛 [Report bugs and issues](https://github.com/kagent-dev/kagent/issues/) +- 💡 [Suggest new features](https://github.com/kagent-dev/kagent/issues/) +- 📖 [Improve documentation](https://github.com/kagent-dev/website/) +- 🔧 [Submit pull requests](/CONTRIBUTING.md) +- ⭐ Star the repository +- 💬 [Help others in Discord](https://discord.gg/Fu3k65f2k3) +- 💬 [Join the kagent community meetings](https://calendar.google.com/calendar/u/0?cid=Y183OTI0OTdhNGU1N2NiNzVhNzE0Mjg0NWFkMzVkNTVmMTkxYTAwOWVhN2ZiN2E3ZTc5NDA5Yjk5NGJhOTRhMmVhQGdyb3VwLmNhbGVuZGFyLmdvb2dsZS5jb20) +- 🤝 [Share tips in the CNCF #kagent slack channel](https://cloud-native.slack.com/archives/C08ETST0076) +- 🔒 [Report security concerns](SECURITY.md) + +### Roadmap -## Contributing +`kagent` is currently in active development. You can check out the full roadmap in the project Kanban board [here](https://github.com/orgs/kagent-dev/projects/3). + +### Local development -For instructions on how to contribute to the kagent project, see the [CONTRIBUTION.md](CONTRIBUTION.md) file. +For instructions on how to run everything locally, see the [DEVELOPMENT.md](DEVELOPMENT.md) file. -## Contributors +### Contributors Thanks to all contributors who are helping to make kagent better. @@ -99,7 +129,7 @@ Thanks to all contributors who are helping to make kagent better. -## Star History +### Star History @@ -109,6 +139,12 @@ Thanks to all contributors who are helping to make kagent better. +## Reference + +### License + +This project is licensed under the [Apache 2.0 License](/LICENSE). + ---
@@ -118,5 +154,4 @@ Thanks to all contributors who are helping to make kagent better. Cloud Native Computing Foundation logo

kagent is a Cloud Native Computing Foundation project.

-
- + \ No newline at end of file diff --git a/contrib/README.md b/contrib/README.md index 1511c391e..711594870 100644 --- a/contrib/README.md +++ b/contrib/README.md @@ -4,17 +4,15 @@ This directory contains community and extension contributions to the kagent proj ## Structure -- `agents/` — Community and third-party agent integrations. - - `github/` — Agent GitHub. -- `integration/` — Integrations with external systems. - - `kgateway/` — Integration with kgateway. - `memory/` — Memory backends and related extensions. - `supabase/` — Supabase-based memory backend. - `tools/` — Additional tools, scripts, or utilities contributed by the community. +- `cncf/` - Resources specifically requested by the CNCF ## Contributing To add your own contribution: + 1. Fork the repository and create a new branch. 2. Add your code or integration in the appropriate subdirectory. 3. Include a README.md in your subdirectory describing your contribution. @@ -24,5 +22,4 @@ For more details, see the main `CONTRIBUTION.md` in the project root. --- -If you have questions or need help, please open an issue or contact the maintainers. - +If you have questions or need help, please open an issue or contact the maintainers. 
\ No newline at end of file diff --git a/contrib/security/security-self-assessment.md b/contrib/cncf/security-self-assessment.md similarity index 97% rename from contrib/security/security-self-assessment.md rename to contrib/cncf/security-self-assessment.md index 46ecc4c80..c5dad54aa 100644 --- a/contrib/security/security-self-assessment.md +++ b/contrib/cncf/security-self-assessment.md @@ -198,7 +198,6 @@ Optional tooling: - **Argo**: Integration with Argo Rollouts - **Cilium**: Integration through specialized agents for eBPF-based networking - ## Security Issue Resolution ### Responsible Disclosure Process @@ -227,9 +226,7 @@ As of the time of this assessment, no critical security vulnerabilities have bee ### Open SSF Best Practices -kagent is working toward OpenSSF Best Practices certification. That work is tracked by [https://github.com/kagent-dev/community/issues/9](https://github.com/kagent-dev/community/issues/9). - -The badge is visible in the [project README](/README.md) +kagent has successfully achieved OpenSSF Best Practices certification. The badge is visible in the [project README](/README.md) or at [https://www.bestpractices.dev/projects/10723/badge](https://www.bestpractices.dev/projects/10723/badge) ### Case Studies @@ -245,4 +242,4 @@ Krateo, focused on cloud-native platform solutions, uses kagent to automate and ### Related Projects / Vendors - **Kubernetes Operators**: While Kubernetes operators provide automation, kagent adds AI-powered decision making and natural language interfaces. -- **AI/ML Platforms**: kagent focuses specifically on operational AI agents rather than model training or general ML workloads. \ No newline at end of file +- **AI/ML Platforms**: kagent focuses specifically on operational AI agents rather than model training or general ML workloads. 
diff --git a/contrib/cncf/technical-review.md b/contrib/cncf/technical-review.md new file mode 100644 index 000000000..763ff2bd8 --- /dev/null +++ b/contrib/cncf/technical-review.md @@ -0,0 +1,723 @@ +# General Technical Review - kagent / Incubation + +_This document provides a General Technical Review of the kagent project. This is a living document that demonstrates to the Technical Advisory Group (TAG) that the project satisfies the Engineering Principle requirements for moving levels. This document follows the template outlined [in the TOC subproject review](https://github.com/cncf/toc/blob/main/toc_subprojects/project-reviews-subproject/general-technical-questions.md)_ + +- **Project:** kagent +- **Project Version:** v0.7.5 +- **Website:** [https://kagent.dev](https://kagent.dev) +- **Date Updated:** 2025-12-01 +- **Template Version:** v1.0 +- **Description:** kagent is a Kubernetes native framework for building AI agents. Kubernetes is the most popular orchestration platform for running workloads, and **kagent** makes it easy to build, deploy and manage AI agents in Kubernetes. The **kagent** framework is designed to be easy to understand and use, and to provide a flexible and powerful way to build and manage AI agents. + +## Day 0 - Planning Phase + +### Scope + +**Roadmap Process:** +Kagent's roadmap is managed through a public GitHub project board at [https://github.com/orgs/kagent-dev/projects/3](https://github.com/orgs/kagent-dev/projects/3). 
The roadmap process includes: + +- Features are proposed through GitHub issues and design documents (see [design template](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/design/template.md)) +- Significant features require design documents following the enhancement proposal process (e.g., [EP-685-kmcp](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/design/EP-685-kmcp.md)) +- Community input is gathered through Discord (https://discord.gg/Fu3k65f2k3), Slack (#kagent-dev on CNCF Slack), and community meetings +- The maintainer ladder is defined in [CONTRIBUTION.md](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/CONTRIBUTION.md), with clear paths from contributor to maintainer based on sustained contributions + +**Target Personas:** + +1. **Platform Engineers**: Building and maintaining internal developer platforms with AI-powered automation +2. **DevOps/SRE Teams**: Automating operational tasks, troubleshooting, and incident response in Kubernetes environments +3. **Kubernetes Administrators**: Managing complex multi-cluster environments with intelligent agents +4. **Application Developers**: Building AI-powered applications that need to interact with Kubernetes infrastructure + +**Primary Use Case:** +The primary use case is enabling AI-powered automation and intelligent operations for Kubernetes clusters. This includes: + +- **Kubernetes-Native AI Agents**: Provide a framework for building AI agents that operate naturally within Kubernetes environments with full integration of Kubernetes security models. +- **Secure Multi-Tenancy**: Enable multiple users and teams to deploy and manage their own agents with proper isolation and access controls. This is not yet implemented, but is on the project roadmap. 
+- **Extensible Tool Ecosystem**: Offer a secure and extensible system for agents to access various tools and services while maintaining proper authorization boundaries. +- **Declarative Configuration**: Enable infrastructure-as-code practices for agent deployment and management with version control and review processes. + +**Additional Supported Use Cases:** + +- Multi-agent coordination for complex operational workflows (via A2A protocol) +- Integration with service mesh (Istio), observability (Prometheus/Grafana), and deployment tools (Helm, Argo Rollouts) +- Custom agent development using multiple frameworks (ADK, CrewAI, LangGraph) + +**Unsupported Use Cases:** + +- **Direct Cluster Administration**: kagent does not replace Kubernetes RBAC or cluster security policies; it operates within existing security boundaries. +- **LLM Model Hosting**: kagent does not host or provide LLM models; it integrates with external model providers. + +**Target Organizations:** + +- **Telecommunications**: Companies like Amdocs managing complex infrastructure requiring intelligent monitoring and malicious user detection +- **Financial Services**: Organizations requiring secure, auditable AI-powered operations with strict compliance requirements +- **Identity Verification**: Companies like Au10tix needing reliable, secure automation for critical verification platforms +- **Platform Engineering Teams**: Organizations like Krateo providing cloud-native platform solutions to internal teams and customers +- **Any organization** running production Kubernetes workloads seeking to reduce operational overhead through intelligent automation + +**End User Research:** +Current case studies are documented in the [security self-assessment](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/contrib/cncf/security-self-assessment.md#case-studies), including deployments at Amdocs, Au10tix, and Krateo. 
Formal user research reports are planned as the project matures toward v1.0. + +### Usability + +**Interaction Methods:** +Target personas interact with kagent through multiple interfaces: + +1. **Web UI**: A modern Next.js-based dashboard providing: + - Visual agent management and configuration + - Real-time conversation monitoring and session history + - Tool and model configuration interfaces + - Observability dashboards with metrics and traces + +2. **CLI (`kagent`)**: Command-line tool for: + - Agent deployment and management (`kagent agent deploy`) + - MCP server configuration (`kagent mcp`) + - Local development workflows + - CI/CD integration + - Installation: `curl -fsSL https://kagent.dev/install.sh | sh` + +3. **Kubernetes API**: Direct interaction via `kubectl` and Kubernetes manifests: + ```yaml + apiVersion: kagent.dev/v1alpha2 + kind: Agent + metadata: + name: my-agent + spec: + type: Declarative + declarative: + systemMessage: "You are a helpful Kubernetes assistant" + tools: [...] + ``` + +4. **HTTP REST API**: Programmatic access at `http://kagent-controller:8083/api` for: + - Agent invocation and management + - Session and task tracking + - Model configuration + - Integration with external systems + +5. 
**A2A Protocol**: Agent-to-agent communication following the [Google A2A specification](https://github.com/google/A2A) for multi-agent workflows + +**User Experience:** + +- **Declarative Configuration**: Infrastructure-as-code approach using Kubernetes CRDs +- **Quick Start**: Get running in minutes with Helm: `helm install kagent oci://ghcr.io/kagent-dev/kagent/helm/kagent` +- **Progressive Disclosure**: Start simple with default configurations, customize as needed +- **Observability First**: Built-in OpenTelemetry tracing and metrics from day one +- **Documentation**: Comprehensive docs at [https://kagent.dev/docs/kagent](https://kagent.dev/docs/kagent) with tutorials, API references, and examples + +**Integration with Other Projects:** +Kagent integrates seamlessly with cloud-native ecosystem projects: + +- **Kubernetes**: Native CRDs, RBAC integration, standard deployment patterns +- **Helm**: Official Helm charts for installation and upgrades +- **OpenTelemetry**: Distributed tracing for agent operations and tool invocations +- **Prometheus**: Metrics exposure for monitoring agent health and performance +- **Grafana**: Pre-built dashboards and MCP tools for visualization +- **Istio**: Service mesh integration for traffic management and security +- **Argo Rollouts**: Progressive delivery integration for agent deployments +- **Cilium**: eBPF-based networking and security policy management +- **LLM Providers**: OpenAI, Anthropic, Azure OpenAI, Google Vertex AI, Ollama, and custom models via AI gateways +- **MCP Ecosystem**: Extensible tool system compatible with Model Context Protocol servers + +### Design + +**Design Principles:** +Kagent follows these core design principles (documented in [README.md](https://github.com/kagent-dev/kagent#core-principles)): + +- **Kubernetes Native**: Kagent is designed to be Kubernetes native, so agents and tools are defined, deployed, and managed as first-class Kubernetes resources (CRDs). 
+- **Extensible**: Kagent is designed to be extensible, so you can add your own agents and tools. +- **Flexible**: Kagent is designed to be flexible, to suit any AI agent use case. +- **Observable**: Kagent is designed to be observable, so you can monitor the agents and tools using all common monitoring frameworks. +- **Declarative**: Kagent is designed to be declarative, so you can define the agents and tools in a YAML file. +- **Testable**: Kagent is designed to be tested and debugged easily. This is especially important for AI agent applications. + +**Architecture:** + +Core components: + +- **Controller**: The controller is a Kubernetes controller that watches the kagent custom resources and creates the necessary resources to run the agents. +- **UI**: The UI is a web UI that allows you to manage the agents and tools. +- **Engine**: The engine runs your agents using [ADK](https://google.github.io/adk-docs/). +- **CLI**: The CLI is a command-line tool that allows you to manage the agents and tools. + +
+ kagent +
+ +**Environment Differences:** + +- **Development**: + - Kind cluster with local registry + - SQLite database + - Single replica deployments + - Debug logging enabled + - See [DEVELOPMENT.md](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/DEVELOPMENT.md) + +- **Test/CI**: + - Automated Kind cluster creation + - Mock LLM servers for deterministic testing + - Ephemeral resources cleaned up after tests + - See [.github/workflows/ci.yaml](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/.github/workflows/ci.yaml) + +- **Production**: + - PostgreSQL database recommended for persistence + - Multi-replica controller deployments for HA + - Resource limits enforced + - TLS for external LLM connections + - Network policies for pod-to-pod communication + - See [helm/kagent/values.yaml](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/helm/kagent/values.yaml) + +**Service Dependencies:** +Required in-cluster: + +- **Kubernetes API Server**: Core dependency for all operations +- **etcd**: Via Kubernetes for state storage +- **DNS**: Kubernetes CoreDNS for service discovery + +Optional in-cluster: + +- **PostgreSQL**: For production database (SQLite default for development) +- **Qdrant**: For vector memory storage (optional feature) +- **KMCP**: For building and managing MCP servers (enabled by default) +- **Prometheus**: For metrics collection (optional) +- **Jaeger/OTLP Collector**: For distributed tracing (optional) + +External dependencies: + +- **LLM Providers**: OpenAI, Anthropic, Azure OpenAI, Google Vertex AI, or Ollama (user-configured) + +**Identity and Access Management:** +Kagent implements a multi-layered IAM approach: + +1. 
**Kubernetes RBAC**: + - Controller uses ServiceAccount with ClusterRole for CRD management + - Agents receive individual ServiceAccounts with configurable RBAC permissions + - Example roles in [go/config/rbac/role.yaml](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/go/config/rbac/role.yaml) + - Per-agent RBAC templates in [helm/agents/*/templates/rbac.yaml](https://github.com/kagent-dev/kagent/tree/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/helm/agents) + +2. **API Authentication** (planned enhancement - [Issue #476](https://github.com/kagent-dev/kagent/issues/476)): + - Current: UnsecureAuthenticator for development, A2AAuthenticator for agent-to-agent + - Planned: Extensible authentication system with support for API keys, OAuth, and service accounts + - Framework in [go/pkg/auth/auth.go](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/go/pkg/auth/auth.go) + +3. **Secret Management**: + - LLM API keys stored in Kubernetes Secrets + - Secrets mounted as environment variables or files + - No cross-namespace secret access (potential future enhancement via ReferenceGrant) + - Secrets managed via [go/internal/utils/secret.go](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/go/internal/utils/secret.go) + +4. 
**Session Isolation** (roadmap - [Issue #476](https://github.com/kagent-dev/kagent/issues/476)): + - Database-backed session management + - Per-user and per-agent session tracking + - Planned: Full multi-tenancy with namespace-based isolation + +**Sovereignty:** +Kagent addresses data sovereignty through: + +- **On-Premises Deployment**: Full support for air-gapped and on-premises Kubernetes clusters +- **LLM Provider Choice**: Support for self-hosted models via Ollama or custom endpoints +- **Data Residency**: All operational data stored in user-controlled databases (SQLite/PostgreSQL) +- **No Phone-Home**: No telemetry or data sent to kagent maintainers +- **Regional LLM Endpoints**: Support for region-specific LLM endpoints (e.g., Azure OpenAI regional deployments) + +**Compliance:** + +- **Apache 2.0 License**: Clear open-source [licensing](LICENSE.md) +- **OpenSSF Best Practices**: Badge at [https://www.bestpractices.dev/projects/10723](https://www.bestpractices.dev/projects/10723) +- **Dependency Scanning**: Automated [CVE scanning via Trivy in CI/CD](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/.github/workflows/image-scan.yaml) +- **SBOM Generation**: Part of the [future state of the project](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/contrib/cncf/security-self-assessment.md#future-state) +- **Audit Logging**: Comprehensive logging of all agent operations and API calls +- **Security Self-Assessment**: Available at [contrib/cncf/security-self-assessment.md](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/contrib/cncf/security-self-assessment.md) + +**High Availability:** + +- **Controller**: Supports multi-replica deployments with leader election (via controller-runtime) +- **Agents**: Configurable replica counts per agent (default: 1, can scale horizontally) +- **Database**: Supports PostgreSQL with HA configurations (replication, failover) +- 
**Stateless Design**: Controllers and agents are stateless, state in Kubernetes API and database +- **Rolling Updates**: Zero-downtime upgrades via Kubernetes rolling deployment strategy +- **Health Checks**: Liveness and readiness probes on all components + +**Resource Requirements:** + +Default per-component requirements (from [helm/kagent/values.yaml](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/helm/kagent/values.yaml)): + +Controller: + +- CPU: 100m request, 2000m limit +- Memory: 128Mi request, 512Mi limit +- Network: ClusterIP service on port 8083 + +UI: + +- CPU: 100m request, 1000m limit +- Memory: 256Mi request, 1Gi limit +- Network: ClusterIP/LoadBalancer on port 8080 + +Per Agent (default): + +- CPU: 100m request, 1000m limit +- Memory: 256Mi request, 1Gi limit (384Mi-1Gi depending on agent type) +- Network: ClusterIP service on port 8080 + +**Storage Requirements:** + +Ephemeral Storage: + +- Container images: ~500MB per component (controller, UI, agent base images) +- Temporary files: Minimal, used for skill loading and code execution sandboxes +- Logs: Configurable retention + +Persistent Storage: + +- **Database** (optional, SQLite vs PostgreSQL): + - SQLite: 10MB-1GB depending on session history (ephemeral, lost on pod restart) + - PostgreSQL: 1GB-100GB+ depending on retention policies and usage + - Stores: sessions, tasks, events, feedback, agent memory, checkpoints +- **Vector Memory** (optional, Qdrant): + - 100MB-10GB+ depending on document corpus size + - Used for RAG and long-term agent memory + +**API Design:** + +**API Topology:** +Kagent exposes multiple API surfaces: + +1. 
**Kubernetes API** (CRDs): + - `agents.kagent.dev/v1alpha2` - Agent definitions + - `modelconfigs.kagent.dev/v1alpha2` - LLM model configurations + - `toolservers.kagent.dev/v1alpha1` - MCP tool server definitions + - `remotemcpservers.kagent.dev/v1alpha2` - Remote MCP servers + - `memories.kagent.dev/v1alpha1` - Memory/vector store configurations + - `mcpservers.kagent.dev` (inherited via KMCP dependency) + +2. **HTTP REST API** (controller): + - Base path: `/api` + - Endpoints: `/agents`, `/sessions`, `/modelconfigs`, `/tools`, `/feedback`, etc. + - Format: JSON request/response + - Authentication: Pluggable (currently development mode) + - See [go/internal/httpserver/server.go](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/go/internal/httpserver/server.go) + +3. **A2A Protocol** (per-agent): + - Path: `/api/a2a/{namespace}/{agent-name}` + - Spec: https://github.com/google/A2A + - Supports streaming and synchronous invocations + +**API Conventions:** + +- RESTful resource naming (plural nouns) +- Standard HTTP methods (GET, POST, PUT, DELETE) +- JSON for all request/response bodies +- Kubernetes-style metadata (namespace, name, labels, annotations) +- Status subresources for CRDs following Kubernetes conventions + +**Defaults:** + +Default values can be found in [helm/kagent/values.yaml](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/helm/kagent/values.yaml): + +- Default model provider: OpenAI (configurable via `providers.default` in Helm) +- Default model: `gpt-4.1-mini` for OpenAI +- Default namespace: `kagent` +- Default database: SQLite (ephemeral) +- Default agent type: `Declarative` +- Default streaming: `true` +- Default resource requests: 100m CPU, 256Mi memory + +**Additional Configurations:** +For production use, configure: + +- PostgreSQL database connection (`database.type=postgres`, `database.postgres.url`) +- LLM API keys via Secrets (`providers.openAI.apiKeySecretRef`) +- TLS 
for external LLM connections (`modelConfig.tls`) +- Resource limits based on workload (`agents.*.resources`) +- OpenTelemetry endpoints (`otel.tracing.enabled`, `otel.tracing.exporter.otlp.endpoint`) +- Network policies for pod isolation +- RBAC policies per agent based on required permissions + +**New API Types:** +Kagent introduces these Kubernetes API types: + +- `Agent`: Defines an AI agent with tools, model config, and deployment spec +- `ModelConfig`: LLM provider configuration with credentials and parameters +- `RemoteMCPServer`: External MCP tool server registration +- `Memory`: Vector store configuration for agent memory +- `ToolServer`: MCP tool server registration + +These do not modify existing Kubernetes APIs or cloud provider APIs. + +**API Compatibility:** + +- **Kubernetes API Server**: Compatible with Kubernetes 1.27+ (uses standard CRD and controller-runtime patterns) +- **API Versioning**: Currently `v1alpha2` for core types, `v1alpha1` for memory types +- **Backward Compatibility**: Breaking changes allowed in alpha versions, will stabilize in v1beta1 and v1 +- **Conversion Webhooks**: Planned for v1beta1 to support multiple API versions simultaneously + +**API Versioning and Breaking Changes:** + +- **Alpha** (`v1alpha1`, `v1alpha2`): Breaking changes allowed between versions, deprecated APIs removed after 1-2 releases +- **Beta** (planned `v1beta1`): Breaking changes discouraged, deprecated APIs supported for 2+ releases +- **Stable** (planned `v1`): Strong backward compatibility guarantees, deprecated APIs supported for 3+ releases +- **Deprecation Policy**: Follows Kubernetes deprecation policy - announcements in release notes, migration guides provided +- **Version Skew**: Controller supports N and N-1 API versions during transitions + +**Release Process:** +Kagent follows semantic versioning ([https://semver.org/](https://semver.org/)): + +- **Major Releases** (x.0.0): Breaking API changes, major new features + - Require migration guides + 
- Example: v1.0.0 (planned) + +- **Minor Releases** (0.x.0): New features, non-breaking changes + - Monthly cadence (target) + - New agent types, tool integrations, LLM providers + - Backward compatible within major version + +- **Patch Releases** (0.0.x): Bug fixes, security patches + - As needed, typically weekly for active issues + - No new features + - Always backward compatible + +Release artifacts: + +- Container images: `cr.kagent.dev/kagent-dev/kagent/*` +- Helm charts: `oci://ghcr.io/kagent-dev/kagent/helm/*` +- CLI binaries: GitHub releases +- Release notes: GitHub releases with changelog + +Release process (managed by maintainers): + +1. Version bump in relevant files +2. Create release branch +3. Run full CI/CD pipeline including E2E tests +4. Build and push multi-arch container images +5. Package and publish Helm charts +6. Create GitHub release with notes +7. Update documentation site + +See [CONTRIBUTION.md](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/CONTRIBUTION.md#releasing) for details. + +### Installation + +**Installation Methods:** + +Kagent provides multiple installation paths to suit different use cases. See https://kagent.dev/docs/kagent/introduction/installation for end-user details. There are also more detailed developer installation guides for each method below. + +**1. Helm Installation (Recommended for Production):** + +See [helm/README.md](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/helm/README.md#using-helm) for more details. + +**2. CLI Installation (Quickest for Getting Started):** + +See [README.md](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/helm/README.md#using-kagent-cli) for more details. + + +**3. Local Development (Kind):** + +See [README.md](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/helm/README.md#using-make) for more details. 
+ + +**Configuration Requirements:** + +- **Minimal**: Kubernetes cluster (1.27+), LLM API key +- **Optional**: PostgreSQL for persistence, Prometheus/Grafana for observability, custom RBAC policies + +**Initialization:** + +After installation, kagent automatically: + +1. Deploys controller, UI, and default agents +2. Creates default ModelConfig from Helm values +3. Registers KMCP tool server (if enabled) +4. Starts health check endpoints + +No manual initialization steps required beyond providing LLM credentials. + +**Installation Validation:** + +**1. Check Pod Status:** + +```bash +kubectl get pods -n kagent +# Expected: All pods Running + +kubectl wait --for=condition=Ready pods --all -n kagent --timeout=120s +``` + +**2. Verify CRDs:** + +```bash +kubectl get crds | grep kagent.dev +# Expected: agents.kagent.dev, modelconfigs.kagent.dev, etc. +``` + +**3. Check Agents:** + +```bash +kubectl get agents -n kagent +# Expected: Default agents (k8s-agent, observability-agent, etc.) with Ready=True +``` + +**4. Test API:** + +```bash +# Port-forward controller +kubectl port-forward svc/kagent-controller 8083:8083 -n kagent + +# Check version +curl http://localhost:8083/version +# Expected: {"kagent_version":"v0.x.x","git_commit":"...","build_date":"..."} + +# List agents +curl http://localhost:8083/api/agents +``` + +**5. Test Agent Invocation:** + +```bash +# Using CLI +kagent agent invoke k8s-agent "List all namespaces" + +# Or via UI +# Navigate to http://localhost:8001 (after port-forward) +# Select agent and send message +``` + +**6. Check Logs:** + +```bash +# Controller logs +kubectl logs -n kagent deployment/kagent-controller --tail=50 + +# Agent logs +kubectl logs -n kagent deployment/k8s-agent --tail=50 +``` + +**7. 
Validate Observability (if enabled):** + +```bash +# Check metrics endpoint +kubectl port-forward svc/kagent-controller 8083:8083 -n kagent +curl http://localhost:8083/metrics + +# Check traces in Jaeger (if configured) +# Navigate to Jaeger UI and search for kagent traces +``` + +**Troubleshooting:** +Common issues and solutions documented at: +- [DEVELOPMENT.md#troubleshooting](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/DEVELOPMENT.md#troubleshooting) +- Helm README: [helm/README.md](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/helm/README.md) + +**Quick Start Guide:** + +https://kagent.dev/docs/kagent/getting-started/quickstart + +### Security + +**Security Self-Assessment:** +Kagent's comprehensive security self-assessment is available at: +[contrib/cncf/security-self-assessment.md](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/contrib/cncf/security-self-assessment.md) + +**Cloud Native Security Tenets:** + +Kagent satisfies the [Cloud Native Security Tenets](https://github.com/cncf/tag-security/blob/main/community/resources/security-whitepaper/secure-defaults-cloud-native-8.md) as follows: + +1. **Secure by Default:** + - RBAC enforced by default for all agents + - Secrets required for LLM API keys (no plaintext credentials) + - Network policies supported out-of-box + - No privileged containers by default + - TLS support for external LLM connections + +2. **Defense in Depth:** + - Multiple security layers: Kubernetes RBAC, namespace isolation, secret management, network policies + - Container security scanning (Trivy) in CI/CD + - Audit logging of all agent operations + - Session isolation in database + +3. 
**Least Privilege:** + - Controller runs with minimal RBAC permissions (see [go/config/rbac/role.yaml](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/go/config/rbac/role.yaml)) + - Each agent gets individual ServiceAccount with scoped permissions + - No cluster-admin privileges required + - Agents cannot access secrets in other namespaces + +4. **Immutable Infrastructure:** + - Container images are immutable + - Configuration via Kubernetes manifests (GitOps compatible) + - No runtime modification of agent code + - Declarative agent definitions + +5. **Auditable:** + - All API calls logged + - Agent operations tracked in database + - OpenTelemetry traces for complete request flow + - Kubernetes audit logs capture CRD changes + +6. **Automated:** + - Automated vulnerability scanning in CI/CD + - Automated testing including security scenarios + - Dependency updates via Dependabot (in-progress: [https://github.com/kagent-dev/kagent/pull/958](https://github.com/kagent-dev/kagent/pull/958)) + +7. **Segregated:** + - Namespace-based isolation + - Per-agent RBAC policies + - Network policies for pod-to-pod communication + - Database session isolation (planned full multi-tenancy) + +8. **Hardened:** + - Minimal container base images + - No unnecessary packages or tools + - Non-root user execution where possible + - Read-only root filesystems supported + +**Loosening Security Defaults:** + +For development or specific use cases, users may need to relax security: + +1. **Development Mode Authentication:** + - Default: UnsecureAuthenticator (no auth checks) + - Production: Configure proper authentication via [Issue #476](https://github.com/kagent-dev/kagent/issues/476) + - Documentation: Planned for v1.0 release + +2. 
**Expanded RBAC Permissions:** + - Default: Read-only access to most resources + - Custom: Edit agent RBAC templates in [helm/agents/*/templates/rbac.yaml](https://github.com/kagent-dev/kagent/tree/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/helm/agents) + - Example: Grant write access for agents that need to modify resources + +3. **Cross-Namespace Access:** + - Default: Agents can only access resources in their namespace + - Custom: Use ClusterRole instead of Role for cluster-wide access + - Warning: Increases security risk, use with caution + +4. **TLS Verification:** + - Default: TLS verification enabled for external connections + - Custom: Disable via `modelConfig.tls.insecureSkipVerify: true` (not recommended) + - Use case: Self-signed certificates in development + +5. **Network Policies:** + - Default: No network policies (Kubernetes default-allow) + - Recommended: Apply network policies to restrict pod-to-pod traffic + - Example policies: To be documented + +Documentation for security configuration: https://kagent.dev/docs/kagent (security section planned) + +**Security Hygiene:** + +**Frameworks and Practices:** + +1. **Code Review**: All PRs require maintainer review before merge +2. **Automated Testing**: Unit, integration, and E2E tests in CI/CD +3. **Vulnerability Scanning**: + - Trivy scans for container images + - `govulncheck` for Go dependencies + - `uv run pip-audit` for Python dependencies + - `npm audit` for UI dependencies + - Run via `make audit` (see [Makefile](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/Makefile)) +4. **Dependency Management**: + - Go modules with version pinning + - Python uv.lock for reproducible builds + - npm package-lock.json + - Regular dependency updates +5. **Signed Commits**: DCO (Developer Certificate of Origin) required +6. 
**Security Policy**: [SECURITY.md](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/SECURITY.md) with responsible disclosure process +7. **OpenSSF Best Practices**: Badge at https://www.bestpractices.dev/projects/10723 + +**Security Risk Evaluation:** +Features evaluated for security risk: + +- **Agent Code Execution**: Sandboxed Python code execution for `executeCodeBlocks` feature +- **Tool Invocation**: RBAC-controlled access to Kubernetes APIs and external services +- **Secret Access**: Scoped to agent's namespace, no cross-namespace access +- **Database Access**: Session isolation prevents cross-user data access +- **A2A Communication**: Authentication framework for agent-to-agent calls + +Ongoing evaluation via: + +- Security issue triage (severity-based prioritization) +- Community security reports via kagent-vulnerability-reports@googlegroups.com + +**Cloud Native Threat Modeling:** + +**Minimal Privileges:** +Controller requires: + +- **Read/Write**: `agents`, `modelconfigs`, `toolservers`, `memories`, `remotemcpservers`, `mcpservers` (kagent.dev API group) +- **Read/Write**: `deployments`, `services`, `configmaps`, `secrets`, `serviceaccounts` (for agent lifecycle) +- **Read**: All other resources (for status reporting and validation) + +Agents require (configurable per agent): + +- **Read**: Kubernetes resources relevant to their function (e.g., k8s-agent needs read access to pods, deployments, etc.) 
+- **Write**: Only for agents that modify resources (e.g., helm-agent needs write access for releases) +- **Execute**: Tool invocation via MCP servers + +Reasons for privileges: + +- Controller needs write access to create/update agent deployments and services +- Agents need read access to perform their operational tasks +- Write access for agents is optional and scoped to specific use cases + +**Certificate Rotation:** + +- **LLM Connections**: TLS certificates for external LLM providers are managed by the provider +- **In-Cluster**: Kubernetes handles certificate rotation for service-to-service communication +- **Custom CA**: Support for custom CA certificates via `modelConfig.tls.caCert` (base64-encoded PEM) +- **Certificate Expiry**: No automatic rotation, users must update secrets when certificates expire +- **Planned**: Automatic certificate rotation via cert-manager integration (roadmap item) + +**Secure Software Supply Chain:** + +Kagent follows [CNCF SSCP best practices](https://project.linuxfoundation.org/hubfs/CNCF_SSCP_v1.pdf): + +1. **Source Code Management:** + - Public GitHub repository with branch protection + - Required code reviews for all changes + - Signed commits via DCO + - No force-push to main branch + +2. **Build Process:** + - Reproducible builds via Docker multi-stage builds + - Build provenance tracked (version, git commit, build date) + - Automated builds in GitHub Actions (no manual builds) + - Build logs publicly available + +3. **Artifact Management:** + - Container images signed (planned via Cosign) + - SBOM generation (planned for v1.0) + - Multi-architecture builds (amd64, arm64) + - Immutable tags (version-based, no `latest` in production) + +4. **Dependency Management:** + - Lock files for all dependencies (go.sum, uv.lock, package-lock.json) + - Automated vulnerability scanning + - Dependabot for security updates + - Minimal dependencies (reduce attack surface) + +5. 
**Testing:** + - Comprehensive test suite (unit, integration, E2E) + - Security-focused tests (RBAC, secret handling, TLS) + - Mock LLM servers for deterministic testing + - Test coverage tracking + +6. **Release Process:** + - Semantic versioning + - Release notes with security advisories + - Changelog generation + - Signed releases (planned) + +7. **Monitoring:** + - CVE scanning in CI/CD (blocks on high/critical) + - OpenSSF Scorecard (planned) + - Security advisories via GitHub Security + +**Planned Enhancements:** +See [security self-assessment](https://github.com/kagent-dev/kagent/blob/9438c9c0f2c79daf632555df1d7d3cb2d04b7b81/contrib/cncf/security-self-assessment.md#future-state) for details: + +- SLSA provenance attestations +- Cosign image signing +- SBOM in SPDX/CycloneDX format + + +## Day 1 \- Installation and Deployment Phase + +_Coming Soon_ From 5ff1df8d1ce5c34f18fe8533ce37ccf92f59316d Mon Sep 17 00:00:00 2001 From: Brian Fox <878612+onematchfox@users.noreply.github.com> Date: Tue, 2 Dec 2025 20:14:41 +0100 Subject: [PATCH 11/17] Enable leader election on controller when scaled (#1146) This PR enables leader election on the controller if it is configured with one than 1 replica to ensure that only 1 replica is actively reconciling watched manifests. It also ensures that the necessary RBAC manifests are created. Final part of #1133 (excluding #1138). 
--------- Signed-off-by: Brian Fox <878612+onematchfox@users.noreply.github.com> Signed-off-by: Ivan Porta --- helm/kagent/templates/_helpers.tpl | 7 +++++ .../templates/controller-configmap.yaml | 1 + .../templates/rbac/leader-election-role.yaml | 28 +++++++++++++++++++ .../rbac/leader-election-rolebinding.yaml | 17 +++++++++++ 4 files changed, 53 insertions(+) create mode 100644 helm/kagent/templates/rbac/leader-election-role.yaml create mode 100644 helm/kagent/templates/rbac/leader-election-rolebinding.yaml diff --git a/helm/kagent/templates/_helpers.tpl b/helm/kagent/templates/_helpers.tpl index 73a933553..059b93e73 100644 --- a/helm/kagent/templates/_helpers.tpl +++ b/helm/kagent/templates/_helpers.tpl @@ -107,6 +107,13 @@ Engine labels app.kubernetes.io/component: engine {{- end }} +{{/* +Check if leader election should be enabled (more than 1 replica) +*/}} +{{- define "kagent.leaderElectionEnabled" -}} +{{- gt (.Values.controller.replicas | int) 1 -}} +{{- end -}} + {{/* Validate controller configuration */}} diff --git a/helm/kagent/templates/controller-configmap.yaml b/helm/kagent/templates/controller-configmap.yaml index 792b58c01..dcf8b34a3 100644 --- a/helm/kagent/templates/controller-configmap.yaml +++ b/helm/kagent/templates/controller-configmap.yaml @@ -15,6 +15,7 @@ data: IMAGE_REGISTRY: {{ .Values.controller.agentImage.registry | default .Values.registry | quote }} IMAGE_REPOSITORY: {{ .Values.controller.agentImage.repository | quote }} IMAGE_TAG: {{ coalesce .Values.controller.agentImage.tag .Values.tag .Chart.Version | quote }} + LEADER_ELECT: {{ include "kagent.leaderElectionEnabled" . 
| quote }} OTEL_EXPORTER_OTLP_ENDPOINT: {{ .Values.otel.tracing.exporter.otlp.endpoint | quote }} OTEL_EXPORTER_OTLP_LOGS_ENDPOINT: {{ .Values.otel.logging.exporter.otlp.endpoint | quote }} OTEL_EXPORTER_OTLP_LOGS_INSECURE: {{ .Values.otel.logging.exporter.otlp.insecure | quote }} diff --git a/helm/kagent/templates/rbac/leader-election-role.yaml b/helm/kagent/templates/rbac/leader-election-role.yaml new file mode 100644 index 000000000..82df4b781 --- /dev/null +++ b/helm/kagent/templates/rbac/leader-election-role.yaml @@ -0,0 +1,28 @@ +{{- if eq (include "kagent.leaderElectionEnabled" .) "true" }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "kagent.fullname" . }}-leader-election-role + namespace: {{ include "kagent.namespace" . }} + labels: + {{- include "kagent.controller.labels" . | nindent 4 }} +rules: +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - list + - watch + - create + - update + - patch +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch +{{- end }} \ No newline at end of file diff --git a/helm/kagent/templates/rbac/leader-election-rolebinding.yaml b/helm/kagent/templates/rbac/leader-election-rolebinding.yaml new file mode 100644 index 000000000..4a7a86949 --- /dev/null +++ b/helm/kagent/templates/rbac/leader-election-rolebinding.yaml @@ -0,0 +1,17 @@ +{{- if eq (include "kagent.leaderElectionEnabled" .) "true" }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "kagent.fullname" . }}-leader-election-rolebinding + namespace: {{ include "kagent.namespace" . }} + labels: + {{- include "kagent.controller.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ include "kagent.fullname" . }}-leader-election-role +subjects: +- kind: ServiceAccount + name: {{ include "kagent.fullname" . }}-controller + namespace: {{ include "kagent.namespace" . 
}} +{{- end }} \ No newline at end of file From 8f93d5a3a3a44876e3a5ba13cb36b6d8233bf59e Mon Sep 17 00:00:00 2001 From: Brian Fox <878612+onematchfox@users.noreply.github.com> Date: Wed, 3 Dec 2025 15:16:08 +0100 Subject: [PATCH 12/17] feat(controller): decouple A2A handler registration from controller reconcilation (#1138) **Decided to split this out of https://github.com/kagent-dev/kagent/pull/1133 to try make review a little easier as it's a chunky commit that can live in isolation of the rest of the changes in that PR** This change separates A2A handler registration from the main `Agent` controller reconciliation loop by introducing a dedicated `A2ARegistrar` that manages the A2A routing table independently from the main controller. Currently, A2A handler registration is tightly coupled to the `Agent` controller's reconciliation loop, which performs the following operations: 1. Reconcile Kubernetes resources (Deployment, Service, etc.) 2. Store agent metadata in database 3. Register A2A handler in routing table 4. Update resource status This coupling is problematic for a number of reasons: 1. Breaks horizontal scaling - with leader election enabled (required to prevent duplicate reconciliation), only the leader pod performs reconciliation and registers A2A handlers. When API requests hit non-leader replicas, they fail because those replicas lack the necessary handler registrations. 2. Could be argued that this violates separation of concerns - the controller handles both cluster resource management (its core responsibility) and API routing configuration (an orthogonal concern). 3. Makes future architectural changes (e.g., splitting API and control plane) unnecessarily complex. This PR attempts to address those concerns ensuring that all controller replicas, when scaled, will maintain consistent A2A routing tables enabling transparent load balancing across replicas. 
A2A logic is also consolidated into a dedicated package rather than scattered across controller code ensuring a clean separation of API and control plane such that these could be split into independent deployments without significant refactoring in future. --------- Signed-off-by: Brian Fox <878612+onematchfox@users.noreply.github.com> Signed-off-by: Ivan Porta --- go/internal/a2a/a2a_registrar.go | 168 ++++++++++++++++++ go/internal/controller/a2a/a2a_reconciler.go | 109 ------------ .../controller/reconciler/reconciler.go | 20 --- .../translator/agent/adk_api_translator.go | 73 +++----- .../controller/translator/agent/utils.go | 35 ++++ go/pkg/app/app.go | 32 ++-- 6 files changed, 242 insertions(+), 195 deletions(-) create mode 100644 go/internal/a2a/a2a_registrar.go delete mode 100644 go/internal/controller/a2a/a2a_reconciler.go create mode 100644 go/internal/controller/translator/agent/utils.go diff --git a/go/internal/a2a/a2a_registrar.go b/go/internal/a2a/a2a_registrar.go new file mode 100644 index 000000000..684e552a0 --- /dev/null +++ b/go/internal/a2a/a2a_registrar.go @@ -0,0 +1,168 @@ +package a2a + +import ( + "context" + "fmt" + "net" + "net/http" + "os" + "reflect" + "time" + + "github.com/go-logr/logr" + "github.com/kagent-dev/kagent/go/api/v1alpha2" + agent_translator "github.com/kagent-dev/kagent/go/internal/controller/translator/agent" + authimpl "github.com/kagent-dev/kagent/go/internal/httpserver/auth" + common "github.com/kagent-dev/kagent/go/internal/utils" + "github.com/kagent-dev/kagent/go/pkg/auth" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/cache" + crcache "sigs.k8s.io/controller-runtime/pkg/cache" + ctrllog "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/manager" + a2aclient "trpc.group/trpc-go/trpc-a2a-go/client" +) + +type A2ARegistrar struct { + cache crcache.Cache + translator agent_translator.AdkApiTranslator + handlerMux A2AHandlerMux + a2aBaseUrl string + authenticator 
auth.AuthProvider + a2aBaseOptions []a2aclient.Option +} + +var _ manager.Runnable = (*A2ARegistrar)(nil) + +func NewA2ARegistrar( + cache crcache.Cache, + translator agent_translator.AdkApiTranslator, + mux A2AHandlerMux, + a2aBaseUrl string, + authenticator auth.AuthProvider, + streamingMaxBuf int, + streamingInitialBuf int, + streamingTimeout time.Duration, +) *A2ARegistrar { + reg := &A2ARegistrar{ + cache: cache, + translator: translator, + handlerMux: mux, + a2aBaseUrl: a2aBaseUrl, + authenticator: authenticator, + a2aBaseOptions: []a2aclient.Option{ + a2aclient.WithTimeout(streamingTimeout), + a2aclient.WithBuffer(streamingInitialBuf, streamingMaxBuf), + debugOpt(), + }, + } + + return reg +} + +func (a *A2ARegistrar) NeedLeaderElection() bool { + return false +} + +func (a *A2ARegistrar) Start(ctx context.Context) error { + log := ctrllog.FromContext(ctx).WithName("a2a-registrar") + + informer, err := a.cache.GetInformer(ctx, &v1alpha2.Agent{}) + if err != nil { + return fmt.Errorf("failed to get cache informer: %w", err) + } + + if _, err := informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + if agent, ok := obj.(*v1alpha2.Agent); ok { + if err := a.upsertAgentHandler(ctx, agent, log); err != nil { + log.Error(err, "failed to upsert A2A handler", "agent", common.GetObjectRef(agent)) + } + } + }, + UpdateFunc: func(oldObj, newObj interface{}) { + oldAgent, ok1 := oldObj.(*v1alpha2.Agent) + newAgent, ok2 := newObj.(*v1alpha2.Agent) + if !ok1 || !ok2 { + return + } + if oldAgent.Generation != newAgent.Generation || !reflect.DeepEqual(oldAgent.Spec, newAgent.Spec) { + if err := a.upsertAgentHandler(ctx, newAgent, log); err != nil { + log.Error(err, "failed to upsert A2A handler", "agent", common.GetObjectRef(newAgent)) + } + } + }, + DeleteFunc: func(obj interface{}) { + agent, ok := obj.(*v1alpha2.Agent) + if !ok { + if tombstone, ok := obj.(cache.DeletedFinalStateUnknown); ok { + if a2, ok := 
tombstone.Obj.(*v1alpha2.Agent); ok { + agent = a2 + } + } + } + if agent == nil { + return + } + ref := common.GetObjectRef(agent) + a.handlerMux.RemoveAgentHandler(ref) + log.V(1).Info("removed A2A handler", "agent", ref) + }, + }); err != nil { + return fmt.Errorf("failed to add informer event handler: %w", err) + } + + if ok := a.cache.WaitForCacheSync(ctx); !ok { + return fmt.Errorf("cache sync failed") + } + + <-ctx.Done() + return nil +} + +func (a *A2ARegistrar) upsertAgentHandler(ctx context.Context, agent *v1alpha2.Agent, log logr.Logger) error { + agentRef := types.NamespacedName{Namespace: agent.GetNamespace(), Name: agent.GetName()} + card := agent_translator.GetA2AAgentCard(agent) + + client, err := a2aclient.NewA2AClient( + card.URL, + append( + a.a2aBaseOptions, + a2aclient.WithHTTPReqHandler( + authimpl.A2ARequestHandler( + a.authenticator, + agentRef, + ), + ), + )..., + ) + if err != nil { + return fmt.Errorf("create A2A client for %s: %w", agentRef, err) + } + + cardCopy := *card + cardCopy.URL = fmt.Sprintf("%s/%s/", a.a2aBaseUrl, agentRef) + + if err := a.handlerMux.SetAgentHandler(agentRef.String(), client, cardCopy); err != nil { + return fmt.Errorf("set handler for %s: %w", agentRef, err) + } + + log.V(1).Info("registered/updated A2A handler", "agent", agentRef) + return nil +} + +func debugOpt() a2aclient.Option { + debugAddr := os.Getenv("KAGENT_A2A_DEBUG_ADDR") + if debugAddr != "" { + client := new(http.Client) + client.Transport = &http.Transport{ + DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { + var zeroDialer net.Dialer + return zeroDialer.DialContext(ctx, network, debugAddr) + }, + } + return a2aclient.WithHTTPClient(client) + } else { + return func(*a2aclient.A2AClient) {} + } +} diff --git a/go/internal/controller/a2a/a2a_reconciler.go b/go/internal/controller/a2a/a2a_reconciler.go deleted file mode 100644 index 8d71f94f5..000000000 --- a/go/internal/controller/a2a/a2a_reconciler.go +++ /dev/null 
@@ -1,109 +0,0 @@ -package a2a - -import ( - "context" - "fmt" - "net" - "net/http" - "os" - "time" - - "github.com/kagent-dev/kagent/go/api/v1alpha2" - "github.com/kagent-dev/kagent/go/internal/a2a" - authimpl "github.com/kagent-dev/kagent/go/internal/httpserver/auth" - common "github.com/kagent-dev/kagent/go/internal/utils" - "github.com/kagent-dev/kagent/go/pkg/auth" - "k8s.io/apimachinery/pkg/types" - a2aclient "trpc.group/trpc-go/trpc-a2a-go/client" - "trpc.group/trpc-go/trpc-a2a-go/server" -) - -type A2AReconciler interface { - ReconcileAgent( - ctx context.Context, - agent *v1alpha2.Agent, - card server.AgentCard, - ) error - - ReconcileAgentDeletion( - agentRef string, - ) -} - -type ClientOptions struct { - StreamingMaxBufSize int - StreamingInitialBufSize int - Timeout time.Duration -} - -type a2aReconciler struct { - a2aHandler a2a.A2AHandlerMux - a2aBaseUrl string - authenticator auth.AuthProvider - clientOptions ClientOptions -} - -func NewReconciler( - a2aHandler a2a.A2AHandlerMux, - a2aBaseUrl string, - clientOptions ClientOptions, - authenticator auth.AuthProvider, -) A2AReconciler { - return &a2aReconciler{ - a2aHandler: a2aHandler, - a2aBaseUrl: a2aBaseUrl, - clientOptions: clientOptions, - authenticator: authenticator, - } -} - -func (a *a2aReconciler) ReconcileAgent( - ctx context.Context, - agent *v1alpha2.Agent, - card server.AgentCard, -) error { - agentRef := common.GetObjectRef(agent) - agentNns := types.NamespacedName{Namespace: agent.GetNamespace(), Name: agent.GetName()} - - client, err := a2aclient.NewA2AClient(card.URL, - a2aclient.WithTimeout(a.clientOptions.Timeout), - a2aclient.WithBuffer(a.clientOptions.StreamingInitialBufSize, a.clientOptions.StreamingMaxBufSize), - debugOpt(), - a2aclient.WithHTTPReqHandler(authimpl.A2ARequestHandler(a.authenticator, agentNns)), - ) - if err != nil { - return err - } - - // Modify card for kagent proxy - cardCopy := card - cardCopy.URL = fmt.Sprintf("%s/%s/", a.a2aBaseUrl, agentRef) - - return 
a.a2aHandler.SetAgentHandler( - agentRef, - client, - cardCopy, - ) -} - -func (a *a2aReconciler) ReconcileAgentDeletion( - agentRef string, -) { - a.a2aHandler.RemoveAgentHandler(agentRef) -} - -func debugOpt() a2aclient.Option { - debugAddr := os.Getenv("KAGENT_A2A_DEBUG_ADDR") - if debugAddr != "" { - client := new(http.Client) - client.Transport = &http.Transport{ - DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { - var zeroDialer net.Dialer - return zeroDialer.DialContext(ctx, network, debugAddr) - }, - } - return a2aclient.WithHTTPClient(client) - } else { - return func(*a2aclient.A2AClient) {} - } -} diff --git a/go/internal/controller/reconciler/reconciler.go b/go/internal/controller/reconciler/reconciler.go index d7fdb1bbd..b7443c777 100644 --- a/go/internal/controller/reconciler/reconciler.go +++ b/go/internal/controller/reconciler/reconciler.go @@ -19,10 +19,8 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/util/retry" - "trpc.group/trpc-go/trpc-a2a-go/server" "github.com/kagent-dev/kagent/go/api/v1alpha2" - "github.com/kagent-dev/kagent/go/internal/controller/a2a" "github.com/kagent-dev/kagent/go/internal/controller/translator" agent_translator "github.com/kagent-dev/kagent/go/internal/controller/translator/agent" "github.com/kagent-dev/kagent/go/internal/database" @@ -53,7 +51,6 @@ type KagentReconciler interface { type kagentReconciler struct { adkTranslator agent_translator.AdkApiTranslator - a2aReconciler a2a.A2AReconciler kube client.Client dbClient database.Client @@ -69,14 +66,12 @@ func NewKagentReconciler( kube client.Client, dbClient database.Client, defaultModelConfig types.NamespacedName, - a2aReconciler a2a.A2AReconciler, ) KagentReconciler { return &kagentReconciler{ adkTranslator: translator, kube: kube, dbClient: dbClient, defaultModelConfig: defaultModelConfig, - a2aReconciler: a2aReconciler, } } @@ -100,9 +95,6 @@ func (a 
*kagentReconciler) ReconcileKagentAgent(ctx context.Context, req ctrl.Re } func (a *kagentReconciler) handleAgentDeletion(req ctrl.Request) error { - // remove a2a handler if it exists - a.a2aReconciler.ReconcileAgentDeletion(req.String()) - if err := a.dbClient.DeleteAgent(req.String()); err != nil { return fmt.Errorf("failed to delete agent %s: %w", req.String(), err) @@ -499,10 +491,6 @@ func (a *kagentReconciler) reconcileAgent(ctx context.Context, agent *v1alpha2.A return fmt.Errorf("failed to reconcile owned objects: %v", err) } - if err := a.reconcileA2A(ctx, agent, agentOutputs.AgentCard); err != nil { - return fmt.Errorf("failed to reconcile A2A for agent %s/%s: %v", agent.Namespace, agent.Name, err) - } - if err := a.upsertAgent(ctx, agent, agentOutputs); err != nil { return fmt.Errorf("failed to upsert agent %s/%s: %v", agent.Namespace, agent.Name, err) } @@ -744,14 +732,6 @@ func (a *kagentReconciler) getDiscoveredMCPTools(ctx context.Context, serverRef return discoveredTools, nil } -func (a *kagentReconciler) reconcileA2A( - ctx context.Context, - agent *v1alpha2.Agent, - card server.AgentCard, -) error { - return a.a2aReconciler.ReconcileAgent(ctx, agent, card) -} - func convertTool(tool *database.Tool) (*v1alpha2.MCPTool, error) { return &v1alpha2.MCPTool{ Name: tool.ID, diff --git a/go/internal/controller/translator/agent/adk_api_translator.go b/go/internal/controller/translator/agent/adk_api_translator.go index 65cb1701b..3ffa09aa4 100644 --- a/go/internal/controller/translator/agent/adk_api_translator.go +++ b/go/internal/controller/translator/agent/adk_api_translator.go @@ -118,45 +118,36 @@ func (a *adkApiTranslator) TranslateAgent( return nil, err } + var cfg *adk.AgentConfig + var dep *resolvedDeployment + var secretHashBytes []byte + switch agent.Spec.Type { case v1alpha2.AgentType_Declarative: - - cfg, card, mdd, secretHashBytes, err := a.translateInlineAgent(ctx, agent) + var mdd *modelDeploymentData + cfg, mdd, secretHashBytes, err = 
a.translateInlineAgent(ctx, agent) if err != nil { return nil, err } - dep, err := a.resolveInlineDeployment(agent, mdd) + dep, err = a.resolveInlineDeployment(agent, mdd) if err != nil { return nil, err } - return a.buildManifest(ctx, agent, dep, cfg, card, secretHashBytes) case v1alpha2.AgentType_BYO: - dep, err := a.resolveByoDeployment(agent) + dep, err = a.resolveByoDeployment(agent) if err != nil { return nil, err } - // TODO: Resolve this from the actual pod - agentCard := &server.AgentCard{ - Name: strings.ReplaceAll(agent.Name, "-", "_"), - Description: agent.Spec.Description, - URL: fmt.Sprintf("http://%s.%s:8080", agent.Name, agent.Namespace), - Capabilities: server.AgentCapabilities{ - Streaming: ptr.To(true), - PushNotifications: ptr.To(false), - StateTransitionHistory: ptr.To(true), - }, - // Can't be null for Python, so set to empty list - Skills: []server.AgentSkill{}, - DefaultInputModes: []string{"text"}, - DefaultOutputModes: []string{"text"}, - } - return a.buildManifest(ctx, agent, dep, nil, agentCard, nil) default: return nil, fmt.Errorf("unknown agent type: %s", agent.Spec.Type) } + + card := GetA2AAgentCard(agent) + + return a.buildManifest(ctx, agent, dep, cfg, card, secretHashBytes) } // GetOwnedResourceTypes returns all the resource types that may be created for an agent. 
@@ -508,16 +499,16 @@ func (a *adkApiTranslator) buildManifest( return outputs, a.runPlugins(ctx, agent, outputs) } -func (a *adkApiTranslator) translateInlineAgent(ctx context.Context, agent *v1alpha2.Agent) (*adk.AgentConfig, *server.AgentCard, *modelDeploymentData, []byte, error) { +func (a *adkApiTranslator) translateInlineAgent(ctx context.Context, agent *v1alpha2.Agent) (*adk.AgentConfig, *modelDeploymentData, []byte, error) { model, mdd, secretHashBytes, err := a.translateModel(ctx, agent.Namespace, agent.Spec.Declarative.ModelConfig) if err != nil { - return nil, nil, nil, nil, err + return nil, nil, nil, err } systemMessage, err := a.resolveSystemMessage(ctx, agent) if err != nil { - return nil, nil, nil, nil, err + return nil, nil, nil, err } cfg := &adk.AgentConfig{ @@ -526,26 +517,6 @@ func (a *adkApiTranslator) translateInlineAgent(ctx context.Context, agent *v1al Model: model, ExecuteCode: ptr.Deref(agent.Spec.Declarative.ExecuteCodeBlocks, false), } - agentCard := &server.AgentCard{ - Name: strings.ReplaceAll(agent.Name, "-", "_"), - Description: agent.Spec.Description, - URL: fmt.Sprintf("http://%s.%s:8080", agent.Name, agent.Namespace), - Capabilities: server.AgentCapabilities{ - Streaming: ptr.To(true), - PushNotifications: ptr.To(false), - StateTransitionHistory: ptr.To(true), - }, - // Can't be null for Python, so set to empty list - Skills: []server.AgentSkill{}, - DefaultInputModes: []string{"text"}, - DefaultOutputModes: []string{"text"}, - } - - if agent.Spec.Declarative.A2AConfig != nil { - agentCard.Skills = slices.Collect(utils.Map(slices.Values(agent.Spec.Declarative.A2AConfig.Skills), func(skill v1alpha2.AgentSkill) server.AgentSkill { - return server.AgentSkill(skill) - })) - } for _, tool := range agent.Spec.Declarative.Tools { // Skip tools that are not applicable to the model provider @@ -553,7 +524,7 @@ func (a *adkApiTranslator) translateInlineAgent(ctx context.Context, agent *v1al case tool.McpServer != nil: err := 
a.translateMCPServerTarget(ctx, cfg, agent.Namespace, tool.McpServer, tool.HeadersFrom) if err != nil { - return nil, nil, nil, nil, err + return nil, nil, nil, err } case tool.Agent != nil: agentRef := types.NamespacedName{ @@ -562,14 +533,14 @@ func (a *adkApiTranslator) translateInlineAgent(ctx context.Context, agent *v1al } if agentRef.Namespace == agent.Namespace && agentRef.Name == agent.Name { - return nil, nil, nil, nil, fmt.Errorf("agent tool cannot be used to reference itself, %s", agentRef) + return nil, nil, nil, fmt.Errorf("agent tool cannot be used to reference itself, %s", agentRef) } // Translate a nested tool toolAgent := &v1alpha2.Agent{} err := a.kube.Get(ctx, agentRef, toolAgent) if err != nil { - return nil, nil, nil, nil, err + return nil, nil, nil, err } switch toolAgent.Spec.Type { @@ -577,7 +548,7 @@ func (a *adkApiTranslator) translateInlineAgent(ctx context.Context, agent *v1al url := fmt.Sprintf("http://%s.%s:8080", toolAgent.Name, toolAgent.Namespace) headers, err := tool.ResolveHeaders(ctx, a.kube, agent.Namespace) if err != nil { - return nil, nil, nil, nil, err + return nil, nil, nil, err } cfg.RemoteAgents = append(cfg.RemoteAgents, adk.RemoteAgentConfig{ @@ -587,15 +558,15 @@ func (a *adkApiTranslator) translateInlineAgent(ctx context.Context, agent *v1al Description: toolAgent.Spec.Description, }) default: - return nil, nil, nil, nil, fmt.Errorf("unknown agent type: %s", toolAgent.Spec.Type) + return nil, nil, nil, fmt.Errorf("unknown agent type: %s", toolAgent.Spec.Type) } default: - return nil, nil, nil, nil, fmt.Errorf("tool must have a provider or tool server") + return nil, nil, nil, fmt.Errorf("tool must have a provider or tool server") } } - return cfg, agentCard, mdd, secretHashBytes, nil + return cfg, mdd, secretHashBytes, nil } func (a *adkApiTranslator) resolveSystemMessage(ctx context.Context, agent *v1alpha2.Agent) (string, error) { diff --git a/go/internal/controller/translator/agent/utils.go 
b/go/internal/controller/translator/agent/utils.go new file mode 100644 index 000000000..c5b88903e --- /dev/null +++ b/go/internal/controller/translator/agent/utils.go @@ -0,0 +1,35 @@ +package agent + +import ( + "fmt" + "slices" + "strings" + + "github.com/kagent-dev/kagent/go/api/v1alpha2" + "github.com/kagent-dev/kagent/go/internal/utils" + "k8s.io/utils/ptr" + "trpc.group/trpc-go/trpc-a2a-go/server" +) + +func GetA2AAgentCard(agent *v1alpha2.Agent) *server.AgentCard { + card := server.AgentCard{ + Name: strings.ReplaceAll(agent.Name, "-", "_"), + Description: agent.Spec.Description, + URL: fmt.Sprintf("http://%s.%s:8080", agent.Name, agent.Namespace), + Capabilities: server.AgentCapabilities{ + Streaming: ptr.To(true), + PushNotifications: ptr.To(false), + StateTransitionHistory: ptr.To(true), + }, + // Can't be null for Python, so set to empty list + Skills: []server.AgentSkill{}, + DefaultInputModes: []string{"text"}, + DefaultOutputModes: []string{"text"}, + } + if agent.Spec.Type == v1alpha2.AgentType_Declarative && agent.Spec.Declarative.A2AConfig != nil { + card.Skills = slices.Collect(utils.Map(slices.Values(agent.Spec.Declarative.A2AConfig.Skills), func(skill v1alpha2.AgentSkill) server.AgentSkill { + return server.AgentSkill(skill) + })) + } + return &card +} diff --git a/go/pkg/app/app.go b/go/pkg/app/app.go index 1ee1e65e1..de61918f9 100644 --- a/go/pkg/app/app.go +++ b/go/pkg/app/app.go @@ -40,7 +40,6 @@ import ( "github.com/kagent-dev/kagent/go/internal/database" versionmetrics "github.com/kagent-dev/kagent/go/internal/metrics" - a2a_reconciler "github.com/kagent-dev/kagent/go/internal/controller/a2a" "github.com/kagent-dev/kagent/go/internal/controller/reconciler" reconcilerutils "github.com/kagent-dev/kagent/go/internal/controller/reconciler/utils" agent_translator "github.com/kagent-dev/kagent/go/internal/controller/translator/agent" @@ -351,25 +350,11 @@ func Start(getExtensionConfig GetExtensionConfig) { extensionCfg.AgentPlugins, ) - 
a2aHandler := a2a.NewA2AHttpMux(httpserver.APIPathA2A, extensionCfg.Authenticator) - - a2aReconciler := a2a_reconciler.NewReconciler( - a2aHandler, - cfg.A2ABaseUrl+httpserver.APIPathA2A, - a2a_reconciler.ClientOptions{ - StreamingMaxBufSize: int(cfg.Streaming.MaxBufSize.Value()), - StreamingInitialBufSize: int(cfg.Streaming.InitialBufSize.Value()), - Timeout: cfg.Streaming.Timeout, - }, - extensionCfg.Authenticator, - ) - rcnclr := reconciler.NewKagentReconciler( apiTranslator, mgr.GetClient(), dbClient, cfg.DefaultModelConfig, - a2aReconciler, ) if err := (&controller.ServiceController{ @@ -418,6 +403,23 @@ func Start(getExtensionConfig GetExtensionConfig) { os.Exit(1) } + // Register A2A handlers on all replicas + a2aHandler := a2a.NewA2AHttpMux(httpserver.APIPathA2A, extensionCfg.Authenticator) + + if err := mgr.Add(a2a.NewA2ARegistrar( + mgr.GetCache(), + apiTranslator, + a2aHandler, + cfg.A2ABaseUrl+httpserver.APIPathA2A, + extensionCfg.Authenticator, + int(cfg.Streaming.MaxBufSize.Value()), + int(cfg.Streaming.InitialBufSize.Value()), + cfg.Streaming.Timeout, + )); err != nil { + setupLog.Error(err, "unable to set up a2a registrar") + os.Exit(1) + } + // +kubebuilder:scaffold:builder if metricsCertWatcher != nil { setupLog.Info("Adding metrics certificate watcher to manager") From 99a6a013bca3024a74e6eebd2af2c1104da3fd04 Mon Sep 17 00:00:00 2001 From: dongjiang Date: Thu, 4 Dec 2025 22:41:46 +0800 Subject: [PATCH 13/17] chore: Update golangci-lint version and add new linters (#1154) Signed-off-by: jiangdong Signed-off-by: Ivan Porta --- .github/workflows/ci.yaml | 6 +- go/.golangci.yaml | 109 ++++++++++++++++-- go/cli/cmd/kagent/main.go | 1 - .../agent/frameworks/common/base_generator.go | 2 +- go/cli/internal/cli/agent/bug_report.go | 4 +- go/cli/internal/cli/agent/const.go | 3 +- go/cli/internal/cli/agent/const_test.go | 4 +- go/cli/internal/cli/agent/deploy.go | 29 +++-- go/cli/internal/cli/agent/format.go | 8 +- go/cli/internal/cli/agent/get.go | 4 +- 
go/cli/internal/cli/agent/install.go | 12 +- go/cli/internal/cli/agent/invoke.go | 3 +- go/cli/internal/cli/agent/run.go | 2 +- go/cli/internal/cli/agent/utils.go | 14 +-- go/cli/internal/cli/mcp/add_tool.go | 7 +- go/cli/internal/cli/mcp/deploy.go | 2 +- go/cli/internal/cli/mcp/init.go | 1 - go/cli/internal/cli/mcp/inspector.go | 6 +- go/cli/internal/cli/mcp/run.go | 12 +- go/cli/internal/cli/mcp/secrets.go | 4 +- go/cli/internal/common/generator/base.go | 2 +- go/cli/internal/common/k8s/config.go | 4 +- go/cli/internal/config/utils_test.go | 6 +- .../mcp/frameworks/common/base_generator.go | 3 +- .../mcp/frameworks/golang/generator.go | 1 - .../internal/mcp/frameworks/java/generator.go | 4 +- go/cli/internal/mcp/manifests/manager.go | 15 +-- go/cli/internal/mcp/manifests/types.go | 12 +- go/cli/internal/tui/chat.go | 28 ++--- go/cli/internal/tui/dialogs/agent_chooser.go | 12 +- .../internal/tui/dialogs/mcp_server_wizard.go | 5 +- go/cli/internal/tui/workspace.go | 60 ++++------ go/cmd/controller/main.go | 2 +- go/internal/a2a/a2a_handler_mux.go | 1 - go/internal/a2a/a2a_registrar.go | 6 +- go/internal/adk/types.go | 14 +-- go/internal/controller/agent_controller.go | 4 - .../controller/modelconfig_controller.go | 4 +- .../controller/reconciler/reconciler.go | 25 ++-- .../reconciler/utils/reconciler_utils.go | 9 +- go/internal/controller/service_controller.go | 4 +- .../translator/agent/adk_api_translator.go | 4 - .../agent/adk_translator_golden_test.go | 24 ++-- go/internal/controller/translator/mutate.go | 2 +- go/internal/database/client.go | 18 ++- go/internal/database/fake/client.go | 90 ++++++++------- go/internal/database/service.go | 11 +- go/internal/goruntime/cpu.go | 2 +- go/internal/httpserver/auth/authn.go | 2 +- go/internal/httpserver/handlers/agents.go | 9 +- .../httpserver/handlers/checkpoints.go | 1 - go/internal/httpserver/handlers/crewai.go | 16 +-- go/internal/httpserver/handlers/health.go | 2 +- go/internal/httpserver/handlers/helpers.go | 8 
+- go/internal/httpserver/handlers/memory.go | 12 +- .../httpserver/handlers/modelconfig.go | 16 +-- go/internal/httpserver/handlers/namespaces.go | 10 +- go/internal/httpserver/handlers/providers.go | 24 ++-- .../httpserver/handlers/sessions_test.go | 1 - .../httpserver/handlers/toolservers.go | 9 +- .../httpserver/handlers/toolservertypes.go | 8 +- .../handlers/toolservertypes_test.go | 1 - go/internal/httpserver/handlers/utils.go | 4 +- go/internal/utils/client_wrapper.go | 1 - go/internal/utils/client_wrapper_test.go | 52 ++++----- go/pkg/app/app.go | 2 +- go/pkg/client/agent.go | 2 +- go/pkg/client/api/types.go | 30 ++--- go/pkg/client/base.go | 12 +- go/pkg/client/session.go | 6 +- go/test/e2e/invoke_api_test.go | 1 - go/test/e2e/mocks/mock_sts_server.go | 6 +- 72 files changed, 436 insertions(+), 404 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index bc1df1f25..87af6799d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -240,9 +240,9 @@ jobs: cache: true cache-dependency-path: go/go.sum - name: golangci-lint - uses: golangci/golangci-lint-action@v8 + uses: golangci/golangci-lint-action@v9 with: - version: v2.5.0 + version: v2.6.2 working-directory: go python-test: @@ -331,4 +331,4 @@ jobs: git diff exit 1 fi - echo "✓ Controller manifests are up to date" \ No newline at end of file + echo "✓ Controller manifests are up to date" diff --git a/go/.golangci.yaml b/go/.golangci.yaml index b7bc9bf7a..85e62030c 100644 --- a/go/.golangci.yaml +++ b/go/.golangci.yaml @@ -1,11 +1,102 @@ version: "2" - +run: + go: "1.25" + timeout: 10m + allow-parallel-runners: true linters: - # Default set of linters. - # The value can be: - # - `standard`: https://golangci-lint.run/docs/linters/#enabled-by-default - # - `all`: enables all linters by default. - # - `none`: disables all linters by default. 
- # - `fast`: enables only linters considered as "fast" (`golangci-lint help linters --json | jq '[ .[] | select(.fast==true) ] | map(.name)'`). - # Default: standard - default: standard \ No newline at end of file + default: none + enable: + - asasalint + - asciicheck + - bidichk + - copyloopvar + - depguard + - dogsled + - goprintffuncname + - govet + - importas + - ineffassign + - iotamixing + - makezero + - misspell + - modernize + - nakedret + - nolintlint + - staticcheck + - unused + - whitespace + settings: + depguard: + rules: + forbid-pkg-errors: + deny: + - pkg: sort + desc: Should be replaced with slices package + govet: + disable: + - fieldalignment + - shadow + enable-all: true + importas: + alias: + - pkg: k8s.io/api/core/v1 + alias: corev1 + - pkg: k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1 + alias: apiextensionsv1 + - pkg: k8s.io/apimachinery/pkg/apis/meta/v1 + alias: metav1 + - pkg: k8s.io/apimachinery/pkg/api/errors + alias: apierrors + - pkg: k8s.io/apimachinery/pkg/util/errors + alias: kerrors + no-unaliased: true + modernize: + disable: + - omitzero + - fmtappendf + revive: + rules: + # The following rules are recommended https://github.com/mgechev/revive#recommended-configuration + - name: blank-imports + - name: context-as-argument + - name: context-keys-type + - name: dot-imports + - name: error-return + - name: error-strings + - name: error-naming + - name: exported + - name: if-return + - name: increment-decrement + - name: var-naming + - name: var-declaration + - name: range + - name: receiver-naming + - name: time-naming + - name: unexported-return + - name: indent-error-flow + - name: errorf + - name: superfluous-else + - name: unreachable-code + - name: redefines-builtin-id + # + # Rules in addition to the recommended configuration above. 
+ # + - name: bool-literal-in-expr + - name: constant-logical-expr + exclusions: + generated: strict + paths: + - zz_generated.*\.go$ + - .*conversion.*\.go$ +issues: + max-issues-per-linter: 0 + max-same-issues: 0 +formatters: + enable: + - gofmt + - goimports + exclusions: + generated: strict + paths: + - zz_generated.*\.go$ + - .*conversion.*\.go$ diff --git a/go/cli/cmd/kagent/main.go b/go/cli/cmd/kagent/main.go index 3dc4efd84..415337139 100644 --- a/go/cli/cmd/kagent/main.go +++ b/go/cli/cmd/kagent/main.go @@ -426,7 +426,6 @@ Examples: os.Exit(1) } - } func runInteractive(cmd *cobra.Command, args []string) { diff --git a/go/cli/internal/agent/frameworks/common/base_generator.go b/go/cli/internal/agent/frameworks/common/base_generator.go index c6c73955c..2e664108f 100644 --- a/go/cli/internal/agent/frameworks/common/base_generator.go +++ b/go/cli/internal/agent/frameworks/common/base_generator.go @@ -60,6 +60,6 @@ func (g *BaseGenerator) GenerateProject(config AgentConfig) error { // RenderTemplate renders a template string with the provided data. // This delegates to the shared generator implementation. 
-func (g *BaseGenerator) RenderTemplate(tmplContent string, data interface{}) (string, error) { +func (g *BaseGenerator) RenderTemplate(tmplContent string, data any) (string, error) { return g.BaseGenerator.RenderTemplate(tmplContent, data) } diff --git a/go/cli/internal/cli/agent/bug_report.go b/go/cli/internal/cli/agent/bug_report.go index f435fd3fd..549c67f4c 100644 --- a/go/cli/internal/cli/agent/bug_report.go +++ b/go/cli/internal/cli/agent/bug_report.go @@ -55,8 +55,8 @@ func BugReportCmd(cfg *config.Config) { if err != nil { fmt.Fprintf(os.Stderr, "Error getting pod names: %v\n", err) } else { - pods := strings.Split(string(output), "\n") - for _, pod := range pods { + pods := strings.SplitSeq(string(output), "\n") + for pod := range pods { if pod == "" { continue } diff --git a/go/cli/internal/cli/agent/const.go b/go/cli/internal/cli/agent/const.go index 5c64fc9c3..f982d4f18 100644 --- a/go/cli/internal/cli/agent/const.go +++ b/go/cli/internal/cli/agent/const.go @@ -12,7 +12,7 @@ const ( DefaultModelProvider = v1alpha2.ModelProviderOpenAI DefaultHelmOciRegistry = "oci://ghcr.io/kagent-dev/kagent/helm/" - //Provider specific env variables + // Provider specific env variables OPENAI_API_KEY = "OPENAI_API_KEY" ANTHROPIC_API_KEY = "ANTHROPIC_API_KEY" AZUREOPENAI_API_KEY = "AZUREOPENAI_API_KEY" @@ -28,7 +28,6 @@ const ( func GetModelProvider() v1alpha2.ModelProvider { modelProvider := os.Getenv(KAGENT_DEFAULT_MODEL_PROVIDER) if modelProvider == "" { - return DefaultModelProvider } switch modelProvider { diff --git a/go/cli/internal/cli/agent/const_test.go b/go/cli/internal/cli/agent/const_test.go index 16b126284..5f5fe0508 100644 --- a/go/cli/internal/cli/agent/const_test.go +++ b/go/cli/internal/cli/agent/const_test.go @@ -64,8 +64,8 @@ func TestGetModelProvider(t *testing.T) { if tc.envVarValue == "" { os.Unsetenv(KAGENT_DEFAULT_MODEL_PROVIDER) //nolint:errcheck } else { - os.Setenv(KAGENT_DEFAULT_MODEL_PROVIDER, tc.expectedHelmKey) //nolint:errcheck - defer 
os.Unsetenv(KAGENT_DEFAULT_MODEL_PROVIDER) //nolint:errcheck + os.Setenv(KAGENT_DEFAULT_MODEL_PROVIDER, tc.expectedHelmKey) + defer os.Unsetenv(KAGENT_DEFAULT_MODEL_PROVIDER) //nolint:errcheck } result := GetModelProvider() diff --git a/go/cli/internal/cli/agent/deploy.go b/go/cli/internal/cli/agent/deploy.go index 87b783688..2544a156f 100644 --- a/go/cli/internal/cli/agent/deploy.go +++ b/go/cli/internal/cli/agent/deploy.go @@ -4,9 +4,10 @@ import ( "bufio" "context" "fmt" + "maps" "os" "regexp" - "sort" + "slices" "strings" "time" @@ -18,7 +19,7 @@ import ( "github.com/kagent-dev/kmcp/api/v1alpha1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -246,7 +247,7 @@ func extractEnvVarsFromManifest(manifest *common.AgentManifest) []string { for varName := range envVarSet { envVars = append(envVars, varName) } - sort.Strings(envVars) + slices.Sort(envVars) return envVars } @@ -412,7 +413,7 @@ func createEnvFileSecret(ctx context.Context, k8sClient client.Client, namespace err := k8sClient.Get(ctx, client.ObjectKey{Namespace: namespace, Name: secretName}, existingSecret) if err != nil { - if errors.IsNotFound(err) { + if apierrors.IsNotFound(err) { if err := k8sClient.Create(ctx, secret); err != nil { return fmt.Errorf("failed to create env file secret: %v", err) } @@ -447,7 +448,7 @@ func waitForDeployment(ctx context.Context, k8sClient client.Client, namespace, for { select { case <-timeoutTimer.C: - return nil, errors.NewNotFound(appsv1.Resource("deployment"), name) + return nil, apierrors.NewNotFound(appsv1.Resource("deployment"), name) case <-ticker.C: err := k8sClient.Get(ctx, types.NamespacedName{ Name: name, @@ -461,7 +462,7 @@ func waitForDeployment(ctx context.Context, k8sClient client.Client, namespace, return deployment, nil } - if 
!errors.IsNotFound(err) { + if !apierrors.IsNotFound(err) { return nil, fmt.Errorf("error checking for deployment: %v", err) } } @@ -475,7 +476,7 @@ func restartAgentDeployment(ctx context.Context, k8sClient client.Client, cfg *D _, err := waitForDeployment(ctx, k8sClient, namespace, deploymentName, 30*time.Second, cfg.Config) if err != nil { - if errors.IsNotFound(err) { + if apierrors.IsNotFound(err) { if IsVerbose(cfg.Config) { fmt.Printf("Deployment '%s' not found after timeout, it may still be being created by the controller\n", deploymentName) } @@ -619,7 +620,7 @@ func createOrUpdateSecret(ctx context.Context, k8sClient client.Client, secret * }, existingSecret) if err != nil { - if errors.IsNotFound(err) { + if apierrors.IsNotFound(err) { // Create new secret if err := k8sClient.Create(ctx, secret); err != nil { return fmt.Errorf("failed to create secret: %v", err) @@ -713,7 +714,7 @@ func createOrUpdateAgent(ctx context.Context, k8sClient client.Client, agent *v1 err := k8sClient.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, existingAgent) if err != nil { - if errors.IsNotFound(err) { + if apierrors.IsNotFound(err) { // Agent does not exist, create it if err := k8sClient.Create(ctx, agent); err != nil { return fmt.Errorf("failed to create agent: %v", err) @@ -812,7 +813,7 @@ func createOrUpdateRemoteMCPServer(ctx context.Context, k8sClient client.Client, err := k8sClient.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, existingRemoteMCPServer) if err != nil { - if errors.IsNotFound(err) { + if apierrors.IsNotFound(err) { // Create new RemoteMCPServer if err := k8sClient.Create(ctx, remoteMCPServer); err != nil { return fmt.Errorf("failed to create RemoteMCPServer: %v", err) @@ -900,7 +901,7 @@ func createOrUpdateMCPServer(ctx context.Context, k8sClient client.Client, mcpSe err := k8sClient.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, existingMCPServer) if err != nil { - if errors.IsNotFound(err) { + if 
apierrors.IsNotFound(err) { // Create new MCPServer if err := k8sClient.Create(ctx, mcpServerResource); err != nil { return fmt.Errorf("failed to create MCPServer: %v", err) @@ -1086,7 +1087,7 @@ func createOrUpdateEnvSecret(ctx context.Context, k8sClient client.Client, names err := k8sClient.Get(ctx, client.ObjectKey{Namespace: namespace, Name: secretName}, existingSecret) if err != nil { - if errors.IsNotFound(err) { + if apierrors.IsNotFound(err) { // Secret doesn't exist, create it with all data secret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ @@ -1109,9 +1110,7 @@ func createOrUpdateEnvSecret(ctx context.Context, k8sClient client.Client, names } // Secret exists, merge the new data with existing data - for key, value := range secretData { - existingSecret.Data[key] = value - } + maps.Copy(existingSecret.Data, secretData) if err := k8sClient.Update(ctx, existingSecret); err != nil { return fmt.Errorf("failed to update existing secret: %v", err) diff --git a/go/cli/internal/cli/agent/format.go b/go/cli/internal/cli/agent/format.go index a1f5bb619..0688bd599 100644 --- a/go/cli/internal/cli/agent/format.go +++ b/go/cli/internal/cli/agent/format.go @@ -17,16 +17,16 @@ const ( OutputFormatTable OutputFormat = "table" ) -func printOutput(data interface{}, tableHeaders []string, tableRows [][]string) error { +func printOutput(data any, tableHeaders []string, tableRows [][]string) error { format := OutputFormat(viper.GetString("output_format")) tw := table.NewWriter() - headers := slices.Collect(utils.Map(slices.Values(tableHeaders), func(header string) interface{} { + headers := slices.Collect(utils.Map(slices.Values(tableHeaders), func(header string) any { return header })) tw.AppendHeader(headers) rows := slices.Collect(utils.Map(slices.Values(tableRows), func(row []string) table.Row { - return slices.Collect(utils.Map(slices.Values(row), func(cell string) interface{} { + return slices.Collect(utils.Map(slices.Values(row), func(cell string) any { return 
cell })) })) @@ -43,7 +43,7 @@ func printOutput(data interface{}, tableHeaders []string, tableRows [][]string) } } -func printJSON(data interface{}) error { +func printJSON(data any) error { output, err := json.MarshalIndent(data, "", " ") if err != nil { return fmt.Errorf("error formatting JSON: %w", err) diff --git a/go/cli/internal/cli/agent/get.go b/go/cli/internal/cli/agent/get.go index 241831178..9c07e7c6f 100644 --- a/go/cli/internal/cli/agent/get.go +++ b/go/cli/internal/cli/agent/get.go @@ -40,7 +40,7 @@ func GetAgentCmd(cfg *config.Config, resourceName string) { return } byt, _ := json.MarshalIndent(agent, "", " ") - fmt.Fprintln(os.Stdout, string(byt)) //nolint:errcheck + fmt.Fprintln(os.Stdout, string(byt)) } } @@ -69,7 +69,7 @@ func GetSessionCmd(cfg *config.Config, resourceName string) { return } byt, _ := json.MarshalIndent(session, "", " ") - fmt.Fprintln(os.Stdout, string(byt)) //nolint:errcheck + fmt.Fprintln(os.Stdout, string(byt)) } } diff --git a/go/cli/internal/cli/agent/install.go b/go/cli/internal/cli/agent/install.go index 96baf4fb1..5742efb57 100644 --- a/go/cli/internal/cli/agent/install.go +++ b/go/cli/internal/cli/agent/install.go @@ -119,7 +119,7 @@ func InteractiveInstallCmd(ctx context.Context, c *ishell.Context) *PortForward // get model provider from KAGENT_DEFAULT_MODEL_PROVIDER environment variable or use DefaultModelProvider modelProvider := GetModelProvider() - //if model provider is openai, check if the api key is set + // if model provider is openai, check if the api key is set apiKeyName := GetProviderAPIKey(modelProvider) apiKeyValue := os.Getenv(apiKeyName) @@ -160,7 +160,7 @@ func setupHelmConfig(modelProvider v1alpha2.ModelProvider, apiKeyValue string) h fmt.Sprintf("providers.%s.apiKey=%s", helmProviderKey, apiKeyValue), } - //allow user to set the helm registry and version + // allow user to set the helm registry and version helmRegistry := GetEnvVarWithDefault(KAGENT_HELM_REPO, DefaultHelmOciRegistry) helmVersion := 
GetEnvVarWithDefault(KAGENT_HELM_VERSION, version.Version) helmExtraArgs := GetEnvVarWithDefault(KAGENT_HELM_EXTRA_ARGS, "") @@ -227,7 +227,7 @@ func install(ctx context.Context, cfg *config.Config, helmConfig helmConfig, mod // Stop the spinner completely before printing the success message s.Stop() - fmt.Fprintln(os.Stdout, "kagent installed successfully") //nolint:errcheck + fmt.Fprintln(os.Stdout, "kagent installed successfully") pf, err := NewPortForward(ctx, cfg) if err != nil { @@ -253,11 +253,11 @@ func deleteCRDs(ctx context.Context) error { if out, err := deleteCmd.CombinedOutput(); err != nil { if !strings.Contains(string(out), "not found") { errMsg := fmt.Sprintf("Error deleting CRD %s: %s", crd, string(out)) - fmt.Fprintln(os.Stderr, errMsg) //nolint:errcheck + fmt.Fprintln(os.Stderr, errMsg) deleteErrors = append(deleteErrors, errMsg) } } else { - fmt.Fprintf(os.Stdout, "Successfully deleted CRD %s\n", crd) //nolint:errcheck + fmt.Fprintf(os.Stdout, "Successfully deleted CRD %s\n", crd) } } @@ -329,7 +329,7 @@ func UninstallCmd(ctx context.Context, cfg *config.Config) { } s.Stop() - fmt.Fprintln(os.Stdout, "\nkagent uninstalled successfully") //nolint:errcheck + fmt.Fprintln(os.Stdout, "\nkagent uninstalled successfully") } func checkHelmAvailable() error { diff --git a/go/cli/internal/cli/agent/invoke.go b/go/cli/internal/cli/agent/invoke.go index 470e802b5..f820c9d87 100644 --- a/go/cli/internal/cli/agent/invoke.go +++ b/go/cli/internal/cli/agent/invoke.go @@ -67,7 +67,6 @@ func InvokeCmd(ctx context.Context, cfg *InvokeCfg) { var a2aClient *a2aclient.A2AClient var err error if cfg.URLOverride != "" { - a2aClient, err = a2aclient.NewA2AClient(cfg.URLOverride, a2aclient.WithTimeout(cfg.Config.Timeout)) if err != nil { fmt.Fprintf(os.Stderr, "Error creating A2A client: %v\n", err) @@ -139,6 +138,6 @@ func InvokeCmd(ctx context.Context, cfg *InvokeCfg) { return } - fmt.Fprintf(os.Stdout, "%+v\n", string(jsn)) //nolint:errcheck + fmt.Fprintf(os.Stdout, 
"%+v\n", string(jsn)) } } diff --git a/go/cli/internal/cli/agent/run.go b/go/cli/internal/cli/agent/run.go index f07bd9877..98fb2b956 100644 --- a/go/cli/internal/cli/agent/run.go +++ b/go/cli/internal/cli/agent/run.go @@ -19,7 +19,7 @@ import ( type RunCfg struct { ProjectDir string Config *config.Config - Build bool + Build bool } // RunCmd starts docker-compose in the background and launches a chat session with the local agent diff --git a/go/cli/internal/cli/agent/utils.go b/go/cli/internal/cli/agent/utils.go index 4cac28fd2..f21c84e11 100644 --- a/go/cli/internal/cli/agent/utils.go +++ b/go/cli/internal/cli/agent/utils.go @@ -8,7 +8,7 @@ import ( "os/exec" "path/filepath" "regexp" - "sort" + "slices" "time" "github.com/kagent-dev/kagent/go/api/v1alpha2" @@ -61,7 +61,7 @@ func NewPortForward(ctx context.Context, cfg *config.Config) (*PortForward, erro client := cfg.Client() var err error - for i := 0; i < 10; i++ { + for range 10 { err = CheckServerConnection(ctx, client) if err == nil { // Connection successful, port-forward is working @@ -97,17 +97,17 @@ func StreamA2AEvents(ch <-chan protocol.StreamingMessageEvent, verbose bool) { fmt.Fprintf(os.Stderr, "Error marshaling A2A event: %v\n", err) continue } - fmt.Fprintf(os.Stdout, "%+v\n", string(json)) //nolint:errcheck + fmt.Fprintf(os.Stdout, "%+v\n", string(json)) } else { json, err := event.MarshalJSON() if err != nil { fmt.Fprintf(os.Stderr, "Error marshaling A2A event: %v\n", err) continue } - fmt.Fprintf(os.Stdout, "%+v\n", string(json)) //nolint:errcheck + fmt.Fprintf(os.Stdout, "%+v\n", string(json)) } } - fmt.Fprintln(os.Stdout) //nolint:errcheck // Add a newline after streaming is complete + fmt.Fprintln(os.Stdout) } // ResolveProjectDir resolves the project directory to an absolute path @@ -168,7 +168,7 @@ func ReadTemplateFile(templatePath string) ([]byte, error) { } // RenderTemplate reads and renders a template file with the given data -func RenderTemplate(templatePath string, data interface{}) 
(string, error) { +func RenderTemplate(templatePath string, data any) (string, error) { gen := pygen.NewPythonGenerator() tmplBytes, err := fs.ReadFile(gen.TemplateFiles, templatePath) if err != nil { @@ -265,7 +265,7 @@ func extractEnvVarsFromHeaders(mcpServers []common.McpServerType) []string { for varName := range envVarSet { envVars = append(envVars, varName) } - sort.Strings(envVars) + slices.Sort(envVars) return envVars } diff --git a/go/cli/internal/cli/mcp/add_tool.go b/go/cli/internal/cli/mcp/add_tool.go index f9f456c05..7877e1365 100644 --- a/go/cli/internal/cli/mcp/add_tool.go +++ b/go/cli/internal/cli/mcp/add_tool.go @@ -4,6 +4,7 @@ import ( "fmt" "os" "path/filepath" + "slices" "strings" commonfs "github.com/kagent-dev/kagent/go/cli/internal/common/fs" @@ -119,10 +120,8 @@ func validateToolName(name string) error { // Check for reserved names reservedNames := []string{"server", "main", "core", "utils", "init", "test"} - for _, reserved := range reservedNames { - if strings.ToLower(name) == reserved { - return fmt.Errorf("'%s' is a reserved name", name) - } + if slices.Contains(reservedNames, strings.ToLower(name)) { + return fmt.Errorf("'%s' is a reserved name", name) } return nil diff --git a/go/cli/internal/cli/mcp/deploy.go b/go/cli/internal/cli/mcp/deploy.go index fa41e5e62..767f30926 100644 --- a/go/cli/internal/cli/mcp/deploy.go +++ b/go/cli/internal/cli/mcp/deploy.go @@ -597,7 +597,7 @@ func applyToCluster(projectDir, yamlContent string, mcpServer *v1alpha1.MCPServe if mcpServer.Spec.Deployment.Port != 0 { port = mcpServer.Spec.Deployment.Port } - serverConfig := map[string]interface{}{ + serverConfig := map[string]any{ "type": "streamable-http", "url": fmt.Sprintf("http://localhost:%d/mcp", port), } diff --git a/go/cli/internal/cli/mcp/init.go b/go/cli/internal/cli/mcp/init.go index a492d2d91..53fee8a3d 100644 --- a/go/cli/internal/cli/mcp/init.go +++ b/go/cli/internal/cli/mcp/init.go @@ -63,7 +63,6 @@ func runInitFramework( projectName, 
framework string, customizeProjectConfig func(*mcp.ProjectConfig) error, ) error { - // Validate project name if err := validateProjectName(projectName); err != nil { return fmt.Errorf("invalid project name: %w", err) diff --git a/go/cli/internal/cli/mcp/inspector.go b/go/cli/internal/cli/mcp/inspector.go index 080be514c..94aa2ab90 100644 --- a/go/cli/internal/cli/mcp/inspector.go +++ b/go/cli/internal/cli/mcp/inspector.go @@ -18,14 +18,14 @@ func checkNpxInstalled() error { } // createMCPInspectorConfig creates an MCP inspector configuration file -func createMCPInspectorConfig(serverName string, serverConfig map[string]interface{}, configPath string) error { +func createMCPInspectorConfig(serverName string, serverConfig map[string]any, configPath string) error { cfg, err := config.Get() if err != nil { return fmt.Errorf("failed to get config: %w", err) } - config := map[string]interface{}{ - "mcpServers": map[string]interface{}{ + config := map[string]any{ + "mcpServers": map[string]any{ serverName: serverConfig, }, } diff --git a/go/cli/internal/cli/mcp/run.go b/go/cli/internal/cli/mcp/run.go index 68646a857..8fb51d8d1 100644 --- a/go/cli/internal/cli/mcp/run.go +++ b/go/cli/internal/cli/mcp/run.go @@ -136,7 +136,7 @@ func runFastMCPPython(projectDir string, manifest *manifests.ProjectManifest) er } // Create server configuration for inspector - serverConfig := map[string]interface{}{ + serverConfig := map[string]any{ "command": "uv", "args": []string{"run", "python", "src/main.py"}, } @@ -189,7 +189,7 @@ func runMCPGo(projectDir string, manifest *manifests.ProjectManifest) error { } // Create server configuration for inspector - serverConfig := map[string]interface{}{ + serverConfig := map[string]any{ "command": "go", "args": []string{"run", "cmd/server/main.go"}, } @@ -290,7 +290,7 @@ func runTypeScript(projectDir string, manifest *manifests.ProjectManifest) error } // Create server configuration for inspector - serverConfig := map[string]interface{}{ + 
serverConfig := map[string]any{ "command": "npx", "args": []string{"tsx", "src/index.ts"}, } @@ -356,14 +356,14 @@ func runJava(projectDir string, manifest *manifests.ProjectManifest) error { } // Create server configuration for inspector - var serverConfig map[string]interface{} + var serverConfig map[string]any if runTransport == transportHTTP { - serverConfig = map[string]interface{}{ + serverConfig = map[string]any{ "type": "streamable-http", "url": "http://localhost:3000/mcp", } } else { - serverConfig = map[string]interface{}{ + serverConfig = map[string]any{ "command": "mvn", "args": mavenArgs, } diff --git a/go/cli/internal/cli/mcp/secrets.go b/go/cli/internal/cli/mcp/secrets.go index bd2e84a39..7fd8d24a6 100644 --- a/go/cli/internal/cli/mcp/secrets.go +++ b/go/cli/internal/cli/mcp/secrets.go @@ -206,9 +206,9 @@ func loadEnvFile(filename string) (map[string]string, error) { } envVars := make(map[string]string) - lines := strings.Split(string(data), "\n") + lines := strings.SplitSeq(string(data), "\n") - for _, line := range lines { + for line := range lines { line = strings.TrimSpace(line) if line == "" || strings.HasPrefix(line, "#") { continue // Skip empty lines and comments diff --git a/go/cli/internal/common/generator/base.go b/go/cli/internal/common/generator/base.go index e6d8dda04..f1d05c24b 100644 --- a/go/cli/internal/common/generator/base.go +++ b/go/cli/internal/common/generator/base.go @@ -125,7 +125,7 @@ func (g *BaseGenerator) GenerateProject(config ProjectConfig) error { // RenderTemplate renders a template string with the provided data. // This is the core template rendering logic used by all generators. 
-func (g *BaseGenerator) RenderTemplate(tmplContent string, data interface{}) (string, error) { +func (g *BaseGenerator) RenderTemplate(tmplContent string, data any) (string, error) { tmpl, err := template.New("template").Parse(tmplContent) if err != nil { return "", fmt.Errorf("failed to parse template: %w", err) diff --git a/go/cli/internal/common/k8s/config.go b/go/cli/internal/common/k8s/config.go index 60bf45af7..763147360 100644 --- a/go/cli/internal/common/k8s/config.go +++ b/go/cli/internal/common/k8s/config.go @@ -43,8 +43,8 @@ func GetCurrentKindClusterName() (string, error) { } const kindPrefix = "kind-" - if strings.HasPrefix(currentContext.Cluster, kindPrefix) { - return strings.TrimPrefix(currentContext.Cluster, kindPrefix), nil + if after, ok0 := strings.CutPrefix(currentContext.Cluster, kindPrefix); ok0 { + return after, nil } return "", fmt.Errorf("current cluster %q is not a kind cluster", currentContext.Cluster) diff --git a/go/cli/internal/config/utils_test.go b/go/cli/internal/config/utils_test.go index 20a102e91..c64c3c030 100644 --- a/go/cli/internal/config/utils_test.go +++ b/go/cli/internal/config/utils_test.go @@ -32,18 +32,18 @@ func TestHandlesErrorWhenCreatingConfigDir(t *testing.T) { func checkGetConfig(t *testing.T, homeDir string) { configDir, err := GetConfigDir(homeDir) - //check for error + // check for error if err != nil { t.Fatalf("Expected no error, but got %v", err) } - //check it's equal to the expected path + // check it's equal to the expected path expectedDir := path.Join(homeDir, ".config", "kagent") if configDir != expectedDir { t.Fatalf("Expected %s, but got %s", expectedDir, configDir) } - //check kagent folder is exists + // check kagent folder is exists if _, err := os.Stat(expectedDir); os.IsNotExist(err) { t.Fatalf("Expected %s to exist, but it does not", path.Join(homeDir, "kagent")) } diff --git a/go/cli/internal/mcp/frameworks/common/base_generator.go b/go/cli/internal/mcp/frameworks/common/base_generator.go 
index e70a7e826..5ab4d3027 100644 --- a/go/cli/internal/mcp/frameworks/common/base_generator.go +++ b/go/cli/internal/mcp/frameworks/common/base_generator.go @@ -63,7 +63,6 @@ func (g *BaseGenerator) GenerateProject(config mcp.ProjectConfig) error { // GenerateTool generates a new tool for a project. func (g *BaseGenerator) GenerateTool(projectRoot string, config mcp.ToolConfig) error { - templateRoot, err := fs.Sub(g.TemplateFiles, "templates") if err != nil { return fmt.Errorf("failed to get templates subdirectory: %w", err) @@ -106,7 +105,7 @@ func (g *BaseGenerator) GenerateToolFile(filePath string, config mcp.ToolConfig) toolName := config.ToolName toolNamePascalCase := cases.Title(language.English).String(toolName) toolNameCamelCase := strcase.LowerCamelCase(toolName) - data := map[string]interface{}{ + data := map[string]any{ "ToolName": toolName, "ToolNameCamelCase": toolNameCamelCase, "ToolNameTitle": cases.Title(language.English).String(toolName), diff --git a/go/cli/internal/mcp/frameworks/golang/generator.go b/go/cli/internal/mcp/frameworks/golang/generator.go index 2bf06d689..a2a47de65 100644 --- a/go/cli/internal/mcp/frameworks/golang/generator.go +++ b/go/cli/internal/mcp/frameworks/golang/generator.go @@ -27,7 +27,6 @@ func NewGenerator() *Generator { // GenerateProject generates a new Go project. 
func (g *Generator) GenerateProject(config mcp.ProjectConfig) error { - if config.Verbose { fmt.Println("Generating Golang MCP project...") } diff --git a/go/cli/internal/mcp/frameworks/java/generator.go b/go/cli/internal/mcp/frameworks/java/generator.go index ac0896f81..0db8ad459 100644 --- a/go/cli/internal/mcp/frameworks/java/generator.go +++ b/go/cli/internal/mcp/frameworks/java/generator.go @@ -96,7 +96,7 @@ func (g *Generator) regenerateToolsClass(toolsDir string) error { } // Create template data - templateData := map[string]interface{}{ + templateData := map[string]any{ "Tools": tools, } @@ -139,7 +139,7 @@ func (g *Generator) scanToolsDirectory(toolsDir string) ([]string, error) { } // renderTemplate renders a template string with the provided data -func renderTemplate(tmplContent string, data interface{}) (string, error) { +func renderTemplate(tmplContent string, data any) (string, error) { tmpl, err := texttemplate.New("template").Parse(tmplContent) if err != nil { return "", fmt.Errorf("failed to parse template: %w", err) diff --git a/go/cli/internal/mcp/manifests/manager.go b/go/cli/internal/mcp/manifests/manager.go index 8661a70a4..93ad427d6 100644 --- a/go/cli/internal/mcp/manifests/manager.go +++ b/go/cli/internal/mcp/manifests/manager.go @@ -4,6 +4,7 @@ import ( "fmt" "os" "path/filepath" + "slices" "strings" "time" @@ -208,12 +209,7 @@ func isValidFramework(framework string) bool { FrameworkJava, } - for _, valid := range validFrameworks { - if framework == valid { - return true - } - } - return false + return slices.Contains(validFrameworks, framework) } func isValidSecretProvider(provider string) bool { @@ -222,10 +218,5 @@ func isValidSecretProvider(provider string) bool { SecretProviderKubernetes, } - for _, valid := range validProviders { - if provider == valid { - return true - } - } - return false + return slices.Contains(validProviders, provider) } diff --git a/go/cli/internal/mcp/manifests/types.go b/go/cli/internal/mcp/manifests/types.go 
index 88b17c0a2..e9bee9bed 100644 --- a/go/cli/internal/mcp/manifests/types.go +++ b/go/cli/internal/mcp/manifests/types.go @@ -25,12 +25,12 @@ type ProjectManifest struct { // ToolConfig represents configuration for an MCP tool type ToolConfig struct { - Name string `yaml:"name" json:"name"` - Description string `yaml:"description,omitempty" json:"description,omitempty"` - Handler string `yaml:"handler,omitempty" json:"handler,omitempty"` - Enabled bool `yaml:"enabled" json:"enabled"` - Type string `yaml:"type,omitempty" json:"type,omitempty"` - Config map[string]interface{} `yaml:"config,omitempty" json:"config,omitempty"` + Name string `yaml:"name" json:"name"` + Description string `yaml:"description,omitempty" json:"description,omitempty"` + Handler string `yaml:"handler,omitempty" json:"handler,omitempty"` + Enabled bool `yaml:"enabled" json:"enabled"` + Type string `yaml:"type,omitempty" json:"type,omitempty"` + Config map[string]any `yaml:"config,omitempty" json:"config,omitempty"` } // SecretsConfig defines the secret management configuration diff --git a/go/cli/internal/tui/chat.go b/go/cli/internal/tui/chat.go index d0a41e3e5..4004188e1 100644 --- a/go/cli/internal/tui/chat.go +++ b/go/cli/internal/tui/chat.go @@ -35,15 +35,15 @@ type a2aEventMsg struct { type streamDoneMsg struct{} type toolCall struct { - Name string `json:"name"` - ID string `json:"id"` - Args interface{} `json:"args"` + Name string `json:"name"` + ID string `json:"id"` + Args any `json:"args"` } type toolResult struct { - Name string `json:"name"` - ID string `json:"id"` - Response interface{} `json:"response"` + Name string `json:"name"` + ID string `json:"id"` + Response any `json:"response"` } type chatModel struct { @@ -136,10 +136,7 @@ func (m *chatModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) { inputHeight = 0 } sepHeight := 2 // extra line for status - vpHeight := msg.Height - inputHeight - sepHeight - if vpHeight < 5 { - vpHeight = 5 - } + vpHeight := 
max(msg.Height-inputHeight-sepHeight, 5) oldWidth := m.vp.Width m.vp.Width = msg.Width @@ -342,7 +339,7 @@ func (m *chatModel) handleMessageParts(msg protocol.Message, shouldDisplay bool) continue } - dataMap, ok := dp.Data.(map[string]interface{}) + dataMap, ok := dp.Data.(map[string]any) if !ok { continue } @@ -470,13 +467,6 @@ func extractTextFromParts(parts []protocol.Part) string { // styles now provided by theme package -func max(a, b int) int { - if a > b { - return a - } - return b -} - type tickMsg time.Time func (m *chatModel) tick() tea.Cmd { @@ -508,7 +498,7 @@ func (m *chatModel) updateStatus() { } // getString safely extracts a string value from a map -func getString(m map[string]interface{}, key string) string { +func getString(m map[string]any, key string) string { if val, ok := m[key]; ok { if str, ok := val.(string); ok { return str diff --git a/go/cli/internal/tui/dialogs/agent_chooser.go b/go/cli/internal/tui/dialogs/agent_chooser.go index 1d3db23ef..2217715a6 100644 --- a/go/cli/internal/tui/dialogs/agent_chooser.go +++ b/go/cli/internal/tui/dialogs/agent_chooser.go @@ -59,10 +59,7 @@ func (a *AgentChooser) buildColumns(innerWidth int) []table.Column { // Agent should be ~ 20% of the width agentW := innerWidth * 2 / 10 // Namespace should be ~ 5% of the width (with a small minimum) - namespaceW := innerWidth * 5 / 100 - if namespaceW < 6 { - namespaceW = 6 - } + namespaceW := max(innerWidth*5/100, 6) descW := innerWidth - agentW - namespaceW columns := []table.Column{ @@ -149,10 +146,3 @@ func (a *AgentChooser) View() string { func (a *AgentChooser) ID() string { return a.id } func (a *AgentChooser) Fullscreen() bool { return true } - -func max(x, y int) int { - if x > y { - return x - } - return y -} diff --git a/go/cli/internal/tui/dialogs/mcp_server_wizard.go b/go/cli/internal/tui/dialogs/mcp_server_wizard.go index 2211a2bea..373e3071e 100644 --- a/go/cli/internal/tui/dialogs/mcp_server_wizard.go +++ 
b/go/cli/internal/tui/dialogs/mcp_server_wizard.go @@ -382,10 +382,7 @@ func (w *McpServerWizard) View() string { inner := lipgloss.JoinVertical(lipgloss.Left, header, bodyPadded) // Calculate box width: aim for 80% of screen width with reasonable min/max bounds - boxWidth := maxInt(60, (w.width*8)/10) - if boxWidth > w.width-10 { - boxWidth = w.width - 10 - } + boxWidth := min(maxInt(60, (w.width*8)/10), w.width-10) box := lipgloss.NewStyle(). Width(boxWidth). diff --git a/go/cli/internal/tui/workspace.go b/go/cli/internal/tui/workspace.go index d2030f041..dc0c93cab 100644 --- a/go/cli/internal/tui/workspace.go +++ b/go/cli/internal/tui/workspace.go @@ -5,7 +5,7 @@ import ( "encoding/json" "fmt" "net/http" - "sort" + "slices" "strings" "github.com/charmbracelet/bubbles/help" @@ -222,8 +222,8 @@ func (m *workspaceModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) { return m, nil } // Sort and store agents for later; do not auto-open chooser or auto-select. - sort.SliceStable(msg.agents, func(i, j int) bool { - return utils.GetObjectRef(msg.agents[i].Agent) < utils.GetObjectRef(msg.agents[j].Agent) + slices.SortStableFunc(msg.agents, func(a, b api.AgentResponse) int { + return strings.Compare(utils.GetObjectRef(a.Agent), utils.GetObjectRef(a.Agent)) }) m.agents = msg.agents // Keep welcome screen visible until user presses Ctrl+A @@ -439,19 +439,13 @@ func (m *workspaceModel) resize() tea.Cmd { headerLines := lineCount(renderTitle(m.width)) helpView := m.help.View(m.keys) footerLines := lineCount(helpView) - availableHeight := m.height - headerLines - footerLines - if availableHeight < 1 { - availableHeight = 1 - } + availableHeight := max(m.height-headerLines-footerLines, 1) sidebarWidth := 30 detailsWidth := 0 if m.showDetails { detailsWidth = 32 } - centerWidth := m.width - sidebarWidth - detailsWidth - if centerWidth < 20 { - centerWidth = 20 - } + centerWidth := max(m.width-sidebarWidth-detailsWidth, 20) m.sessions.SetSize(sidebarWidth, availableHeight) if m.chat 
!= nil { @@ -500,7 +494,7 @@ func (m *workspaceModel) fetchSessionHistoryCmd(sessionID string) tea.Cmd { if err != nil { return sessionHistoryLoadedMsg{items: nil, err: err} } - defer resp.Body.Close() //nolint:errcheck + defer resp.Body.Close() var payload struct { Data []*protocol.Task `json:"data"` } @@ -584,10 +578,7 @@ func (m *workspaceModel) View() string { centerStyled := lipgloss.NewStyle().Width(centerWidth).Render(func() string { if m.agent == nil { // Start page: show instructions to select an agent - boxWidth := centerWidth - 4 - if boxWidth > 72 { - boxWidth = 72 - } + boxWidth := min(centerWidth-4, 72) if boxWidth < 40 { boxWidth = max(20, centerWidth-4) } @@ -621,10 +612,7 @@ func (m *workspaceModel) View() string { } if m.agent != nil && len(m.sessions.Items()) == 0 { // Agent selected but no sessions yet - boxWidth := centerWidth - 4 - if boxWidth > 72 { - boxWidth = 72 - } + boxWidth := min(centerWidth-4, 72) if boxWidth < 40 { boxWidth = max(20, centerWidth-4) } @@ -673,10 +661,7 @@ func (m *workspaceModel) View() string { // Force main area height so footer stays pinned at bottom headerLines := lineCount(logo) footerLines := lineCount(footer) - available := m.height - headerLines - footerLines - if available < 1 { - available = 1 - } + available := max(m.height-headerLines-footerLines, 1) mainRow = lipgloss.NewStyle().Height(available).Render(mainRow) content := lipgloss.JoinVertical(lipgloss.Left, logo, mainRow, footer) @@ -695,13 +680,7 @@ func (m *workspaceModel) View() string { if h == 0 { h = 24 } - modalWidth := w / 2 - if modalWidth < 40 { - modalWidth = 40 - } - if modalWidth > w-6 { - modalWidth = w - 6 - } + modalWidth := min(max(w/2, 40), w-6) // ensure input fits the modal m.sessionInput.Width = max(10, modalWidth-4) modal := lipgloss.NewStyle().Width(modalWidth).Border(lipgloss.RoundedBorder()).BorderForeground(theme.ColorBorder).Padding(1, 2).Render( @@ -739,12 +718,19 @@ func lineCount(s string) int { // sortSessions sorts 
sessions by UpdatedAt then CreatedAt descending. func sortSessions(sessions []*api.Session) { - sort.Slice(sessions, func(i, j int) bool { - ui := sessions[i].UpdatedAt - uj := sessions[j].UpdatedAt - if !ui.Equal(uj) { - return ui.After(uj) + slices.SortStableFunc(sessions, func(i, j *api.Session) int { + if i.UpdatedAt.After(j.UpdatedAt) { + return -1 + } + if j.UpdatedAt.After(i.UpdatedAt) { + return 1 + } + if i.CreatedAt.After(j.CreatedAt) { + return -1 } - return sessions[i].CreatedAt.After(sessions[j].CreatedAt) + if j.CreatedAt.After(i.CreatedAt) { + return 1 + } + return 0 }) } diff --git a/go/cmd/controller/main.go b/go/cmd/controller/main.go index a09234cc0..2049e6962 100644 --- a/go/cmd/controller/main.go +++ b/go/cmd/controller/main.go @@ -25,7 +25,7 @@ import ( _ "k8s.io/client-go/plugin/pkg/client/auth" ) -// nolint:gocyclo +//nolint:gocyclo func main() { authorizer := &auth.NoopAuthorizer{} authenticator := &auth.UnsecureAuthenticator{} diff --git a/go/internal/a2a/a2a_handler_mux.go b/go/internal/a2a/a2a_handler_mux.go index e78acead2..f0587331e 100644 --- a/go/internal/a2a/a2a_handler_mux.go +++ b/go/internal/a2a/a2a_handler_mux.go @@ -77,7 +77,6 @@ func (a *handlerMux) getHandler(name string) (http.Handler, bool) { } func (a *handlerMux) ServeHTTP(w http.ResponseWriter, r *http.Request) { - vars := mux.Vars(r) // get the handler name from the first path segment agentNamespace, ok := vars["namespace"] diff --git a/go/internal/a2a/a2a_registrar.go b/go/internal/a2a/a2a_registrar.go index 684e552a0..c1c7b3245 100644 --- a/go/internal/a2a/a2a_registrar.go +++ b/go/internal/a2a/a2a_registrar.go @@ -73,14 +73,14 @@ func (a *A2ARegistrar) Start(ctx context.Context) error { } if _, err := informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { + AddFunc: func(obj any) { if agent, ok := obj.(*v1alpha2.Agent); ok { if err := a.upsertAgentHandler(ctx, agent, log); err != nil { log.Error(err, "failed to upsert A2A 
handler", "agent", common.GetObjectRef(agent)) } } }, - UpdateFunc: func(oldObj, newObj interface{}) { + UpdateFunc: func(oldObj, newObj any) { oldAgent, ok1 := oldObj.(*v1alpha2.Agent) newAgent, ok2 := newObj.(*v1alpha2.Agent) if !ok1 || !ok2 { @@ -92,7 +92,7 @@ func (a *A2ARegistrar) Start(ctx context.Context) error { } } }, - DeleteFunc: func(obj interface{}) { + DeleteFunc: func(obj any) { agent, ok := obj.(*v1alpha2.Agent) if !ok { if tombstone, ok := obj.(cache.DeletedFinalStateUnknown); ok { diff --git a/go/internal/adk/types.go b/go/internal/adk/types.go index 8935328ac..ca08f8ab3 100644 --- a/go/internal/adk/types.go +++ b/go/internal/adk/types.go @@ -96,7 +96,7 @@ func (a *AzureOpenAI) GetType() string { } func (a *AzureOpenAI) MarshalJSON() ([]byte, error) { - return json.Marshal(map[string]interface{}{ + return json.Marshal(map[string]any{ "type": ModelTypeAzureOpenAI, "model": a.Model, "headers": a.Headers, @@ -109,7 +109,7 @@ type Anthropic struct { } func (a *Anthropic) MarshalJSON() ([]byte, error) { - return json.Marshal(map[string]interface{}{ + return json.Marshal(map[string]any{ "type": ModelTypeAnthropic, "model": a.Model, "base_url": a.BaseUrl, @@ -126,7 +126,7 @@ type GeminiVertexAI struct { } func (g *GeminiVertexAI) MarshalJSON() ([]byte, error) { - return json.Marshal(map[string]interface{}{ + return json.Marshal(map[string]any{ "type": ModelTypeGeminiVertexAI, "model": g.Model, "headers": g.Headers, @@ -142,7 +142,7 @@ type GeminiAnthropic struct { } func (g *GeminiAnthropic) MarshalJSON() ([]byte, error) { - return json.Marshal(map[string]interface{}{ + return json.Marshal(map[string]any{ "type": ModelTypeGeminiAnthropic, "model": g.Model, "headers": g.Headers, @@ -158,7 +158,7 @@ type Ollama struct { } func (o *Ollama) MarshalJSON() ([]byte, error) { - return json.Marshal(map[string]interface{}{ + return json.Marshal(map[string]any{ "type": ModelTypeOllama, "model": o.Model, "headers": o.Headers, @@ -174,7 +174,7 @@ type Gemini struct { 
} func (g *Gemini) MarshalJSON() ([]byte, error) { - return json.Marshal(map[string]interface{}{ + return json.Marshal(map[string]any{ "type": ModelTypeGemini, "model": g.Model, "headers": g.Headers, @@ -282,7 +282,7 @@ func (a *AgentConfig) UnmarshalJSON(data []byte) error { var _ sql.Scanner = &AgentConfig{} -func (a *AgentConfig) Scan(value interface{}) error { +func (a *AgentConfig) Scan(value any) error { return json.Unmarshal(value.([]byte), a) } diff --git a/go/internal/controller/agent_controller.go b/go/internal/controller/agent_controller.go index fea5b3967..18a3e31c1 100644 --- a/go/internal/controller/agent_controller.go +++ b/go/internal/controller/agent_controller.go @@ -202,7 +202,6 @@ func (r *AgentController) findAgentsUsingMCPServer(ctx context.Context, cl clien agents = append(agents, &agent) } } - } return agents @@ -242,7 +241,6 @@ func (r *AgentController) findAgentsUsingRemoteMCPServer(ctx context.Context, cl } for _, agent := range agentsList.Items { - agent := agent appendAgentIfUsesRemoteMCPServer(&agent) } @@ -250,7 +248,6 @@ func (r *AgentController) findAgentsUsingRemoteMCPServer(ctx context.Context, cl } func (r *AgentController) findAgentsUsingMCPService(ctx context.Context, cl client.Client, obj types.NamespacedName) []*v1alpha2.Agent { - var agentsList v1alpha2.AgentList if err := cl.List( ctx, @@ -314,7 +311,6 @@ func (r *AgentController) findAgentsUsingModelConfig(ctx context.Context, cl cli if agent.Spec.Declarative.ModelConfig == obj.Name { agents = append(agents, agent) } - } return agents diff --git a/go/internal/controller/modelconfig_controller.go b/go/internal/controller/modelconfig_controller.go index af414380c..afdbe8396 100644 --- a/go/internal/controller/modelconfig_controller.go +++ b/go/internal/controller/modelconfig_controller.go @@ -21,7 +21,7 @@ import ( "github.com/kagent-dev/kagent/go/internal/controller/reconciler" - v1 "k8s.io/api/core/v1" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" 
"k8s.io/apimachinery/pkg/types" "k8s.io/utils/ptr" @@ -65,7 +65,7 @@ func (r *ModelConfigController) SetupWithManager(mgr ctrl.Manager) error { }). For(&v1alpha2.ModelConfig{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})). Watches( - &v1.Secret{}, + &corev1.Secret{}, handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request { requests := []reconcile.Request{} diff --git a/go/internal/controller/reconciler/reconciler.go b/go/internal/controller/reconciler/reconciler.go index b7443c777..3c5c76578 100644 --- a/go/internal/controller/reconciler/reconciler.go +++ b/go/internal/controller/reconciler/reconciler.go @@ -7,14 +7,15 @@ import ( "errors" "fmt" "reflect" - "sort" + "slices" + "strings" "sync" "github.com/hashicorp/go-multierror" reconcilerutils "github.com/kagent-dev/kagent/go/internal/controller/reconciler/utils" "github.com/kagent-dev/kmcp/api/v1alpha1" appsv1 "k8s.io/api/apps/v1" - k8s_errors "k8s.io/apimachinery/pkg/api/errors" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" @@ -79,7 +80,7 @@ func (a *kagentReconciler) ReconcileKagentAgent(ctx context.Context, req ctrl.Re // TODO(sbx0r): missing finalizer logic agent := &v1alpha2.Agent{} if err := a.kube.Get(ctx, req.NamespacedName, agent); err != nil { - if k8s_errors.IsNotFound(err) { + if apierrors.IsNotFound(err) { return a.handleAgentDeletion(req) } @@ -171,7 +172,7 @@ func (a *kagentReconciler) reconcileAgentStatus(ctx context.Context, agent *v1al func (a *kagentReconciler) ReconcileKagentMCPService(ctx context.Context, req ctrl.Request) error { service := &corev1.Service{} if err := a.kube.Get(ctx, req.NamespacedName, service); err != nil { - if k8s_errors.IsNotFound(err) { + if apierrors.IsNotFound(err) { // Delete from DB if the service is deleted dbService := &database.ToolServer{ Name: req.String(), @@ -214,7 +215,7 
@@ type secretRef struct { func (a *kagentReconciler) ReconcileKagentModelConfig(ctx context.Context, req ctrl.Request) error { modelConfig := &v1alpha2.ModelConfig{} if err := a.kube.Get(ctx, req.NamespacedName, modelConfig); err != nil { - if k8s_errors.IsNotFound(err) { + if apierrors.IsNotFound(err) { return nil } @@ -269,8 +270,8 @@ func (a *kagentReconciler) ReconcileKagentModelConfig(ctx context.Context, req c // this loses per-secret context (i.e. versioning/hash status per-secret), but simplifies the number of statuses tracked func computeStatusSecretHash(secrets []secretRef) string { // sort secret references for deterministic output - sort.Slice(secrets, func(i, j int) bool { - return secrets[i].NamespacedName.String() < secrets[j].NamespacedName.String() + slices.SortStableFunc(secrets, func(a, b secretRef) int { + return strings.Compare(a.NamespacedName.String(), b.NamespacedName.String()) }) // compute a singular hash of the secrets @@ -283,7 +284,7 @@ func computeStatusSecretHash(secrets []secretRef) string { for k := range s.Secret.Data { keys = append(keys, k) } - sort.Strings(keys) + slices.Sort(keys) for _, k := range keys { hash.Write([]byte(k)) @@ -337,7 +338,7 @@ func (a *kagentReconciler) reconcileModelConfigStatus(ctx context.Context, model func (a *kagentReconciler) ReconcileKagentMCPServer(ctx context.Context, req ctrl.Request) error { mcpServer := &v1alpha1.MCPServer{} if err := a.kube.Get(ctx, req.NamespacedName, mcpServer); err != nil { - if k8s_errors.IsNotFound(err) { + if apierrors.IsNotFound(err) { // Delete from DB if the mcp server is deleted dbServer := &database.ToolServer{ Name: req.String(), @@ -380,7 +381,7 @@ func (a *kagentReconciler) ReconcileKagentRemoteMCPServer(ctx context.Context, r server := &v1alpha2.RemoteMCPServer{} if err := a.kube.Get(ctx, nns, server); err != nil { // if the remote MCP server is not found, we can ignore it - if k8s_errors.IsNotFound(err) { + if apierrors.IsNotFound(err) { // Delete from DB if 
the remote mcp server is deleted dbServer := &database.ToolServer{ Name: serverRef, @@ -552,7 +553,7 @@ func (a *kagentReconciler) reconcileDesiredObjects(ctx context.Context, owner me func createOrUpdate(ctx context.Context, c client.Client, obj client.Object, f controllerutil.MutateFn) (controllerutil.OperationResult, error) { key := client.ObjectKeyFromObject(obj) if err := c.Get(ctx, key, obj); err != nil { - if !k8s_errors.IsNotFound(err) { + if !apierrors.IsNotFound(err) { return controllerutil.OperationResultNone, err } if f != nil { @@ -683,7 +684,7 @@ func (a *kagentReconciler) listTools(ctx context.Context, tsp transport.Interfac if err != nil { return nil, fmt.Errorf("failed to start client for toolServer %s: %v", toolServer.Name, err) } - defer client.Close() //nolint:errcheck + defer client.Close() _, err = client.Initialize(ctx, mcp.InitializeRequest{ Params: mcp.InitializeParams{ ProtocolVersion: mcp.LATEST_PROTOCOL_VERSION, diff --git a/go/internal/controller/reconciler/utils/reconciler_utils.go b/go/internal/controller/reconciler/utils/reconciler_utils.go index 84d2f3d26..4178537b4 100644 --- a/go/internal/controller/reconciler/utils/reconciler_utils.go +++ b/go/internal/controller/reconciler/utils/reconciler_utils.go @@ -3,6 +3,7 @@ package utils import ( "context" "fmt" + "maps" "reflect" protoV2 "google.golang.org/protobuf/proto" @@ -109,7 +110,7 @@ func mapStringEqual(map1, map2 map[string]string) bool { // if i is addressable, return that. // if i is a struct passed in by value, make a new instance of the type and copy the contents to that and return // the pointer to that. -func mkPointer(val reflect.Value) interface{} { +func mkPointer(val reflect.Value) any { if val.Kind() == reflect.Ptr { return val.Interface() } @@ -126,7 +127,7 @@ func mkPointer(val reflect.Value) interface{} { // DeepEqual should be used in place of reflect.DeepEqual when the type of an object is unknown and may be a proto message. 
// see https://github.com/golang/protobuf/issues/1173 for details on why reflect.DeepEqual no longer works for proto messages -func DeepEqual(val1, val2 interface{}) bool { +func DeepEqual(val1, val2 any) bool { protoVal1, isProto := val1.(protoV2.Message) if isProto { protoVal2, isProto := val2.(protoV2.Message) @@ -178,9 +179,7 @@ func FindOwnedObjects(ctx context.Context, cl client.Client, uid types.UID, name if err != nil { return nil, err } - for uid, object := range objs { - ownedObjects[uid] = object - } + maps.Copy(ownedObjects, objs) } return ownedObjects, nil diff --git a/go/internal/controller/service_controller.go b/go/internal/controller/service_controller.go index 5ec9b03aa..0ddda2c2b 100644 --- a/go/internal/controller/service_controller.go +++ b/go/internal/controller/service_controller.go @@ -20,7 +20,7 @@ import ( "context" "github.com/kagent-dev/kagent/go/internal/controller/reconciler" - v1 "k8s.io/api/core/v1" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/utils/ptr" @@ -57,7 +57,7 @@ func (r *ServiceController) SetupWithManager(mgr ctrl.Manager) error { } return labels["kagent.dev/mcp-service"] == "true" })). - For(&v1.Service{}). + For(&corev1.Service{}). Named("service"). 
Complete(r) } diff --git a/go/internal/controller/translator/agent/adk_api_translator.go b/go/internal/controller/translator/agent/adk_api_translator.go index 3ffa09aa4..a215ad383 100644 --- a/go/internal/controller/translator/agent/adk_api_translator.go +++ b/go/internal/controller/translator/agent/adk_api_translator.go @@ -112,7 +112,6 @@ func (a *adkApiTranslator) TranslateAgent( ctx context.Context, agent *v1alpha2.Agent, ) (*AgentOutputs, error) { - err := a.validateAgent(ctx, agent, &tState{}) if err != nil { return nil, err @@ -170,7 +169,6 @@ func (r *adkApiTranslator) GetOwnedResourceTypes() []client.Object { } func (a *adkApiTranslator) validateAgent(ctx context.Context, agent *v1alpha2.Agent, state *tState) error { - agentRef := utils.GetObjectRef(agent) if state.isVisited(agentRef) { @@ -214,7 +212,6 @@ func (a *adkApiTranslator) validateAgent(ctx context.Context, agent *v1alpha2.Ag if err != nil { return err } - } return nil @@ -500,7 +497,6 @@ func (a *adkApiTranslator) buildManifest( } func (a *adkApiTranslator) translateInlineAgent(ctx context.Context, agent *v1alpha2.Agent) (*adk.AgentConfig, *modelDeploymentData, []byte, error) { - model, mdd, secretHashBytes, err := a.translateModel(ctx, agent.Namespace, agent.Spec.Declarative.ModelConfig) if err != nil { return nil, nil, nil, err diff --git a/go/internal/controller/translator/agent/adk_translator_golden_test.go b/go/internal/controller/translator/agent/adk_translator_golden_test.go index 99f7f3fba..7ba17cde4 100644 --- a/go/internal/controller/translator/agent/adk_translator_golden_test.go +++ b/go/internal/controller/translator/agent/adk_translator_golden_test.go @@ -24,10 +24,10 @@ import ( // TestInput represents the structure of input test files type TestInput struct { - Objects []map[string]interface{} `yaml:"objects"` - Operation string `yaml:"operation"` // "translateAgent", "translateTeam", "translateToolServer" - TargetObject string `yaml:"targetObject"` // name of the object to 
translate - Namespace string `yaml:"namespace"` + Objects []map[string]any `yaml:"objects"` + Operation string `yaml:"operation"` // "translateAgent", "translateTeam", "translateToolServer" + TargetObject string `yaml:"targetObject"` // name of the object to translate + Namespace string `yaml:"namespace"` } // TestGoldenAdkTranslator runs golden tests for the ADK API translator @@ -99,7 +99,7 @@ func runGoldenTest(t *testing.T, inputFile, outputsDir, testName string, updateG // Try to find the first ModelConfig in the objects to use as default for _, objMap := range testInput.Objects { if kind, ok := objMap["kind"].(string); ok && kind == "ModelConfig" { - if metadata, ok := objMap["metadata"].(map[string]interface{}); ok { + if metadata, ok := objMap["metadata"].(map[string]any); ok { if name, ok := metadata["name"].(string); ok { defaultModel.Name = name break @@ -109,7 +109,7 @@ func runGoldenTest(t *testing.T, inputFile, outputsDir, testName string, updateG } // Execute the specified operation - var result interface{} + var result any switch testInput.Operation { case "translateAgent": agent := &v1alpha2.Agent{} @@ -177,7 +177,7 @@ func convertUnstructuredToTyped(unstrObj *unstructured.Unstructured, scheme *run } func normalizeJSON(t *testing.T, jsonData []byte) []byte { - var obj interface{} + var obj any err := json.Unmarshal(jsonData, &obj) require.NoError(t, err) @@ -190,10 +190,10 @@ func normalizeJSON(t *testing.T, jsonData []byte) []byte { return result } -func removeNonDeterministicFields(obj interface{}) interface{} { +func removeNonDeterministicFields(obj any) any { switch v := obj.(type) { - case map[string]interface{}: - result := make(map[string]interface{}) + case map[string]any: + result := make(map[string]any) for key, value := range v { // Remove fields that are non-deterministic or generated switch key { @@ -205,8 +205,8 @@ func removeNonDeterministicFields(obj interface{}) interface{} { } } return result - case []interface{}: - var result 
[]interface{} + case []any: + var result []any for _, item := range v { result = append(result, removeNonDeterministicFields(item)) } diff --git a/go/internal/controller/translator/mutate.go b/go/internal/controller/translator/mutate.go index 8a820d269..b6dcc0c73 100644 --- a/go/internal/controller/translator/mutate.go +++ b/go/internal/controller/translator/mutate.go @@ -65,7 +65,7 @@ func MutateFuncFor(existing, desired client.Object) controllerutil.MutateFn { } } -func mergeWithOverride(dst, src interface{}) error { +func mergeWithOverride(dst, src any) error { return mergo.Merge(dst, src, mergo.WithOverride) } diff --git a/go/internal/database/client.go b/go/internal/database/client.go index 7e67948e6..6c974198f 100644 --- a/go/internal/database/client.go +++ b/go/internal/database/client.go @@ -524,7 +524,6 @@ func (c *clientImpl) StoreCheckpointWrites(writes []*LangGraphCheckpointWrite) e // ListCheckpoints lists checkpoints for a thread, optionally filtered by beforeCheckpointID func (c *clientImpl) ListCheckpoints(userID, threadID, checkpointNS string, checkpointID *string, limit int) ([]*LangGraphCheckpointTuple, error) { - var checkpointTuples []*LangGraphCheckpointTuple if err := c.db.Transaction(func(tx *gorm.DB) error { query := c.db.Where( @@ -584,7 +583,6 @@ func (c *clientImpl) DeleteCheckpoint(userID, threadID string) error { } return nil }) - } // CrewAI methods @@ -601,7 +599,7 @@ func (c *clientImpl) StoreCrewAIMemory(memory *CrewAIAgentMemory) error { // SearchCrewAIMemoryByTask searches CrewAI agent memory by task description across all agents for a session func (c *clientImpl) SearchCrewAIMemoryByTask(userID, threadID, taskDescription string, limit int) ([]*CrewAIAgentMemory, error) { var memories []*CrewAIAgentMemory - + // Search for task_description within the JSON memory_data field // Using JSON_EXTRACT or JSON_UNQUOTE for MySQL/PostgreSQL, or simple LIKE for SQLite // Sort by created_at DESC, then by score ASC (if score exists in JSON) 
@@ -609,17 +607,17 @@ func (c *clientImpl) SearchCrewAIMemoryByTask(userID, threadID, taskDescription "user_id = ? AND thread_id = ? AND (memory_data LIKE ? OR JSON_EXTRACT(memory_data, '$.task_description') LIKE ?)", userID, threadID, "%"+taskDescription+"%", "%"+taskDescription+"%", ).Order("created_at DESC, JSON_EXTRACT(memory_data, '$.score') ASC") - + // Apply limit if limit > 0 { query = query.Limit(limit) } - + err := query.Find(&memories).Error if err != nil { return nil, fmt.Errorf("failed to search CrewAI agent memory by task: %w", err) } - + return memories, nil } @@ -629,11 +627,11 @@ func (c *clientImpl) ResetCrewAIMemory(userID, threadID string) error { "user_id = ? AND thread_id = ?", userID, threadID, ).Delete(&CrewAIAgentMemory{}) - + if result.Error != nil { return fmt.Errorf("failed to reset CrewAI agent memory: %w", result.Error) } - + return nil } @@ -656,13 +654,13 @@ func (c *clientImpl) GetCrewAIFlowState(userID, threadID string) (*CrewAIFlowSta "user_id = ? AND thread_id = ?", userID, threadID, ).Order("created_at DESC").First(&state).Error - + if err != nil { if errors.Is(err, gorm.ErrRecordNotFound) { return nil, nil // Return nil for not found, as expected by the Python client } return nil, fmt.Errorf("failed to get CrewAI flow state: %w", err) } - + return &state, nil } diff --git a/go/internal/database/fake/client.go b/go/internal/database/fake/client.go index 18dcca471..f7af21c4e 100644 --- a/go/internal/database/fake/client.go +++ b/go/internal/database/fake/client.go @@ -3,7 +3,7 @@ package fake import ( "encoding/json" "fmt" - "sort" + "slices" "strings" "sync" @@ -72,7 +72,6 @@ func (c *InMemoryFakeClient) GetPushNotification(taskID string, configID string) } func (c *InMemoryFakeClient) GetTask(taskID string) (*protocol.Task, error) { - c.mu.RLock() defer c.mu.RUnlock() @@ -89,7 +88,6 @@ func (c *InMemoryFakeClient) GetTask(taskID string) (*protocol.Task, error) { } func (c *InMemoryFakeClient) DeleteTask(taskID string) error { - 
c.mu.Lock() defer c.mu.Unlock() @@ -326,8 +324,8 @@ func (c *InMemoryFakeClient) ListSessions(userID string) ([]database.Session, er result = append(result, *session) } } - sort.Slice(result, func(i, j int) bool { - return result[i].ID < result[j].ID + slices.SortStableFunc(result, func(i, j database.Session) int { + return strings.Compare(i.ID, j.ID) }) return result, nil } @@ -343,8 +341,8 @@ func (c *InMemoryFakeClient) ListSessionsForAgent(agentID string, userID string) result = append(result, *session) } } - sort.Slice(result, func(i, j int) bool { - return result[i].ID < result[j].ID + slices.SortStableFunc(result, func(i, j database.Session) int { + return strings.Compare(i.ID, j.ID) }) return result, nil } @@ -358,7 +356,9 @@ func (c *InMemoryFakeClient) ListAgents() ([]database.Agent, error) { for _, agent := range c.agents { result = append(result, *agent) } - sort.Slice(result, func(i, j int) bool { return result[i].ID < result[j].ID }) + slices.SortStableFunc(result, func(i, j database.Agent) int { + return strings.Compare(i.ID, j.ID) + }) return result, nil } @@ -371,8 +371,8 @@ func (c *InMemoryFakeClient) ListToolServers() ([]database.ToolServer, error) { for _, server := range c.toolServers { result = append(result, *server) } - sort.Slice(result, func(i, j int) bool { - return (result[i].Name + result[i].GroupKind) < (result[j].Name + result[j].GroupKind) + slices.SortStableFunc(result, func(i, j database.ToolServer) int { + return strings.Compare(i.Name+i.GroupKind, j.Name+j.GroupKind) }) return result, nil } @@ -386,8 +386,8 @@ func (c *InMemoryFakeClient) ListTools() ([]database.Tool, error) { for _, tool := range c.tools { result = append(result, *tool) } - sort.Slice(result, func(i, j int) bool { - return (result[i].ServerName + result[i].ID) < (result[j].ServerName + result[j].ID) + slices.SortStableFunc(result, func(i, j database.Tool) int { + return strings.Compare(i.ServerName+i.ID, j.ServerName+j.ID) }) return result, nil } @@ -409,8 
+409,8 @@ func (c *InMemoryFakeClient) ListToolsForServer(serverName string, groupKind str } } - sort.Slice(result, func(i, j int) bool { - return (result[i].ServerName + result[i].ID) < (result[j].ServerName + result[j].ID) + slices.SortStableFunc(result, func(i, j database.Tool) int { + return strings.Compare(i.ServerName+i.ID, j.ServerName+j.ID) }) return result, nil } @@ -740,10 +740,10 @@ func (c *InMemoryFakeClient) StoreCrewAIMemory(memory *database.CrewAIAgentMemor if c.crewaiMemory == nil { c.crewaiMemory = make(map[string][]*database.CrewAIAgentMemory) } - + key := fmt.Sprintf("%s:%s", memory.UserID, memory.ThreadID) c.crewaiMemory[key] = append(c.crewaiMemory[key], memory) - + return nil } @@ -755,16 +755,16 @@ func (c *InMemoryFakeClient) SearchCrewAIMemoryByTask(userID, threadID, taskDesc if c.crewaiMemory == nil { return []*database.CrewAIAgentMemory{}, nil } - + var allMemories []*database.CrewAIAgentMemory - + // Search across all agents for this user/thread for key, memories := range c.crewaiMemory { // Key format is "user_id:thread_id" if strings.HasPrefix(key, userID+":"+threadID) { for _, memory := range memories { // Parse the JSON memory data and search for task_description - var memoryData map[string]interface{} + var memoryData map[string]any if err := json.Unmarshal([]byte(memory.MemoryData), &memoryData); err == nil { if taskDesc, ok := memoryData["task_description"].(string); ok { if strings.Contains(strings.ToLower(taskDesc), strings.ToLower(taskDescription)) { @@ -779,38 +779,48 @@ func (c *InMemoryFakeClient) SearchCrewAIMemoryByTask(userID, threadID, taskDesc } } } - + // Sort by created_at DESC, then by score ASC (if score exists in JSON) - sort.Slice(allMemories, func(i, j int) bool { + slices.SortStableFunc(allMemories, func(i, j *database.CrewAIAgentMemory) int { // First sort by created_at DESC (most recent first) - if !allMemories[i].CreatedAt.Equal(allMemories[j].CreatedAt) { - return 
allMemories[i].CreatedAt.After(allMemories[j].CreatedAt) + if !i.CreatedAt.Equal(j.CreatedAt) { + if i.CreatedAt.After(j.CreatedAt) { + return -1 + } else { + return 1 + } } - + // If created_at is equal, sort by score ASC var scoreI, scoreJ float64 - var memoryDataI, memoryDataJ map[string]interface{} - - if err := json.Unmarshal([]byte(allMemories[i].MemoryData), &memoryDataI); err == nil { + var memoryDataI, memoryDataJ map[string]any + + if err := json.Unmarshal([]byte(i.MemoryData), &memoryDataI); err == nil { if score, ok := memoryDataI["score"].(float64); ok { scoreI = score } } - - if err := json.Unmarshal([]byte(allMemories[j].MemoryData), &memoryDataJ); err == nil { + + if err := json.Unmarshal([]byte(j.MemoryData), &memoryDataJ); err == nil { if score, ok := memoryDataJ["score"].(float64); ok { scoreJ = score } } - - return scoreI < scoreJ + + if scoreI < scoreJ { + return -1 + } else if scoreI > scoreJ { + return 1 + } else { + return 0 + } }) - + // Apply limit if limit > 0 && len(allMemories) > limit { allMemories = allMemories[:limit] } - + return allMemories, nil } @@ -822,7 +832,7 @@ func (c *InMemoryFakeClient) ResetCrewAIMemory(userID, threadID string) error { if c.crewaiMemory == nil { return nil } - + // Find and delete all memory entries for this user/thread combination keysToDelete := make([]string, 0) for key := range c.crewaiMemory { @@ -831,12 +841,12 @@ func (c *InMemoryFakeClient) ResetCrewAIMemory(userID, threadID string) error { keysToDelete = append(keysToDelete, key) } } - + // Delete the entries for _, key := range keysToDelete { delete(c.crewaiMemory, key) } - + return nil } @@ -848,10 +858,10 @@ func (c *InMemoryFakeClient) StoreCrewAIFlowState(state *database.CrewAIFlowStat if c.crewaiFlowStates == nil { c.crewaiFlowStates = make(map[string]*database.CrewAIFlowState) } - + key := fmt.Sprintf("%s:%s", state.UserID, state.ThreadID) c.crewaiFlowStates[key] = state - + return nil } @@ -863,9 +873,9 @@ func (c *InMemoryFakeClient) 
GetCrewAIFlowState(userID, threadID string) (*datab if c.crewaiFlowStates == nil { return nil, nil } - + key := fmt.Sprintf("%s:%s", userID, threadID) state := c.crewaiFlowStates[key] - + return state, nil } diff --git a/go/internal/database/service.go b/go/internal/database/service.go index 9c4c843b0..4ab1f68d2 100644 --- a/go/internal/database/service.go +++ b/go/internal/database/service.go @@ -2,6 +2,7 @@ package database import ( "fmt" + "strings" "gorm.io/gorm" "gorm.io/gorm/clause" @@ -13,7 +14,7 @@ type Model interface { type Clause struct { Key string - Value interface{} + Value any } func list[T Model](db *gorm.DB, clauses ...Clause) ([]T, error) { @@ -76,12 +77,12 @@ func delete[T Model](db *gorm.DB, clauses ...Clause) error { // BuildWhereClause is deprecated, use individual Where clauses instead func BuildWhereClause(clauses ...Clause) string { - clausesStr := "" + var clausesStr strings.Builder for idx, clause := range clauses { if idx > 0 { - clausesStr += " AND " + clausesStr.WriteString(" AND ") } - clausesStr += fmt.Sprintf("%s = %v", clause.Key, clause.Value) + clausesStr.WriteString(fmt.Sprintf("%s = %v", clause.Key, clause.Value)) } - return clausesStr + return clausesStr.String() } diff --git a/go/internal/goruntime/cpu.go b/go/internal/goruntime/cpu.go index efbd54489..fd3c984d4 100644 --- a/go/internal/goruntime/cpu.go +++ b/go/internal/goruntime/cpu.go @@ -9,7 +9,7 @@ import ( ) func SetMaxProcs(logger logr.Logger) { - l := func(format string, a ...interface{}) { + l := func(format string, a ...any) { logger.Info(fmt.Sprintf(strings.TrimPrefix(format, "maxprocs: "), a...)) } diff --git a/go/internal/httpserver/auth/authn.go b/go/internal/httpserver/auth/authn.go index ee74f48b5..ac5ab641a 100644 --- a/go/internal/httpserver/auth/authn.go +++ b/go/internal/httpserver/auth/authn.go @@ -85,7 +85,7 @@ func A2ARequestHandler(authProvider auth.AuthProvider, agentNns types.Namespaced var resp *http.Response defer func() { if err != nil && resp != 
nil { - resp.Body.Close() //nolint:errcheck + resp.Body.Close() } }() diff --git a/go/internal/httpserver/handlers/agents.go b/go/internal/httpserver/handlers/agents.go index 4a6d3a804..5127244b3 100644 --- a/go/internal/httpserver/handlers/agents.go +++ b/go/internal/httpserver/handlers/agents.go @@ -11,7 +11,7 @@ import ( "github.com/kagent-dev/kagent/go/internal/utils" "github.com/kagent-dev/kagent/go/pkg/auth" "github.com/kagent-dev/kagent/go/pkg/client/api" - k8serrors "k8s.io/apimachinery/pkg/api/errors" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" ctrllog "sigs.k8s.io/controller-runtime/pkg/log" @@ -60,7 +60,6 @@ func (h *AgentsHandler) HandleListAgents(w ErrorResponseWriter, r *http.Request) } func (h *AgentsHandler) getAgentResponse(ctx context.Context, log logr.Logger, agent *v1alpha2.Agent) (api.AgentResponse, error) { - agentRef := utils.GetObjectRef(agent) log.V(1).Info("Processing Agent", "agentRef", agentRef) @@ -100,7 +99,7 @@ func (h *AgentsHandler) getAgentResponse(ctx context.Context, log logr.Logger, a objKey, modelConfig, ); err != nil { - if k8serrors.IsNotFound(err) { + if apierrors.IsNotFound(err) { log.V(1).Info("ModelConfig not found", "modelConfigRef", objKey) } else { log.Error(err, "Failed to get ModelConfig", "modelConfigRef", objKey) @@ -260,7 +259,7 @@ func (h *AgentsHandler) HandleUpdateAgent(w ErrorResponseWriter, r *http.Request existingAgent, ) if err != nil { - if k8serrors.IsNotFound(err) { + if apierrors.IsNotFound(err) { log.Info("Agent not found") w.RespondWithError(errors.NewNotFoundError("Agent not found", nil)) return @@ -319,7 +318,7 @@ func (h *AgentsHandler) HandleDeleteAgent(w ErrorResponseWriter, r *http.Request agent, ) if err != nil { - if k8serrors.IsNotFound(err) { + if apierrors.IsNotFound(err) { log.Info("Agent not found") w.RespondWithError(errors.NewNotFoundError("Agent not found", nil)) return diff --git 
a/go/internal/httpserver/handlers/checkpoints.go b/go/internal/httpserver/handlers/checkpoints.go index 833b1e758..0845cebfe 100644 --- a/go/internal/httpserver/handlers/checkpoints.go +++ b/go/internal/httpserver/handlers/checkpoints.go @@ -227,7 +227,6 @@ func (h *CheckpointsHandler) HandlePutWrites(w ErrorResponseWriter, r *http.Requ // Prepare writes writes := make([]*database.LangGraphCheckpointWrite, len(req.Writes)) for i, writeReq := range req.Writes { - writes[i] = &database.LangGraphCheckpointWrite{ UserID: userID, ThreadID: req.ThreadID, diff --git a/go/internal/httpserver/handlers/crewai.go b/go/internal/httpserver/handlers/crewai.go index 4764b99ec..ad23119bc 100644 --- a/go/internal/httpserver/handlers/crewai.go +++ b/go/internal/httpserver/handlers/crewai.go @@ -25,9 +25,9 @@ func NewCrewAIHandler(base *Base) *CrewAIHandler { // KagentMemoryPayload represents memory payload data from Python type KagentMemoryPayload struct { - ThreadID string `json:"thread_id"` - UserID string `json:"user_id"` - MemoryData map[string]interface{} `json:"memory_data"` + ThreadID string `json:"thread_id"` + UserID string `json:"user_id"` + MemoryData map[string]any `json:"memory_data"` } // KagentMemoryResponse represents memory response data @@ -37,9 +37,9 @@ type KagentMemoryResponse struct { // KagentFlowStatePayload represents flow state payload data type KagentFlowStatePayload struct { - ThreadID string `json:"thread_id"` - MethodName string `json:"method_name"` - StateData map[string]interface{} `json:"state_data"` + ThreadID string `json:"thread_id"` + MethodName string `json:"method_name"` + StateData map[string]any `json:"state_data"` } // KagentFlowStateResponse represents flow state response data @@ -145,7 +145,7 @@ func (h *CrewAIHandler) HandleGetMemory(w ErrorResponseWriter, r *http.Request) // Convert to response format memoryPayloads := make([]KagentMemoryPayload, len(memories)) for i, memory := range memories { - var memoryData map[string]interface{} + 
var memoryData map[string]any if err := json.Unmarshal([]byte(memory.MemoryData), &memoryData); err != nil { w.RespondWithError(errors.NewInternalServerError("Failed to parse memory data", err)) return @@ -282,7 +282,7 @@ func (h *CrewAIHandler) HandleGetFlowState(w ErrorResponseWriter, r *http.Reques } // Convert to response format - var stateData map[string]interface{} + var stateData map[string]any if err := json.Unmarshal([]byte(state.StateData), &stateData); err != nil { w.RespondWithError(errors.NewInternalServerError("Failed to parse state data", err)) return diff --git a/go/internal/httpserver/handlers/health.go b/go/internal/httpserver/handlers/health.go index 6c5a224f2..aafdb6097 100644 --- a/go/internal/httpserver/handlers/health.go +++ b/go/internal/httpserver/handlers/health.go @@ -20,6 +20,6 @@ func (h *HealthHandler) HandleHealth(w http.ResponseWriter, r *http.Request) { log := ctrllog.FromContext(r.Context()).WithName("health-handler") log.V(1).Info("Handling health check request") - data := api.NewResponse(map[string]interface{}{"status": "OK"}, "OK", false) + data := api.NewResponse(map[string]any{"status": "OK"}, "OK", false) RespondWithJSON(w, http.StatusOK, data) } diff --git a/go/internal/httpserver/handlers/helpers.go b/go/internal/httpserver/handlers/helpers.go index 28b000157..5e5ddf2d2 100644 --- a/go/internal/httpserver/handlers/helpers.go +++ b/go/internal/httpserver/handlers/helpers.go @@ -24,7 +24,7 @@ type ErrorResponseWriter interface { Flush() } -func RespondWithJSON(w http.ResponseWriter, code int, payload interface{}) { +func RespondWithJSON(w http.ResponseWriter, code int, payload any) { log := ctrllog.Log.WithName("http-helpers") response, err := json.Marshal(payload) @@ -126,14 +126,14 @@ func GetIntPathParam(r *http.Request, name string) (int, error) { } // DecodeJSONBody decodes a JSON request body into the provided struct -func DecodeJSONBody(r *http.Request, target interface{}) error { +func DecodeJSONBody(r *http.Request, 
target any) error { log := ctrllog.Log.WithName("http-helpers") if err := json.NewDecoder(r.Body).Decode(target); err != nil { log.Info("Failed to decode JSON request body", "error", err.Error()) return err } - defer r.Body.Close() //nolint:errcheck + defer r.Body.Close() log.V(2).Info("Successfully decoded JSON request body") return nil @@ -141,7 +141,7 @@ func DecodeJSONBody(r *http.Request, target interface{}) error { // flattenStructToMap uses reflection to add fields of a struct to a map, // using json tags as keys. -func FlattenStructToMap(data interface{}, targetMap map[string]interface{}) { +func FlattenStructToMap(data any, targetMap map[string]any) { val := reflect.ValueOf(data) if val.Kind() == reflect.Ptr { val = val.Elem() diff --git a/go/internal/httpserver/handlers/memory.go b/go/internal/httpserver/handlers/memory.go index dba2a321a..8b5973429 100644 --- a/go/internal/httpserver/handlers/memory.go +++ b/go/internal/httpserver/handlers/memory.go @@ -6,7 +6,7 @@ import ( "net/http" "strings" - k8serrors "k8s.io/apimachinery/pkg/api/errors" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" @@ -49,7 +49,7 @@ func (h *MemoryHandler) HandleListMemories(w ErrorResponseWriter, r *http.Reques memoryRef := common.GetObjectRef(&memory) log.V(1).Info("Processing Memory", "memoryRef", memoryRef) - memoryParams := make(map[string]interface{}) + memoryParams := make(map[string]any) if memory.Spec.Pinecone != nil { FlattenStructToMap(memory.Spec.Pinecone, memoryParams) } @@ -112,7 +112,7 @@ func (h *MemoryHandler) HandleCreateMemory(w ErrorResponseWriter, r *http.Reques log.Info("Memory already exists") w.RespondWithError(errors.NewConflictError("Memory already exists", nil)) return - } else if !k8serrors.IsNotFound(err) { + } else if !apierrors.IsNotFound(err) { log.Error(err, "Failed to check if Memory exists") 
w.RespondWithError(errors.NewInternalServerError("Failed to check if Memory exists", err)) return @@ -200,7 +200,7 @@ func (h *MemoryHandler) HandleDeleteMemory(w ErrorResponseWriter, r *http.Reques existingMemory, ) if err != nil { - if k8serrors.IsNotFound(err) { + if apierrors.IsNotFound(err) { log.Info("Memory not found") w.RespondWithError(errors.NewNotFoundError("Memory not found", nil)) return @@ -261,7 +261,7 @@ func (h *MemoryHandler) HandleGetMemory(w ErrorResponseWriter, r *http.Request) memory, ) if err != nil { - if k8serrors.IsNotFound(err) { + if apierrors.IsNotFound(err) { log.Info("Memory not found") w.RespondWithError(errors.NewNotFoundError("Memory not found", nil)) return @@ -271,7 +271,7 @@ func (h *MemoryHandler) HandleGetMemory(w ErrorResponseWriter, r *http.Request) return } - memoryParams := make(map[string]interface{}) + memoryParams := make(map[string]any) if memory.Spec.Pinecone != nil { FlattenStructToMap(memory.Spec.Pinecone, memoryParams) } diff --git a/go/internal/httpserver/handlers/modelconfig.go b/go/internal/httpserver/handlers/modelconfig.go index 622ca020c..2c5c6ceee 100644 --- a/go/internal/httpserver/handlers/modelconfig.go +++ b/go/internal/httpserver/handlers/modelconfig.go @@ -12,7 +12,7 @@ import ( common "github.com/kagent-dev/kagent/go/internal/utils" "github.com/kagent-dev/kagent/go/pkg/auth" "github.com/kagent-dev/kagent/go/pkg/client/api" - k8serrors "k8s.io/apimachinery/pkg/api/errors" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" @@ -46,7 +46,7 @@ func (h *ModelConfigHandler) HandleListModelConfigs(w ErrorResponseWriter, r *ht configs := make([]api.ModelConfigResponse, 0) for _, config := range modelConfigs.Items { - modelParams := make(map[string]interface{}) + modelParams := make(map[string]any) if config.Spec.OpenAI != nil { FlattenStructToMap(config.Spec.OpenAI, modelParams) @@ -119,7 
+119,7 @@ func (h *ModelConfigHandler) HandleGetModelConfig(w ErrorResponseWriter, r *http modelConfig, ) if err != nil { - if k8serrors.IsNotFound(err) { + if apierrors.IsNotFound(err) { log.Info("ModelConfig not found") w.RespondWithError(errors.NewNotFoundError("ModelConfig not found", nil)) return @@ -130,7 +130,7 @@ func (h *ModelConfigHandler) HandleGetModelConfig(w ErrorResponseWriter, r *http } log.V(1).Info("Constructing response object") - modelParams := make(map[string]interface{}) + modelParams := make(map[string]any) if modelConfig.Spec.OpenAI != nil { FlattenStructToMap(modelConfig.Spec.OpenAI, modelParams) } @@ -226,7 +226,7 @@ func (h *ModelConfigHandler) HandleCreateModelConfig(w ErrorResponseWriter, r *h log.Info("ModelConfig already exists") w.RespondWithError(errors.NewConflictError("ModelConfig already exists", nil)) return - } else if !k8serrors.IsNotFound(err) { + } else if !apierrors.IsNotFound(err) { log.Error(err, "Failed to check if ModelConfig exists") w.RespondWithError(errors.NewInternalServerError("Failed to check if ModelConfig exists", err)) return @@ -410,7 +410,7 @@ func (h *ModelConfigHandler) HandleUpdateModelConfig(w ErrorResponseWriter, r *h modelConfig, ) if err != nil { - if k8serrors.IsNotFound(err) { + if apierrors.IsNotFound(err) { log.Info("ModelConfig not found") w.RespondWithError(errors.NewNotFoundError("ModelConfig not found", nil)) return @@ -527,7 +527,7 @@ func (h *ModelConfigHandler) HandleUpdateModelConfig(w ErrorResponseWriter, r *h return } - updatedParams := make(map[string]interface{}) + updatedParams := make(map[string]any) if modelConfig.Spec.OpenAI != nil { FlattenStructToMap(modelConfig.Spec.OpenAI, updatedParams) } else if modelConfig.Spec.Anthropic != nil { @@ -592,7 +592,7 @@ func (h *ModelConfigHandler) HandleDeleteModelConfig(w ErrorResponseWriter, r *h existingConfig, ) if err != nil { - if k8serrors.IsNotFound(err) { + if apierrors.IsNotFound(err) { log.Info("ModelConfig not found") 
w.RespondWithError(errors.NewNotFoundError("ModelConfig not found", nil)) return diff --git a/go/internal/httpserver/handlers/namespaces.go b/go/internal/httpserver/handlers/namespaces.go index 38fce72b9..ffe5b2028 100644 --- a/go/internal/httpserver/handlers/namespaces.go +++ b/go/internal/httpserver/handlers/namespaces.go @@ -2,7 +2,7 @@ package handlers import ( "net/http" - "sort" + "slices" "strings" "github.com/kagent-dev/kagent/go/internal/httpserver/errors" @@ -50,8 +50,8 @@ func (h *NamespacesHandler) HandleListNamespaces(w ErrorResponseWriter, r *http. }) } - sort.SliceStable(namespaces, func(i, j int) bool { - return strings.ToLower(namespaces[i].Name) < strings.ToLower(namespaces[j].Name) + slices.SortStableFunc(namespaces, func(i, j api.NamespaceResponse) int { + return strings.Compare(strings.ToLower(i.Name), strings.ToLower(j.Name)) }) data := api.NewResponse(namespaces, "Successfully listed namespaces", false) @@ -80,8 +80,8 @@ func (h *NamespacesHandler) HandleListNamespaces(w ErrorResponseWriter, r *http. 
}) } - sort.SliceStable(namespaces, func(i, j int) bool { - return strings.ToLower(namespaces[i].Name) < strings.ToLower(namespaces[j].Name) + slices.SortStableFunc(namespaces, func(i, j api.NamespaceResponse) int { + return strings.Compare(strings.ToLower(i.Name), strings.ToLower(j.Name)) }) data := api.NewResponse(namespaces, "Successfully listed namespaces", false) diff --git a/go/internal/httpserver/handlers/providers.go b/go/internal/httpserver/handlers/providers.go index 482de2dc9..1e2f1fdde 100644 --- a/go/internal/httpserver/handlers/providers.go +++ b/go/internal/httpserver/handlers/providers.go @@ -53,10 +53,10 @@ func (h *ProviderHandler) HandleListSupportedMemoryProviders(w ErrorResponseWrit providerEnum v1alpha1.MemoryProvider configType reflect.Type }{ - {v1alpha1.Pinecone, reflect.TypeOf(v1alpha1.PineconeConfig{})}, + {v1alpha1.Pinecone, reflect.TypeFor[v1alpha1.PineconeConfig]()}, } - providersResponse := []map[string]interface{}{} + providersResponse := []map[string]any{} for _, pData := range providersData { allKeys := getStructJSONKeys(pData.configType) @@ -73,7 +73,7 @@ func (h *ProviderHandler) HandleListSupportedMemoryProviders(w ErrorResponseWrit } } - providersResponse = append(providersResponse, map[string]interface{}{ + providersResponse = append(providersResponse, map[string]any{ "name": string(pData.providerEnum), "type": string(pData.providerEnum), "requiredParams": requiredKeys, @@ -94,16 +94,16 @@ func (h *ProviderHandler) HandleListSupportedModelProviders(w ErrorResponseWrite providerEnum v1alpha2.ModelProvider configType reflect.Type }{ - {v1alpha2.ModelProviderOpenAI, reflect.TypeOf(v1alpha2.OpenAIConfig{})}, - {v1alpha2.ModelProviderAnthropic, reflect.TypeOf(v1alpha2.AnthropicConfig{})}, - {v1alpha2.ModelProviderAzureOpenAI, reflect.TypeOf(v1alpha2.AzureOpenAIConfig{})}, - {v1alpha2.ModelProviderOllama, reflect.TypeOf(v1alpha2.OllamaConfig{})}, - {v1alpha2.ModelProviderGemini, reflect.TypeOf(v1alpha2.GeminiConfig{})}, - 
{v1alpha2.ModelProviderGeminiVertexAI, reflect.TypeOf(v1alpha2.GeminiVertexAIConfig{})}, - {v1alpha2.ModelProviderAnthropicVertexAI, reflect.TypeOf(v1alpha2.AnthropicVertexAIConfig{})}, + {v1alpha2.ModelProviderOpenAI, reflect.TypeFor[v1alpha2.OpenAIConfig]()}, + {v1alpha2.ModelProviderAnthropic, reflect.TypeFor[v1alpha2.AnthropicConfig]()}, + {v1alpha2.ModelProviderAzureOpenAI, reflect.TypeFor[v1alpha2.AzureOpenAIConfig]()}, + {v1alpha2.ModelProviderOllama, reflect.TypeFor[v1alpha2.OllamaConfig]()}, + {v1alpha2.ModelProviderGemini, reflect.TypeFor[v1alpha2.GeminiConfig]()}, + {v1alpha2.ModelProviderGeminiVertexAI, reflect.TypeFor[v1alpha2.GeminiVertexAIConfig]()}, + {v1alpha2.ModelProviderAnthropicVertexAI, reflect.TypeFor[v1alpha2.AnthropicVertexAIConfig]()}, } - providersResponse := []map[string]interface{}{} + providersResponse := []map[string]any{} for _, pData := range providersData { allKeys := getStructJSONKeys(pData.configType) @@ -120,7 +120,7 @@ func (h *ProviderHandler) HandleListSupportedModelProviders(w ErrorResponseWrite } } - providersResponse = append(providersResponse, map[string]interface{}{ + providersResponse = append(providersResponse, map[string]any{ "name": string(pData.providerEnum), "type": string(pData.providerEnum), "requiredParams": requiredKeys, diff --git a/go/internal/httpserver/handlers/sessions_test.go b/go/internal/httpserver/handlers/sessions_test.go index 5eabdfa17..2ecb49f23 100644 --- a/go/internal/httpserver/handlers/sessions_test.go +++ b/go/internal/httpserver/handlers/sessions_test.go @@ -479,5 +479,4 @@ func TestSessionsHandler(t *testing.T) { assert.NotNil(t, responseRecorder.errorReceived) }) }) - } diff --git a/go/internal/httpserver/handlers/toolservers.go b/go/internal/httpserver/handlers/toolservers.go index c945c8676..7f10517b2 100644 --- a/go/internal/httpserver/handlers/toolservers.go +++ b/go/internal/httpserver/handlers/toolservers.go @@ -13,7 +13,7 @@ import ( "github.com/kagent-dev/kagent/go/pkg/client/api" 
"github.com/kagent-dev/kmcp/api/v1alpha1" corev1 "k8s.io/api/core/v1" - k8serrors "k8s.io/apimachinery/pkg/api/errors" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" ctrllog "sigs.k8s.io/controller-runtime/pkg/log" @@ -58,7 +58,6 @@ func (h *ToolServersHandler) HandleListToolServers(w ErrorResponseWriter, r *htt toolServerWithTools := make([]api.ToolServerResponse, len(toolServers)) for i, toolServer := range toolServers { - tools, err := h.DatabaseService.ListToolsForServer(toolServer.Name, toolServer.GroupKind) if err != nil { w.RespondWithError(errors.NewInternalServerError("Failed to list tools for ToolServer from database", err)) @@ -255,7 +254,7 @@ func (h *ToolServersHandler) HandleDeleteToolServer(w ErrorResponseWriter, r *ht toolServer, ) if err != nil { - if k8serrors.IsNotFound(err) { + if apierrors.IsNotFound(err) { log.Info("RemoteMCPServer not found") w.RespondWithError(errors.NewNotFoundError("RemoteMCPServer not found", nil)) return @@ -283,7 +282,7 @@ func (h *ToolServersHandler) HandleDeleteToolServer(w ErrorResponseWriter, r *ht toolServer, ) if err != nil { - if k8serrors.IsNotFound(err) { + if apierrors.IsNotFound(err) { log.Info("MCPServer not found") w.RespondWithError(errors.NewNotFoundError("MCPServer not found", nil)) return @@ -311,7 +310,7 @@ func (h *ToolServersHandler) HandleDeleteToolServer(w ErrorResponseWriter, r *ht service, ) if err != nil { - if k8serrors.IsNotFound(err) { + if apierrors.IsNotFound(err) { log.Info("Service not found") w.RespondWithError(errors.NewNotFoundError("Service not found", nil)) return diff --git a/go/internal/httpserver/handlers/toolservertypes.go b/go/internal/httpserver/handlers/toolservertypes.go index f407cb032..cf1cc5e2f 100644 --- a/go/internal/httpserver/handlers/toolservertypes.go +++ b/go/internal/httpserver/handlers/toolservertypes.go @@ -2,6 +2,7 @@ package handlers import ( "net/http" + "strings" 
"github.com/kagent-dev/kagent/go/internal/httpserver/errors" "github.com/kagent-dev/kagent/go/pkg/auth" @@ -40,12 +41,13 @@ func (t ToolServerTypes) Join(sep string) string { return string(t[0]) } - joined := string(t[0]) + var joined strings.Builder + joined.WriteString(string(t[0])) for _, s := range t[1:] { - joined += sep + string(s) + joined.WriteString(sep + string(s)) } - return joined + return joined.String() } const ( diff --git a/go/internal/httpserver/handlers/toolservertypes_test.go b/go/internal/httpserver/handlers/toolservertypes_test.go index 3f9ba12aa..74c30493c 100644 --- a/go/internal/httpserver/handlers/toolservertypes_test.go +++ b/go/internal/httpserver/handlers/toolservertypes_test.go @@ -66,7 +66,6 @@ func TestToolServerTypesHandler_NoKmcp(t *testing.T) { } func TestToolServerTypesHandler_WithKmcp(t *testing.T) { - scheme := runtime.NewScheme() err := v1alpha2.AddToScheme(scheme) diff --git a/go/internal/httpserver/handlers/utils.go b/go/internal/httpserver/handlers/utils.go index 8e20158a6..1ebeeb628 100644 --- a/go/internal/httpserver/handlers/utils.go +++ b/go/internal/httpserver/handlers/utils.go @@ -5,7 +5,7 @@ import ( "fmt" corev1 "k8s.io/api/core/v1" - k8serrors "k8s.io/apimachinery/pkg/api/errors" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" @@ -64,7 +64,7 @@ func createOrUpdateSecretWithOwnerReference( Namespace: owner.GetNamespace(), }, existingSecret) if err != nil { - if k8serrors.IsNotFound(err) { + if apierrors.IsNotFound(err) { return createSecretWithOwnerReference(ctx, kubeClient, data, owner) } return fmt.Errorf("failed to get existing secret: %w", err) diff --git a/go/internal/utils/client_wrapper.go b/go/internal/utils/client_wrapper.go index 18f618136..109d20469 100644 --- a/go/internal/utils/client_wrapper.go +++ b/go/internal/utils/client_wrapper.go @@ -50,7 +50,6 @@ func (w *kubeClientWrapper) 
AddInMemory(obj client.Object) error { } func (w *kubeClientWrapper) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { - exists, err := w.getInMemory(key, obj) if exists && err == nil { return nil diff --git a/go/internal/utils/client_wrapper_test.go b/go/internal/utils/client_wrapper_test.go index ff04f3a24..20a5b86dd 100644 --- a/go/internal/utils/client_wrapper_test.go +++ b/go/internal/utils/client_wrapper_test.go @@ -9,7 +9,7 @@ import ( "github.com/kagent-dev/kagent/go/internal/utils" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - v1 "k8s.io/api/core/v1" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -33,7 +33,7 @@ func TestAddInMemory(t *testing.T) { wrapper := utils.NewKubeClientWrapper(fakeClient) t.Run("should add configmap to memory", func(t *testing.T) { - configMap := &v1.ConfigMap{ + configMap := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: "test-config", Namespace: "test-namespace", @@ -47,7 +47,7 @@ func TestAddInMemory(t *testing.T) { require.NoError(t, err) // Try to get the object from memory - retrievedConfig := &v1.ConfigMap{} + retrievedConfig := &corev1.ConfigMap{} err = wrapper.Get(ctx, types.NamespacedName{ Name: "test-config", Namespace: "test-namespace", @@ -60,7 +60,7 @@ func TestAddInMemory(t *testing.T) { }) t.Run("should add secret to memory", func(t *testing.T) { - secret := &v1.Secret{ + secret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Name: "test-secret", Namespace: "test-namespace", @@ -74,7 +74,7 @@ func TestAddInMemory(t *testing.T) { require.NoError(t, err) // Try to get the object from memory - retrievedSecret := &v1.Secret{} + retrievedSecret := &corev1.Secret{} err = wrapper.Get(ctx, types.NamespacedName{ Name: "test-secret", Namespace: "test-namespace", @@ -87,7 +87,7 @@ func TestAddInMemory(t *testing.T) { }) t.Run("should 
overwrite existing object in memory", func(t *testing.T) { - configMap1 := &v1.ConfigMap{ + configMap1 := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: "overwrite-test", Namespace: "test-namespace", @@ -97,7 +97,7 @@ func TestAddInMemory(t *testing.T) { }, } - configMap2 := &v1.ConfigMap{ + configMap2 := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: "overwrite-test", Namespace: "test-namespace", @@ -116,7 +116,7 @@ func TestAddInMemory(t *testing.T) { require.NoError(t, err) // Retrieve and verify it's the updated object - retrieved := &v1.ConfigMap{} + retrieved := &corev1.ConfigMap{} err = wrapper.Get(ctx, types.NamespacedName{ Name: "overwrite-test", Namespace: "test-namespace", @@ -135,7 +135,7 @@ func TestGet(t *testing.T) { wrapper := utils.NewKubeClientWrapper(fakeClient) // Add object to memory - configMap := &v1.ConfigMap{ + configMap := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: "cached-config", Namespace: "test-namespace", @@ -148,7 +148,7 @@ func TestGet(t *testing.T) { require.NoError(t, err) // Get object (should come from memory) - retrieved := &v1.ConfigMap{} + retrieved := &corev1.ConfigMap{} err = wrapper.Get(ctx, types.NamespacedName{ Name: "cached-config", Namespace: "test-namespace", @@ -160,7 +160,7 @@ func TestGet(t *testing.T) { t.Run("should get object from underlying client when not in cache", func(t *testing.T) { // Create object in fake client - configMap := &v1.ConfigMap{ + configMap := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: "k8s-config", Namespace: "test-namespace", @@ -178,7 +178,7 @@ func TestGet(t *testing.T) { wrapper := utils.NewKubeClientWrapper(fakeClient) // Get object (should come from underlying client) - retrieved := &v1.ConfigMap{} + retrieved := &corev1.ConfigMap{} err := wrapper.Get(ctx, types.NamespacedName{ Name: "k8s-config", Namespace: "test-namespace", @@ -190,7 +190,7 @@ func TestGet(t *testing.T) { t.Run("should prioritize memory cache over underlying client", 
func(t *testing.T) { // Create object in fake client - k8sConfigMap := &v1.ConfigMap{ + k8sConfigMap := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: "priority-test", Namespace: "test-namespace", @@ -208,7 +208,7 @@ func TestGet(t *testing.T) { wrapper := utils.NewKubeClientWrapper(fakeClient) // Add different object with same key to memory - memoryConfigMap := &v1.ConfigMap{ + memoryConfigMap := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: "priority-test", Namespace: "test-namespace", @@ -221,7 +221,7 @@ func TestGet(t *testing.T) { require.NoError(t, err) // Get object - should come from memory, not kubernetes - retrieved := &v1.ConfigMap{} + retrieved := &corev1.ConfigMap{} err = wrapper.Get(ctx, types.NamespacedName{ Name: "priority-test", Namespace: "test-namespace", @@ -235,7 +235,7 @@ func TestGet(t *testing.T) { fakeClient := fake.NewClientBuilder().WithScheme(scheme.Scheme).Build() wrapper := utils.NewKubeClientWrapper(fakeClient) - retrieved := &v1.ConfigMap{} + retrieved := &corev1.ConfigMap{} err := wrapper.Get(ctx, types.NamespacedName{ Name: "nonexistent", Namespace: "test-namespace", @@ -255,12 +255,12 @@ func TestConcurrentAccess(t *testing.T) { numRoutines := 10 // Start multiple goroutines adding objects - for i := 0; i < numRoutines; i++ { + for i := range numRoutines { wg.Add(1) go func(id int) { defer wg.Done() - configMap := &v1.ConfigMap{ + configMap := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: fmt.Sprintf("concurrent-config-%d", id), Namespace: "test-namespace", @@ -276,12 +276,12 @@ func TestConcurrentAccess(t *testing.T) { } // Start multiple goroutines reading objects - for i := 0; i < numRoutines; i++ { + for i := range numRoutines { wg.Add(1) go func(id int) { defer wg.Done() - retrieved := &v1.ConfigMap{} + retrieved := &corev1.ConfigMap{} err := wrapper.Get(ctx, types.NamespacedName{ Name: fmt.Sprintf("concurrent-config-%d", id), Namespace: "test-namespace", @@ -297,8 +297,8 @@ func 
TestConcurrentAccess(t *testing.T) { wg.Wait() // Verify all objects are accessible after concurrent operations - for i := 0; i < numRoutines; i++ { - retrieved := &v1.ConfigMap{} + for i := range numRoutines { + retrieved := &corev1.ConfigMap{} err := wrapper.Get(ctx, types.NamespacedName{ Name: fmt.Sprintf("concurrent-config-%d", i), Namespace: "test-namespace", @@ -317,7 +317,7 @@ func TestDifferentObjectTypes(t *testing.T) { t.Run("should handle different object types independently", func(t *testing.T) { // Add ConfigMap - configMap := &v1.ConfigMap{ + configMap := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: "same-name", Namespace: "test-namespace", @@ -330,7 +330,7 @@ func TestDifferentObjectTypes(t *testing.T) { require.NoError(t, err) // Add Secret with same name and namespace - secret := &v1.Secret{ + secret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Name: "same-name", Namespace: "test-namespace", @@ -343,7 +343,7 @@ func TestDifferentObjectTypes(t *testing.T) { require.NoError(t, err) // Retrieve ConfigMap - retrievedConfig := &v1.ConfigMap{} + retrievedConfig := &corev1.ConfigMap{} err = wrapper.Get(ctx, types.NamespacedName{ Name: "same-name", Namespace: "test-namespace", @@ -352,7 +352,7 @@ func TestDifferentObjectTypes(t *testing.T) { assert.Equal(t, "configmap", retrievedConfig.Data["type"]) // Retrieve Secret - retrievedSecret := &v1.Secret{} + retrievedSecret := &corev1.Secret{} err = wrapper.Get(ctx, types.NamespacedName{ Name: "same-name", Namespace: "test-namespace", diff --git a/go/pkg/app/app.go b/go/pkg/app/app.go index de61918f9..932808c85 100644 --- a/go/pkg/app/app.go +++ b/go/pkg/app/app.go @@ -429,6 +429,7 @@ func Start(getExtensionConfig GetExtensionConfig) { } } + //nolint:govet if webhookCertWatcher != nil { setupLog.Info("Adding webhook certificate watcher to manager") if err := mgr.Add(webhookCertWatcher); err != nil { @@ -484,7 +485,6 @@ func configureNamespaceWatching(watchNamespacesList []string) 
map[string]cache.C if len(watchNamespacesList) == 0 { setupLog.Info("Watching all namespaces (no valid namespaces specified)") return map[string]cache.Config{"": {}} - } setupLog.Info("Watching specific namespaces at cache level", "namespaces", watchNamespacesList) diff --git a/go/pkg/client/agent.go b/go/pkg/client/agent.go index 4cc40e22b..d4bd42e58 100644 --- a/go/pkg/client/agent.go +++ b/go/pkg/client/agent.go @@ -101,6 +101,6 @@ func (c *agentClient) DeleteAgent(ctx context.Context, agentRef string) error { if err != nil { return err } - resp.Body.Close() //nolint:errcheck + resp.Body.Close() return nil } diff --git a/go/pkg/client/api/types.go b/go/pkg/client/api/types.go index a11ec3c77..a3a4b0e82 100644 --- a/go/pkg/client/api/types.go +++ b/go/pkg/client/api/types.go @@ -43,13 +43,13 @@ type VersionResponse struct { // ModelConfigResponse represents a model configuration response type ModelConfigResponse struct { - Ref string `json:"ref"` - ProviderName string `json:"providerName"` - Model string `json:"model"` - APIKeySecret string `json:"apiKeySecret"` - APIKeySecretKey string `json:"apiKeySecretKey"` - ModelParams map[string]interface{} `json:"modelParams"` - TLS *v1alpha2.TLSConfig `json:"tls,omitempty"` + Ref string `json:"ref"` + ProviderName string `json:"providerName"` + Model string `json:"model"` + APIKeySecret string `json:"apiKeySecret"` + APIKeySecretKey string `json:"apiKeySecretKey"` + ModelParams map[string]any `json:"modelParams"` + TLS *v1alpha2.TLSConfig `json:"tls,omitempty"` } // CreateModelConfigRequest represents a request to create a model configuration @@ -145,11 +145,11 @@ type ToolServerResponse struct { // MemoryResponse represents a memory response type MemoryResponse struct { - Ref string `json:"ref"` - ProviderName string `json:"providerName"` - APIKeySecretRef string `json:"apiKeySecretRef"` - APIKeySecretKey string `json:"apiKeySecretKey"` - MemoryParams map[string]interface{} `json:"memoryParams"` + Ref string 
`json:"ref"` + ProviderName string `json:"providerName"` + APIKeySecretRef string `json:"apiKeySecretRef"` + APIKeySecretKey string `json:"apiKeySecretKey"` + MemoryParams map[string]any `json:"memoryParams"` } // CreateMemoryRequest represents a request to create a memory @@ -185,11 +185,11 @@ type ProviderInfo struct { // SessionRunsResponse represents the response for session runs type SessionRunsResponse struct { - Status bool `json:"status"` - Data interface{} `json:"data"` + Status bool `json:"status"` + Data any `json:"data"` } // SessionRunsData represents the data part of session runs response type SessionRunsData struct { - Runs []interface{} `json:"runs"` + Runs []any `json:"runs"` } diff --git a/go/pkg/client/base.go b/go/pkg/client/base.go index f930496f7..962332c6a 100644 --- a/go/pkg/client/base.go +++ b/go/pkg/client/base.go @@ -83,7 +83,7 @@ func (c *BaseClient) addUserID(req *http.Request, userID string) { req.Header.Set("X-User-ID", userID) } -func (c *BaseClient) doRequest(ctx context.Context, method, path string, body interface{}, userID string) (*http.Response, error) { +func (c *BaseClient) doRequest(ctx context.Context, method, path string, body any, userID string) (*http.Response, error) { var reqBody io.Reader if body != nil { jsonBody, err := json.Marshal(body) @@ -113,7 +113,7 @@ func (c *BaseClient) doRequest(ctx context.Context, method, path string, body in if resp.StatusCode >= 400 { bodyBytes, _ := io.ReadAll(resp.Body) - resp.Body.Close() //nolint:errcheck + resp.Body.Close() var apiErr api.APIError if json.Unmarshal(bodyBytes, &apiErr) == nil && apiErr.Error != "" { @@ -138,11 +138,11 @@ func (c *BaseClient) Get(ctx context.Context, path string, userID string) (*http return c.doRequest(ctx, http.MethodGet, path, nil, userID) } -func (c *BaseClient) Post(ctx context.Context, path string, body interface{}, userID string) (*http.Response, error) { +func (c *BaseClient) Post(ctx context.Context, path string, body any, userID string) 
(*http.Response, error) { return c.doRequest(ctx, http.MethodPost, path, body, userID) } -func (c *BaseClient) Put(ctx context.Context, path string, body interface{}, userID string) (*http.Response, error) { +func (c *BaseClient) Put(ctx context.Context, path string, body any, userID string) (*http.Response, error) { return c.doRequest(ctx, http.MethodPut, path, body, userID) } @@ -150,8 +150,8 @@ func (c *BaseClient) Delete(ctx context.Context, path string, userID string) (*h return c.doRequest(ctx, http.MethodDelete, path, nil, userID) } -func DecodeResponse(resp *http.Response, target interface{}) error { - defer resp.Body.Close() //nolint:errcheck +func DecodeResponse(resp *http.Response, target any) error { + defer resp.Body.Close() return json.NewDecoder(resp.Body).Decode(target) } diff --git a/go/pkg/client/session.go b/go/pkg/client/session.go index 66440c08b..e2b39d75d 100644 --- a/go/pkg/client/session.go +++ b/go/pkg/client/session.go @@ -14,7 +14,7 @@ type Session interface { GetSession(ctx context.Context, sessionName string) (*api.StandardResponse[*api.Session], error) UpdateSession(ctx context.Context, request *api.SessionRequest) (*api.StandardResponse[*api.Session], error) DeleteSession(ctx context.Context, sessionName string) error - ListSessionRuns(ctx context.Context, sessionName string) (*api.StandardResponse[interface{}], error) + ListSessionRuns(ctx context.Context, sessionName string) (*api.StandardResponse[any], error) } // sessionClient handles session-related requests @@ -123,7 +123,7 @@ func (c *sessionClient) DeleteSession(ctx context.Context, sessionName string) e } // ListSessionRuns lists all runs for a specific session -func (c *sessionClient) ListSessionRuns(ctx context.Context, sessionName string) (*api.StandardResponse[interface{}], error) { +func (c *sessionClient) ListSessionRuns(ctx context.Context, sessionName string) (*api.StandardResponse[any], error) { userID := c.client.GetUserIDOrDefault("") if userID == "" { return nil, 
fmt.Errorf("userID is required") @@ -135,7 +135,7 @@ func (c *sessionClient) ListSessionRuns(ctx context.Context, sessionName string) return nil, err } - var response api.StandardResponse[interface{}] + var response api.StandardResponse[any] if err := DecodeResponse(resp, &response); err != nil { return nil, err } diff --git a/go/test/e2e/invoke_api_test.go b/go/test/e2e/invoke_api_test.go index bd5fc097e..b01af46f3 100644 --- a/go/test/e2e/invoke_api_test.go +++ b/go/test/e2e/invoke_api_test.go @@ -387,7 +387,6 @@ func buildK8sURL(baseURL string) string { } return fmt.Sprintf("http://%s:%s", localHost, port) - } func TestE2EInvokeInlineAgent(t *testing.T) { diff --git a/go/test/e2e/mocks/mock_sts_server.go b/go/test/e2e/mocks/mock_sts_server.go index 054be8f85..c3a2f558c 100644 --- a/go/test/e2e/mocks/mock_sts_server.go +++ b/go/test/e2e/mocks/mock_sts_server.go @@ -108,7 +108,7 @@ func (m *MockSTSServer) handleWellKnown(w http.ResponseWriter) { baseURL = m.k8sURL } - wellKnownConfig := map[string]interface{}{ + wellKnownConfig := map[string]any{ "issuer": baseURL, "token_endpoint": baseURL + "/token", } @@ -212,7 +212,7 @@ func (m *MockSTSServer) generateMockAccessToken(subjectToken string) (string, er return "", fmt.Errorf("invalid access token subject claim not found") } - tokenData := map[string]interface{}{ + tokenData := map[string]any{ "sub": subject, "scope": "read write", "iat": time.Now().Unix(), @@ -267,7 +267,7 @@ func extractClaimFromJWT(jwtToken string, claim string) (string, error) { } // Parse the JSON payload - var claims map[string]interface{} + var claims map[string]any if err := json.Unmarshal(payloadBytes, &claims); err != nil { return "", fmt.Errorf("failed to parse JWT claims: %v", err) } From 561e5b6292fe0f7680cebd43b78ab51b6bf19a1b Mon Sep 17 00:00:00 2001 From: dongjiang Date: Thu, 4 Dec 2025 22:48:40 +0800 Subject: [PATCH 14/17] Fix webhookCertWatcher setting (#1155) Signed-off-by: jiangdong Signed-off-by: Ivan Porta --- 
go/pkg/app/app.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/go/pkg/app/app.go b/go/pkg/app/app.go index 932808c85..deee627da 100644 --- a/go/pkg/app/app.go +++ b/go/pkg/app/app.go @@ -99,6 +99,11 @@ type Config struct { CertName string CertKey string } + Webhook struct { + CertPath string + CertName string + CertKey string + } Streaming struct { MaxBufSize resource.QuantityValue `default:"1Mi"` InitialBufSize resource.QuantityValue `default:"4Ki"` @@ -132,6 +137,10 @@ func (cfg *Config) SetFlags(commandLine *flag.FlagSet) { "The directory that contains the metrics server certificate.") commandLine.StringVar(&cfg.Metrics.CertName, "metrics-cert-name", "tls.crt", "The name of the metrics server certificate file.") commandLine.StringVar(&cfg.Metrics.CertKey, "metrics-cert-key", "tls.key", "The name of the metrics server key file.") + commandLine.StringVar(&cfg.Webhook.CertPath, "webhook-cert-path", "", + "The directory that contains the webhook server certificate.") + commandLine.StringVar(&cfg.Webhook.CertName, "webhook-cert-name", "tls.crt", "The name of the webhook server certificate file.") + commandLine.StringVar(&cfg.Webhook.CertKey, "webhook-cert-key", "tls.key", "The name of the webhook server key file.") commandLine.BoolVar(&cfg.EnableHTTP2, "enable-http2", false, "If set, HTTP/2 will be enabled for the metrics and webhook servers") @@ -282,6 +291,21 @@ func Start(getExtensionConfig GetExtensionConfig) { }) } + if len(cfg.Webhook.CertPath) > 0 { + setupLog.Info("Initializing webhook certificate watcher using provided certificates", + "webhook-cert-path", cfg.Webhook.CertPath, "webhook-cert-name", cfg.Webhook.CertName, "webhook-cert-key", cfg.Webhook.CertKey) + + var err error + webhookCertWatcher, err = certwatcher.New( + filepath.Join(cfg.Webhook.CertPath, cfg.Webhook.CertName), + filepath.Join(cfg.Webhook.CertPath, cfg.Webhook.CertKey), + ) + if err != nil { + setupLog.Error(err, "Failed to initialize webhook certificate watcher",
"error", err) + os.Exit(1) + } + } + // filter out invalid namespaces from the watchNamespaces flag (comma separated list) watchNamespacesList := filterValidNamespaces(strings.Split(cfg.WatchNamespaces, ",")) From a4fb08b25296a51194159dadeb1940257dbf82c6 Mon Sep 17 00:00:00 2001 From: Ivan Porta Date: Thu, 4 Dec 2025 23:53:02 +0900 Subject: [PATCH 15/17] restore default values Signed-off-by: Ivan Porta --- helm/kagent/values.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/helm/kagent/values.yaml b/helm/kagent/values.yaml index b2215a5eb..097700c7f 100644 --- a/helm/kagent/values.yaml +++ b/helm/kagent/values.yaml @@ -229,7 +229,7 @@ kagent-tools: agents: k8s-agent: - enabled: false + enabled: true resources: requests: cpu: 100m @@ -238,7 +238,7 @@ agents: cpu: 1000m memory: 1Gi kgateway-agent: - enabled: false + enabled: true resources: requests: cpu: 100m @@ -265,7 +265,7 @@ agents: cpu: 1000m memory: 1Gi promql-agent: - enabled: false + enabled: true resources: requests: cpu: 100m @@ -274,7 +274,7 @@ agents: cpu: 1000m memory: 1Gi observability-agent: - enabled: false + enabled: true resources: requests: cpu: 100m @@ -283,7 +283,7 @@ agents: cpu: 1000m memory: 1Gi argo-rollouts-agent: - enabled: false + enabled: true resources: requests: cpu: 100m @@ -301,7 +301,7 @@ agents: cpu: 1000m memory: 1Gi cilium-policy-agent: - enabled: false + enabled: true resources: requests: cpu: 100m @@ -310,7 +310,7 @@ agents: cpu: 1000m memory: 1Gi cilium-manager-agent: - enabled: false + enabled: true resources: requests: cpu: 100m @@ -319,7 +319,7 @@ agents: cpu: 1000m memory: 1Gi cilium-debug-agent: - enabled: false + enabled: true resources: requests: cpu: 100m From 33399c823b3581bda07e06af5582bb94330a7e2f Mon Sep 17 00:00:00 2001 From: Ivan Porta Date: Mon, 8 Dec 2025 17:12:54 +0900 Subject: [PATCH 16/17] Restore history-max and re-enable grafana and querydoc --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/Makefile b/Makefile index 2240ad402..fbf1f6855 100644 --- a/Makefile +++ b/Makefile @@ -327,7 +327,7 @@ helm-install-provider: helm-version check-api-key helm $(HELM_ACTION) kagent-crds helm/kagent-crds \ --namespace kagent \ --create-namespace \ - + --history-max 2 \ --timeout 5m \ --kube-context kind-$(KIND_CLUSTER_NAME) \ --wait \ @@ -335,6 +335,7 @@ helm-install-provider: helm-version check-api-key helm $(HELM_ACTION) kagent helm/kagent \ --namespace kagent \ --create-namespace \ + --history-max 2 \ --timeout 5m \ --kube-context kind-$(KIND_CLUSTER_NAME) \ --wait \ From 90ea320c03605a1887a260e92b0d5a576d41b163 Mon Sep 17 00:00:00 2001 From: Ivan Porta Date: Mon, 8 Dec 2025 17:13:07 +0900 Subject: [PATCH 17/17] re-enable querydoc --- helm/kagent/values.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helm/kagent/values.yaml b/helm/kagent/values.yaml index 097700c7f..caea89100 100644 --- a/helm/kagent/values.yaml +++ b/helm/kagent/values.yaml @@ -334,9 +334,9 @@ agents: tools: grafana-mcp: - enabled: false + enabled: true querydoc: - enabled: false + enabled: true grafana-mcp: grafana: