redis-applied-ai
diff --git a/‎.gitignore‎
Lines changed: 5 additions & 0 deletions b/‎.gitignore‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎config.yaml.example‎
Lines changed: 107 additions & 0 deletions b/‎config.yaml.example‎
Lines changed: 107 additions & 0 deletions
diff --git a/‎docker-compose.test.yml‎
Lines changed: 1 addition & 1 deletion b/‎docker-compose.test.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docker-compose.yml‎
Lines changed: 71 additions & 4 deletions b/‎docker-compose.yml‎
Lines changed: 71 additions & 4 deletions
diff --git a/‎docs/concepts/core.md‎
Lines changed: 1 addition & 1 deletion b/‎docs/concepts/core.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/how-to/api.md‎
Lines changed: 31 additions & 26 deletions b/‎docs/how-to/api.md‎
Lines changed: 31 additions & 26 deletions
@@ -134,3 +134,8 @@ artifacts/
 ui/node_modules/
 ui/ui-kit/node_modules/
 ui/test-results/
+
+# SSL certificates (generated locally)
+monitoring/nginx/certs/
+
+# Local configuration (copy of config.yaml.example) and evaluation reports
+config.yaml
+eval_reports
@@ -0,0 +1,107 @@
+# Redis SRE Agent Configuration
+# Copy this file to config.yaml and customize for your environment.
+#
+# Settings can be loaded from (priority order):
+#   1. Environment variables (highest priority)
+#   2. .env file
+#   3. config.yaml (this file)
+#   4. Default values (lowest priority)
+#
+# Set the SRE_AGENT_CONFIG environment variable to load the config from a custom path.
+
+# Application settings
+# debug: false
+# log_level: INFO
+
+# Server settings
+# host: "0.0.0.0"
+# port: 8000
+
+# MCP (Model Context Protocol) servers configuration
+# This is the primary use case for YAML config - complex nested structures
+mcp_servers:
+  # Memory server for long-term agent memory
+  # Command line: uv tool run --from agent-memory-server agent-memory mcp
+  redis-memory-server:
+    command: uv
+    args:
+      - tool
+      - run
+      - --from
+      - agent-memory-server
+      - agent-memory
+      - mcp
+    env:
+      # NOTE(review): port 6399 is not Redis's default 6379 — confirm it matches
+      # the Redis instance the memory server should use.
+      REDIS_URL: redis://localhost:6399
+    # Description overrides keyed by tool name.
+    # NOTE(review): `{original}` is presumably replaced by the tool's original
+    # description when the config is loaded — confirm against the loader.
+    tools:
+      get_current_datetime:
+        description: |
+          Get the current date and time. Use this when you need to
+          record timestamps for Redis instance events or incidents.
+
+          {original}
+      create_long_term_memories:
+        description: |
+          Save long-term memories about Redis instances. Use this to
+          record: past incidents and their resolutions, configuration
+          changes, performance baselines, known issues, maintenance
+          history, and lessons learned. Always include the instance_id
+          in the memory text for future retrieval.
+
+          {original}
+      search_long_term_memory:
+        description: |
+          Search saved memories about Redis instances. ALWAYS use this
+          before troubleshooting a Redis instance to recall past issues,
+          solutions, and context. Search by instance_id, error patterns,
+          or symptoms.
+
+          {original}
+      get_long_term_memory:
+        description: |
+          Retrieve a specific memory by ID. Use this to get full details
+          of a memory found via search.
+
+          {original}
+      edit_long_term_memory:
+        description: |
+          Update an existing memory. Use this to add new information to
+          a past incident record, update resolution status, or correct
+          outdated information.
+
+          {original}
+      delete_long_term_memories:
+        description: |
+          Delete memories that are no longer relevant. Use sparingly -
+          prefer editing to add context rather than deleting.
+
+          {original}
+
+  # GitHub MCP server for repository operations
+  # Option 1: Local Docker (requires Docker to be running)
+  # `-e GITHUB_PERSONAL_ACCESS_TOKEN` (no value) tells `docker run` to forward
+  # the variable from its own environment into the container; that environment
+  # is presumably populated from the `env` block below — confirm against the loader.
+  github:
+    command: docker
+    args:
+      - run
+      - -i
+      - --rm
+      - -e
+      - GITHUB_PERSONAL_ACCESS_TOKEN
+      - ghcr.io/github/github-mcp-server
+    env:
+      # Set your GitHub Personal Access Token here or via environment variable.
+      # Quoted so the expanded value always stays a string, matching the
+      # quoted interpolation used in Option 2 below.
+      GITHUB_PERSONAL_ACCESS_TOKEN: "${GITHUB_PERSONAL_ACCESS_TOKEN}"
+
+  # Option 2: Remote GitHub MCP server (recommended, no Docker needed)
+  # Uncomment the following and comment out the local Docker option above:
+  # github:
+  #   url: "https://api.githubcopilot.com/mcp/"
+  #   headers:
+  #     Authorization: "Bearer ${GITHUB_PERSONAL_ACCESS_TOKEN}"
+  #   # transport: streamable_http  # default, uses Streamable HTTP protocol
+
+# Tool providers configuration (fully qualified class paths)
+# tool_providers:
+#   - redis_sre_agent.tools.metrics.prometheus.provider.PrometheusToolProvider
+#   - redis_sre_agent.tools.diagnostics.redis_command.provider.RedisCommandToolProvider
+#   - redis_sre_agent.tools.logs.loki.provider.LokiToolProvider
+#   - redis_sre_agent.tools.host_telemetry.provider.HostTelemetryToolProvider
@@ -87,7 +87,7 @@ services:
       context: .
       dockerfile: Dockerfile
     ports:
-      - "8000:8000"
+      - "8080:8000"
     environment:
       - REDIS_URL=redis://redis-demo:6379/0  # Internal container port stays 6379
       - PROMETHEUS_URL=http://prometheus:9090
 
@@ -9,10 +9,12 @@ services:
       - ./monitoring/redis.conf:/usr/local/etc/redis/redis.conf
     command: redis-server /usr/local/etc/redis/redis.conf
     healthcheck:
-      test: ["CMD", "redis-cli", "ping"]
-      interval: 10s
+      # Wait for Redis to finish loading before marking healthy
+      test: ["CMD-SHELL", "redis-cli ping | grep -q PONG && redis-cli INFO persistence | grep -q 'loading:0'"]
+      interval: 5s
       timeout: 5s
-      retries: 3
+      retries: 10
+      start_period: 10s
     networks:
       - sre-network
 
@@ -273,7 +275,7 @@ services:
       context: .
       dockerfile: Dockerfile
     ports:
-      - "8000:8000"
+      - "8080:8000"
     environment:
       - REDIS_URL=redis://redis:6379/0  # Internal container port stays 6379
       - TOOLS_PROMETHEUS_URL=http://prometheus:9090
@@ -327,6 +329,71 @@ services:
     networks:
       - sre-network
 
+  # GitHub MCP Server - exposes GitHub tools via MCP.
+  # mcp-proxy puts the GitHub MCP server behind an SSE/HTTP endpoint on port
+  # 8082 so the sre-worker can connect to it without needing Docker-in-Docker
+  # permissions.
+  # Start with: docker compose --profile mcp up
+  # NOTE(review): the wrapped server is started with `docker run` through the
+  # mounted host Docker socket, which gives this container control of the host
+  # Docker daemon. Assumes the mcp-proxy image ships a docker CLI — TODO confirm.
+  github-mcp:
+    image: ghcr.io/sparfenyuk/mcp-proxy:latest
+    profiles:
+      - mcp
+    networks:
+      - sre-network
+    ports:
+      - "8082:8082"
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
+    environment:
+      - GITHUB_PERSONAL_ACCESS_TOKEN=${GITHUB_PERSONAL_ACCESS_TOKEN}
+    # mcp-proxy flags, followed by the server command it wraps.
+    command:
+      - --pass-environment
+      - --port=8082
+      - --host=0.0.0.0
+      - docker
+      - run
+      - -i
+      - --rm
+      - -e
+      - GITHUB_PERSONAL_ACCESS_TOKEN
+      - ghcr.io/github/github-mcp-server
+
+  # SRE Agent MCP Server - exposes agent capabilities via Model Context Protocol.
+  # Connect Claude to this via: Settings > Connectors > Add Custom Connector
+  #   HTTP:  http://localhost:8081/mcp
+  #   HTTPS: https://localhost:8450/mcp (requires running scripts/generate-mcp-certs.sh first)
+  # Start with: docker compose --profile mcp up
+  # Or with SSL: docker compose --profile ssl up
+  sre-mcp:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    profiles:
+      - mcp
+      - ssl
+    networks:
+      - sre-network
+    depends_on:
+      redis:
+        condition: service_healthy
+    ports:
+      - "8081:8081"
+    volumes:
+      - .env:/app/.env
+      - ./redis_sre_agent:/app/redis_sre_agent
+    environment:
+      - REDIS_URL=redis://redis:6379/0
+      - REDIS_SRE_MASTER_KEY=${REDIS_SRE_MASTER_KEY}
+      - TOOLS_PROMETHEUS_URL=http://prometheus:9090
+      - TOOLS_LOKI_URL=http://loki:3100
+    # Numeric-looking arguments are quoted so every list item is a string.
+    command:
+      - uv
+      - run
+      - redis-sre-agent
+      - mcp
+      - serve
+      - --transport
+      - http
+      - --host
+      - "0.0.0.0"
+      - --port
+      - "8081"
+
+  # MCP SSL Proxy - nginx providing HTTPS termination (host port 8450) for the MCP server.
+  # Run scripts/generate-mcp-certs.sh first to generate self-signed certs.
+  # Only start with: docker compose --profile ssl up
+  sre-mcp-ssl:
+    image: nginx:alpine
+    profiles:
+      - ssl
+    networks:
+      - sre-network
+    depends_on:
+      sre-mcp:
+        condition: service_started
+    ports:
+      - "8450:443"
+    volumes:
+      - ./monitoring/nginx/mcp-ssl.conf:/etc/nginx/conf.d/default.conf:ro
+      - ./monitoring/nginx/certs:/etc/nginx/certs:ro
+
   # SRE Agent UI
   sre-ui:
     build:
 
@@ -27,7 +27,7 @@ This section explains the core ideas behind Redis SRE Agent and how pieces fit t
   When you create a task, the API creates or reuses a thread to store the execution history. You can:
   - Poll the task for status: `GET /api/v1/tasks/{task_id}`
   - Read the thread for results: `GET /api/v1/threads/{thread_id}`
-  - Stream updates via WebSocket: `ws://localhost:8000/api/v1/ws/tasks/{thread_id}`
+  - Stream updates via WebSocket: `ws://localhost:8080/api/v1/ws/tasks/{thread_id}` (Docker Compose) or port 8000 (local)
 
 - **Jobs**
   - Ad-hoc jobs: On-demand via CLI or API. Each run creates a task and streams results to a thread.
 
@@ -6,6 +6,8 @@ This guide shows how to use the HTTP API end-to-end: check health, add an instan
 - Services running (Docker Compose or local uvicorn + worker)
 - If you enabled auth in your environment, include your API key header as needed
 
+**Port Note**: Docker Compose exposes the API on port **8080**, while local uvicorn uses port **8000**. Examples below use port 8080 (Docker Compose). Replace with 8000 if running locally.
+
 ### 1) Start services (choose one)
 - Docker Compose
 ```bash
@@ -26,20 +28,21 @@ uv run redis-sre-agent worker --concurrency 4
 ### 2) Health and readiness
 ```bash
 # Root health (fast)
-curl -fsS http://localhost:8000/
+# Use port 8080 for Docker Compose, port 8000 for local uvicorn
+curl -fsS http://localhost:8080/
 
 # Detailed health (Redis, vector index, workers)
-curl -fsS http://localhost:8000/api/v1/health | jq
+curl -fsS http://localhost:8080/api/v1/health | jq
 
 # Prometheus metrics (scrape this)
-curl -fsS http://localhost:8000/api/v1/metrics | head -n 20
+curl -fsS http://localhost:8080/api/v1/metrics | head -n 20
 ```
 
 ### 3) Manage Redis instances
 Create the instance the agent will triage, then verify a connection.
 ```bash
 # Create instance
-curl -fsS -X POST http://localhost:8000/api/v1/instances \
+curl -fsS -X POST http://localhost:8080/api/v1/instances \
   -H 'Content-Type: application/json' \
   -d '{
     "name": "prod-cache",
@@ -50,14 +53,14 @@ curl -fsS -X POST http://localhost:8000/api/v1/instances \
   }' | jq
 
 # List & inspect
-curl -fsS http://localhost:8000/api/v1/instances | jq
-curl -fsS http://localhost:8000/api/v1/instances/<id> | jq
+curl -fsS http://localhost:8080/api/v1/instances | jq
+curl -fsS http://localhost:8080/api/v1/instances/<id> | jq
 
 # Test connection (by ID)
-curl -fsS -X POST http://localhost:8000/api/v1/instances/<id>/test-connection | jq
+curl -fsS -X POST http://localhost:8080/api/v1/instances/<id>/test-connection | jq
 
 # Test a raw URL (without saving)
-curl -fsS -X POST http://localhost:8000/api/v1/instances/test-connection-url \
+curl -fsS -X POST http://localhost:8080/api/v1/instances/test-connection-url \
   -H 'Content-Type: application/json' \
   -d '{"connection_url": "redis://host:6379/0"}' | jq
 ```
@@ -69,14 +72,16 @@ curl -fsS -X POST http://localhost:8000/api/v1/instances/test-connection-url \
 
 ### 4) Triage with tasks and threads
 Simplest: create a task with your question. The API will create a thread if you omit `thread_id`.
+
+> **Note**: Triage performs a comprehensive analysis (metrics, logs, knowledge base, multi-topic recommendations) and typically takes **2-10 minutes** to complete. Poll the task status or use the WebSocket endpoint for real-time updates.
+
 ```bash
 # Create a task (no instance)
-curl -fsS -X POST http://localhost:8000/api/v1/tasks \
+curl -fsS -X POST http://localhost:8080/api/v1/tasks \
   -H 'Content-Type: application/json' \
   -d '{"message": "Explain high memory usage signals in Redis"}' | jq
 
 # Create a task (target a specific instance)
-curl -fsS -X POST http://localhost:8000/api/v1/tasks \
+curl -fsS -X POST http://localhost:8080/api/v1/tasks \
   -H 'Content-Type: application/json' \
   -d '{
     "message": "Check memory pressure and slow ops",
@@ -86,15 +91,15 @@ curl -fsS -X POST http://localhost:8000/api/v1/tasks \
 Poll task or inspect the thread:
 ```bash
 # Poll task status
-curl -fsS http://localhost:8000/api/v1/tasks/<task_id> | jq
+curl -fsS http://localhost:8080/api/v1/tasks/<task_id> | jq
 
 # Get the thread state (messages, updates, result)
-curl -fsS http://localhost:8000/api/v1/threads/<thread_id> | jq
+curl -fsS http://localhost:8080/api/v1/threads/<thread_id> | jq
 ```
 Real-time updates via WebSocket:
 ```bash
 # Requires a thread_id; use any ws client (wscat, websocat)
-wscat -c ws://localhost:8000/api/v1/ws/tasks/<thread_id>
+wscat -c ws://localhost:8080/api/v1/ws/tasks/<thread_id>
 # You will receive an initial_state event and subsequent progress updates
 ```
 
@@ -103,12 +108,12 @@ wscat -c ws://localhost:8000/api/v1/ws/tasks/<thread_id>
 Alternative flow: create a thread first, then submit a task on that thread.
 ```bash
 # Create thread
-curl -fsS -X POST http://localhost:8000/api/v1/threads \
+curl -fsS -X POST http://localhost:8080/api/v1/threads \
   -H 'Content-Type: application/json' \
   -d '{"user_id": "u1", "subject": "Prod triage"}' | jq
 
 # Submit a task to that thread
-curl -fsS -X POST http://localhost:8000/api/v1/tasks \
+curl -fsS -X POST http://localhost:8080/api/v1/tasks \
   -H 'Content-Type: application/json' \
   -d '{
     "thread_id": "<thread_id>",
@@ -121,20 +126,20 @@ curl -fsS -X POST http://localhost:8000/api/v1/tasks \
 Run an ingestion job, then search to confirm content is available.
 ```bash
 # Start pipeline job (ingest existing artifacts or run full if configured)
-curl -fsS -X POST http://localhost:8000/api/v1/knowledge/ingest/pipeline \
+curl -fsS -X POST http://localhost:8080/api/v1/knowledge/ingest/pipeline \
   -H 'Content-Type: application/json' \
   -d '{"operation": "ingest", "artifacts_path": "./artifacts"}' | jq
 
 # List jobs & check individual job status
-curl -fsS http://localhost:8000/api/v1/knowledge/jobs | jq
-curl -fsS http://localhost:8000/api/v1/knowledge/jobs/<job_id> | jq
+curl -fsS http://localhost:8080/api/v1/knowledge/jobs | jq
+curl -fsS http://localhost:8080/api/v1/knowledge/jobs/<job_id> | jq
 
 # Search knowledge
-curl -fsS 'http://localhost:8000/api/v1/knowledge/search?query=redis%20eviction%20policy' | jq
+curl -fsS 'http://localhost:8080/api/v1/knowledge/search?query=redis%20eviction%20policy' | jq
 ```
 Optional single-document ingestion:
 ```bash
-curl -fsS -X POST http://localhost:8000/api/v1/knowledge/ingest/document \
+curl -fsS -X POST http://localhost:8080/api/v1/knowledge/ingest/document \
   -H 'Content-Type: application/json' \
   -d '{
     "title": "Redis memory troubleshooting",
@@ -148,7 +153,7 @@ curl -fsS -X POST http://localhost:8000/api/v1/knowledge/ingest/document \
 Create a schedule to run instructions periodically, optionally bound to an instance.
 ```bash
 # Create schedule (daily)
-curl -fsS -X POST http://localhost:8000/api/v1/schedules/ \
+curl -fsS -X POST http://localhost:8080/api/v1/schedules/ \
   -H 'Content-Type: application/json' \
   -d '{
     "name": "daily-triage",
@@ -161,20 +166,20 @@ curl -fsS -X POST http://localhost:8000/api/v1/schedules/ \
   }' | jq
 
 # List/get
-curl -fsS http://localhost:8000/api/v1/schedules/ | jq
-curl -fsS http://localhost:8000/api/v1/schedules/<schedule_id> | jq
+curl -fsS http://localhost:8080/api/v1/schedules/ | jq
+curl -fsS http://localhost:8080/api/v1/schedules/<schedule_id> | jq
 
 # Trigger now (manual run)
-curl -fsS -X POST http://localhost:8000/api/v1/schedules/<schedule_id>/trigger | jq
+curl -fsS -X POST http://localhost:8080/api/v1/schedules/<schedule_id>/trigger | jq
 
 # View runs for a schedule
-curl -fsS http://localhost:8000/api/v1/schedules/<schedule_id>/runs | jq
+curl -fsS http://localhost:8080/api/v1/schedules/<schedule_id>/runs | jq
 ```
 
 ### 7) Tasks, threads, and streaming
 - Tasks: `GET /api/v1/tasks/{task_id}`
 - Threads: `GET /api/v1/threads`, `GET /api/v1/threads/{thread_id}`
-- WebSocket: `ws://localhost:8000/api/v1/ws/tasks/{thread_id}`
+- WebSocket: `ws://localhost:8080/api/v1/ws/tasks/{thread_id}`
 
 ### 8) Observability
 - Prometheus scrape: `GET /api/v1/metrics`