
Commit 65494a3

olesho and Claude authored
Local Dockerised Eval Server (#52)
# Local Dockerised Eval Server

## Summary by CodeRabbit

- New Features
  - Centralized LLM configuration with programmatic panel API, per-request overrides, persistent settings, multi-tier models (main/mini/nano), and per-client application during evaluations.
  - Dynamic configuration via JSON-RPC (`configure_llm`) for runtime provider updates.
- Documentation
  - Added comprehensive LLM configuration docs, provider guides, protocol examples, usage snippets, and an environment template.
- Chores
  - Default ports updated for evaluation/HTTP endpoints and examples adjusted for automated mode (auth disabled by default).

Co-authored-by: Claude <noreply@anthropic.com>
1 parent: c561c79 · commit: 65494a3

22 files changed: 2,170 additions, 188 deletions

MODEL-CONFIGS.md

Lines changed: 450 additions & 0 deletions
Large diffs are not rendered by default.

eval-server/nodejs/.env.example

Lines changed: 45 additions & 0 deletions
```bash
# Evaluation Server Configuration
# Copy this file to .env and configure your settings

# Server Configuration
PORT=8080
HOST=127.0.0.1

# LLM Provider API Keys
# Configure one or more providers for evaluation

# OpenAI Configuration
OPENAI_API_KEY=sk-your-openai-api-key-here

# LiteLLM Configuration (if using a LiteLLM server)
LITELLM_ENDPOINT=http://localhost:4000
LITELLM_API_KEY=your-litellm-api-key-here

# Groq Configuration
GROQ_API_KEY=gsk_your-groq-api-key-here

# OpenRouter Configuration
OPENROUTER_API_KEY=sk-or-v1-your-openrouter-api-key-here

# Default LLM Configuration for Evaluations
# These will be used as fallbacks when not specified in evaluation requests
DEFAULT_PROVIDER=openai
DEFAULT_MAIN_MODEL=gpt-4
DEFAULT_MINI_MODEL=gpt-4-mini
DEFAULT_NANO_MODEL=gpt-3.5-turbo

# Logging Configuration
LOG_LEVEL=info
LOG_DIR=./logs

# Client Configuration
CLIENTS_DIR=./clients
EVALS_DIR=./evals

# RPC Configuration
RPC_TIMEOUT=30000

# Security
# Set this to enable authentication for client connections
# Leave empty to disable authentication
AUTH_SECRET_KEY=
```
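For orientation, here is a minimal sketch of how a Node process might consume this template. It is an assumption for illustration only: the commit's actual loading logic lives in `src/config.js` (not shown in this excerpt), and the fallback values below simply mirror the template.

```javascript
// Hypothetical config loader; the real src/config.js may differ.
import 'dotenv/config'; // reads .env into process.env

export const config = {
  port: Number(process.env.PORT ?? 8080),
  host: process.env.HOST ?? '127.0.0.1',
  rpcTimeout: Number(process.env.RPC_TIMEOUT ?? 30000),
  defaults: {
    provider: process.env.DEFAULT_PROVIDER ?? 'openai',
    mainModel: process.env.DEFAULT_MAIN_MODEL ?? 'gpt-4',
    miniModel: process.env.DEFAULT_MINI_MODEL ?? 'gpt-4-mini',
    nanoModel: process.env.DEFAULT_NANO_MODEL ?? 'gpt-3.5-turbo',
  },
  // An empty AUTH_SECRET_KEY disables authentication, per the template comments.
  authSecretKey: process.env.AUTH_SECRET_KEY || null,
};
```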

eval-server/nodejs/CLAUDE.md

Lines changed: 89 additions & 2 deletions
````diff
@@ -22,6 +22,16 @@ bo-eval-server is a WebSocket-based evaluation server for LLM agents that implem
 - `OPENAI_API_KEY` - OpenAI API key for LLM judge functionality
 - `PORT` - WebSocket server port (default: 8080)
 
+### LLM Provider Configuration (Optional)
+- `GROQ_API_KEY` - Groq API key for Groq provider support
+- `OPENROUTER_API_KEY` - OpenRouter API key for OpenRouter provider support
+- `LITELLM_ENDPOINT` - LiteLLM server endpoint URL
+- `LITELLM_API_KEY` - LiteLLM API key for LiteLLM provider support
+- `DEFAULT_PROVIDER` - Default LLM provider (openai, groq, openrouter, litellm)
+- `DEFAULT_MAIN_MODEL` - Default main model name
+- `DEFAULT_MINI_MODEL` - Default mini model name
+- `DEFAULT_NANO_MODEL` - Default nano model name
+
 ## Architecture
 
 ### Core Components
@@ -33,10 +43,11 @@
 - Handles bidirectional RPC communication
 
 **RPC Client** (`src/rpc-client.js`)
-- Implements JSON-RPC 2.0 protocol for server-to-client calls
+- Implements JSON-RPC 2.0 protocol for bidirectional communication
 - Manages request/response correlation with unique IDs
 - Handles timeouts and error conditions
 - Calls `Evaluate(request: String) -> String` method on connected agents
+- Supports `configure_llm` method for dynamic LLM provider configuration
 
 **LLM Evaluator** (`src/evaluator.js`)
 - Integrates with OpenAI API for LLM-as-a-judge functionality
@@ -78,7 +89,10 @@ logs/ # Log files (created automatically)
 ### Key Features
 
 - **Bidirectional RPC**: Server can call methods on connected clients
-- **LLM-as-a-Judge**: Automated evaluation of agent responses using GPT-4
+- **Multi-Provider LLM Support**: Support for OpenAI, Groq, OpenRouter, and LiteLLM providers
+- **Dynamic LLM Configuration**: Runtime configuration via `configure_llm` JSON-RPC method
+- **Per-Client Configuration**: Each connected client can have different LLM settings
+- **LLM-as-a-Judge**: Automated evaluation of agent responses using configurable LLM providers
 - **Concurrent Evaluations**: Support for multiple agents and parallel evaluations
 - **Structured Logging**: All interactions logged as JSON for analysis
 - **Interactive CLI**: Built-in CLI for testing and server management
@@ -93,6 +107,79 @@ Agents must implement:
 - `Evaluate(task: string) -> string` method
 - "ready" message to signal availability for evaluations
 
+### Model Configuration Schema
+
+The server uses a canonical nested model configuration format that allows per-tier provider and API key settings:
+
+#### Model Configuration Structure
+
+```typescript
+interface ModelTierConfig {
+  provider: string;  // "openai" | "groq" | "openrouter" | "litellm"
+  model: string;     // Model name (e.g., "gpt-4", "llama-3.1-8b-instant")
+  api_key: string;   // API key for this tier
+}
+
+interface ModelConfig {
+  main_model: ModelTierConfig;  // Primary model for complex tasks
+  mini_model: ModelTierConfig;  // Secondary model for simpler tasks
+  nano_model: ModelTierConfig;  // Tertiary model for basic tasks
+}
+```
+
+#### Example: Evaluation with Model Configuration
+
+```json
+{
+  "jsonrpc": "2.0",
+  "method": "evaluate",
+  "params": {
+    "tool": "chat",
+    "input": {"message": "Hello"},
+    "model": {
+      "main_model": {
+        "provider": "openai",
+        "model": "gpt-4",
+        "api_key": "sk-main-key"
+      },
+      "mini_model": {
+        "provider": "openai",
+        "model": "gpt-4-mini",
+        "api_key": "sk-mini-key"
+      },
+      "nano_model": {
+        "provider": "groq",
+        "model": "llama-3.1-8b-instant",
+        "api_key": "gsk-nano-key"
+      }
+    }
+  }
+}
+```
+
+### Dynamic LLM Configuration
+
+The server supports runtime LLM configuration via the `configure_llm` JSON-RPC method:
+
+```json
+{
+  "jsonrpc": "2.0",
+  "method": "configure_llm",
+  "params": {
+    "provider": "openai|groq|openrouter|litellm",
+    "apiKey": "your-api-key",
+    "endpoint": "endpoint-url-for-litellm",
+    "models": {
+      "main": "main-model-name",
+      "mini": "mini-model-name",
+      "nano": "nano-model-name"
+    },
+    "partial": false
+  },
+  "id": "config-request-id"
+}
+```
+
 ### Configuration
 
 All configuration is managed through environment variables and `src/config.js`. Key settings:
````
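The diff documents only the request side of `configure_llm`. JSON-RPC 2.0 requires a response that echoes the request `id`, so a success reply would look roughly like the sketch below; the exact `result` payload is an assumption, not something this commit shows.

```json
{
  "jsonrpc": "2.0",
  "result": {
    "status": "updated",
    "provider": "groq"
  },
  "id": "config-request-id"
}
```

The `partial` flag in the request presumably controls whether the supplied fields replace the whole configuration or are merged into it; with `partial: true`, a client could update, say, only the `mini` model while leaving the other tiers untouched.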

eval-server/nodejs/README.md

Lines changed: 17 additions & 1 deletion
```diff
@@ -145,7 +145,23 @@ server.onConnect(async client => {
       message: "Your question here"
     },
     timeout: 30000, // Optional timeout (ms)
-    model: {}, // Optional model config
+    model: { // Optional nested model config
+      main_model: {
+        provider: "openai",
+        model: "gpt-4",
+        api_key: "sk-..."
+      },
+      mini_model: {
+        provider: "openai",
+        model: "gpt-4-mini",
+        api_key: "sk-..."
+      },
+      nano_model: {
+        provider: "groq",
+        model: "llama-3.1-8b-instant",
+        api_key: "gsk-..."
+      }
+    },
     metadata: { // Optional metadata
       tags: ['api', 'test']
     }
```
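Since the `.env` template marks the `DEFAULT_*` variables as fallbacks "used when not specified in evaluation requests", the nested `model` block above should be omittable. A hedged sketch follows; the `client.evaluate(...)` call shape is extrapolated from the README fragment above and is not confirmed by this excerpt.

```javascript
// Hypothetical call relying on server-side defaults:
// omitting `model` should fall back to DEFAULT_PROVIDER / DEFAULT_*_MODEL.
const response = await client.evaluate({
  tool: "chat",
  input: { message: "Your question here" },
  timeout: 30000
  // no `model` key: the .env defaults apply
});
```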
