Skip to content

Commit 6b6a2a1

Browse files
committed
Feat: Make max output tokens configurable for agentic workflows
Added MCP_CODE_AGENT_MAX_OUTPUT_TOKENS environment variable and config setting to allow users to customize max output tokens for agentic workflows, overriding tier-based defaults. ## Configuration (config_manager.rs): Added to LLMConfig: - mcp_code_agent_max_output_tokens: Option<usize> field - Environment variable override: MCP_CODE_AGENT_MAX_OUTPUT_TOKENS - Defaults to None (uses tier-based defaults) ## Orchestrator (agentic_orchestrator.rs): AgenticConfig changes: - from_tier_with_override(tier, max_tokens_override) method - from_tier() now delegates to from_tier_with_override(tier, None) AgenticOrchestrator changes: - new_with_override() constructor accepting optional max_tokens - new() delegates to new_with_override(..., None) ## Server Integration (official_server.rs): execute_agentic_workflow() changes: - Reads config.llm.mcp_code_agent_max_output_tokens - Passes override to AgenticOrchestrator::new_with_override() - Falls back to tier defaults if not configured ## CLI Display (bin/codegraph.rs): codegraph config agent-status changes: - Calculates actual max_output_tokens used - Shows "(custom)" or "(tier default)" indicator - JSON output includes: * max_output_tokens: actual value * max_output_tokens_source: "custom" | "tier_default" ## Usage: Environment variable: export MCP_CODE_AGENT_MAX_OUTPUT_TOKENS=8000 Config file (.codegraph.toml): [llm] mcp_code_agent_max_output_tokens = 8000 Tier defaults (if not configured): - Small: 2,048 tokens - Medium: 4,096 tokens - Large: 8,192 tokens - Massive: 16,384 tokens ## Benefits: - Users can test different token limits to optimize coding flow - Override tier defaults without changing LLM configuration - Visible in agent-status command for debugging - Backward compatible (defaults to tier-based values) ~75 lines added
1 parent 5b521d6 commit 6b6a2a1

File tree

4 files changed

+64
-10
lines changed

crates/codegraph-core/src/config_manager.rs

Lines changed: 12 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -196,6 +196,11 @@ pub struct LLMConfig {
196196
#[serde(default)]
197197
pub max_completion_token: Option<usize>,
198198

199+
/// MCP code agent maximum output tokens (for agentic workflows)
200+
/// Overrides tier-based defaults if set
201+
#[serde(default)]
202+
pub mcp_code_agent_max_output_tokens: Option<usize>,
203+
199204
/// Reasoning effort for reasoning models: "minimal", "medium", "high"
200205
#[serde(default)]
201206
pub reasoning_effort: Option<String>,
@@ -223,6 +228,7 @@ impl Default for LLMConfig {
223228
insights_mode: default_insights_mode(),
224229
max_tokens: default_max_tokens(),
225230
max_completion_token: None, // Will use max_tokens if not set
231+
mcp_code_agent_max_output_tokens: None, // Use tier-based defaults if not set
226232
reasoning_effort: None, // Only for reasoning models
227233
timeout_secs: default_timeout_secs(),
228234
}
@@ -531,6 +537,12 @@ impl ConfigManager {
531537
config.llm.reasoning_effort = Some(effort);
532538
}
533539

540+
if let Ok(max_output) = std::env::var("MCP_CODE_AGENT_MAX_OUTPUT_TOKENS") {
541+
if let Ok(tokens) = max_output.parse() {
542+
config.llm.mcp_code_agent_max_output_tokens = Some(tokens);
543+
}
544+
}
545+
534546
// Logging
535547
if let Ok(level) = std::env::var("RUST_LOG") {
536548
config.logging.level = level;

crates/codegraph-mcp/src/agentic_orchestrator.rs

Lines changed: 19 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -33,13 +33,20 @@ pub struct AgenticConfig {
3333
impl AgenticConfig {
3434
/// Create tier-aware configuration
3535
pub fn from_tier(tier: ContextTier) -> Self {
36-
let (max_steps, max_tokens) = match tier {
36+
Self::from_tier_with_override(tier, None)
37+
}
38+
39+
/// Create tier-aware configuration with optional max_tokens override
40+
pub fn from_tier_with_override(tier: ContextTier, max_tokens_override: Option<usize>) -> Self {
41+
let (max_steps, default_max_tokens) = match tier {
3742
ContextTier::Small => (5, 2048), // Conservative for small models
3843
ContextTier::Medium => (10, 4096), // Moderate for medium models
3944
ContextTier::Large => (15, 8192), // Generous for large models
4045
ContextTier::Massive => (20, 16384), // Very generous for massive models
4146
};
4247

48+
let max_tokens = max_tokens_override.unwrap_or(default_max_tokens);
49+
4350
Self {
4451
max_steps,
4552
max_duration_secs: 300, // 5 minutes max
@@ -204,7 +211,17 @@ impl AgenticOrchestrator {
204211
tool_executor: Arc<GraphToolExecutor>,
205212
tier: ContextTier,
206213
) -> Self {
207-
let config = AgenticConfig::from_tier(tier);
214+
Self::new_with_override(llm_provider, tool_executor, tier, None)
215+
}
216+
217+
/// Create a new agentic orchestrator with optional max_tokens override
218+
pub fn new_with_override(
219+
llm_provider: Arc<dyn LLMProvider>,
220+
tool_executor: Arc<GraphToolExecutor>,
221+
tier: ContextTier,
222+
max_tokens_override: Option<usize>,
223+
) -> Self {
224+
let config = AgenticConfig::from_tier_with_override(tier, max_tokens_override);
208225
Self {
209226
llm_provider,
210227
tool_executor,

crates/codegraph-mcp/src/bin/codegraph.rs

Lines changed: 23 additions & 6 deletions
Original file line number · Diff line number · Diff line change
@@ -1704,13 +1704,19 @@ async fn handle_agent_status(json: bool) -> Result<()> {
17041704
};
17051705

17061706
// Get tier-specific parameters
1707-
let (max_steps, base_limit) = match tier {
1708-
ContextTier::Small => (5, 10),
1709-
ContextTier::Medium => (10, 25),
1710-
ContextTier::Large => (15, 50),
1711-
ContextTier::Massive => (20, 100),
1707+
let (max_steps, base_limit, default_max_tokens) = match tier {
1708+
ContextTier::Small => (5, 10, 2048),
1709+
ContextTier::Medium => (10, 25, 4096),
1710+
ContextTier::Large => (15, 50, 8192),
1711+
ContextTier::Massive => (20, 100, 16384),
17121712
};
17131713

1714+
// Get max output tokens (config override or tier default)
1715+
let max_output_tokens = config
1716+
.llm
1717+
.mcp_code_agent_max_output_tokens
1718+
.unwrap_or(default_max_tokens);
1719+
17141720
// Active MCP tools
17151721
let mcp_tools = vec![
17161722
("enhanced_search", "Search code with AI insights (2-5s)"),
@@ -1766,6 +1772,12 @@ async fn handle_agent_status(json: bool) -> Result<()> {
17661772
"base_search_limit": base_limit,
17671773
"cache_enabled": true,
17681774
"cache_size": 100,
1775+
"max_output_tokens": max_output_tokens,
1776+
"max_output_tokens_source": if config.llm.mcp_code_agent_max_output_tokens.is_some() {
1777+
"custom"
1778+
} else {
1779+
"tier_default"
1780+
},
17691781
},
17701782
"mcp_tools": mcp_tools.iter().map(|(name, desc)| {
17711783
serde_json::json!({
@@ -1851,7 +1863,12 @@ async fn handle_agent_status(json: bool) -> Result<()> {
18511863
"Enabled".green(),
18521864
"100".cyan()
18531865
);
1854-
println!(" Max Output Tokens: {}", "44,200".cyan());
1866+
let max_tokens_display = if config.llm.mcp_code_agent_max_output_tokens.is_some() {
1867+
format!("{} (custom)", max_output_tokens.to_string().cyan())
1868+
} else {
1869+
format!("{} (tier default)", max_output_tokens.to_string().cyan())
1870+
};
1871+
println!(" Max Output Tokens: {}", max_tokens_display);
18551872
println!();
18561873

18571874
// MCP Tools

crates/codegraph-mcp/src/official_server.rs

Lines changed: 10 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -1359,8 +1359,16 @@ impl CodeGraphMCPServer {
13591359
// Create GraphToolExecutor
13601360
let tool_executor = Arc::new(crate::GraphToolExecutor::new(graph_functions));
13611361

1362-
// Create AgenticOrchestrator
1363-
let orchestrator = AgenticOrchestrator::new(llm_provider, tool_executor, tier);
1362+
// Get max_tokens override from config if set
1363+
let max_tokens_override = config.llm.mcp_code_agent_max_output_tokens;
1364+
1365+
// Create AgenticOrchestrator with config override
1366+
let orchestrator = AgenticOrchestrator::new_with_override(
1367+
llm_provider,
1368+
tool_executor,
1369+
tier,
1370+
max_tokens_override,
1371+
);
13641372

13651373
// Get tier-appropriate prompt from PromptSelector
13661374
let prompt_selector = PromptSelector::new();

0 commit comments

Comments (0)