
Commit d77ca5d

feat: add Qwen runtime tuning configuration and enhance Qwen client with connection timeouts

1 parent e5341d3
File tree: 4 files changed, +164 −18 lines

README.md

Lines changed: 64 additions & 0 deletions

````diff
@@ -1081,6 +1081,10 @@ export CODEGRAPH_LOG_LEVEL=debug
 export CODEGRAPH_DB_PATH=/custom/path/db
 export CODEGRAPH_EMBEDDING_MODEL=local
 export CODEGRAPH_HTTP_PORT=8080
+# Qwen runtime tuning (defaults shown)
+export CODEGRAPH_QWEN_MAX_TOKENS=1024          # Limit completion length for faster docs
+export CODEGRAPH_QWEN_TIMEOUT_SECS=180         # Fall back to RAG if Qwen exceeds this (0 disables)
+export CODEGRAPH_QWEN_CONNECT_TIMEOUT_MS=5000  # Abort if the Ollama endpoint cannot be reached quickly
 ```
 
 ### Embedding Model Configuration
@@ -1475,3 +1479,63 @@ This project is dual-licensed under MIT and Apache 2.0 licenses. See [LICENSE-MI
 <p align="center">
 Completely built with Ouroboros - The next-generation of coding agent systems
 </p>
+## ⚙️ Installation (Local)
+
+> **Note:** CodeGraph runs entirely local-first. These steps build the CLI with all AI/Qwen tooling enabled.
+
+### 1. Install dependencies
+
+```bash
+# macOS (Homebrew)
+brew install faiss
+
+# Rust toolchain
+curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
+```
+
+Ensure the `faiss` libs are visible to the linker (the install script sets sensible defaults):
+
+```bash
+export LIBRARY_PATH="/opt/homebrew/opt/faiss/lib:$LIBRARY_PATH"
+export LD_LIBRARY_PATH="/opt/homebrew/opt/faiss/lib:$LD_LIBRARY_PATH"
+export DYLD_LIBRARY_PATH="/opt/homebrew/opt/faiss/lib:$DYLD_LIBRARY_PATH"
+```
+
+### 2. Build + install the CLI
+
+Run the bundled installer from the repo root:
+
+```bash
+bash install-codegraph-osx.sh
+```
+
+This compiles the release binary with the following features:
+
+```
+ai-enhanced, qwen-integration, embeddings,
+faiss, embeddings-ollama, codegraph-vector/onnx
+```
+
+The binary is copied to `~/.local/bin/codegraph` (honoring `CODEGRAPH_INSTALL_DIR` if you set it). Make sure that directory is on your `PATH`:
+
+```bash
+export PATH="$HOME/.local/bin:$PATH"
+```
+
+### 3. (Optional) Keep a local copy of the release binary
+
+If you prefer to run it from the repo, grab the compiled binary and point `CODEGRAPH_BIN` at it:
+
+```bash
+cp target/release/codegraph dist/codegraph
+export CODEGRAPH_BIN="$(pwd)/dist/codegraph"
+```
+
+### 4. Verify the MCP tools
+
+```bash
+export NOTIFY_POLLING=true   # avoid macOS FSEvents issues
+python3 test_mcp_tools.py    # exercises all MCP tools
+```
+
+You should see the MCP handshake negotiate `protocolVersion: "2025-06-18"` and each tool (including `code_documentation`) return structured JSON.
````
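
The `CODEGRAPH_QWEN_*` variables documented in the first hunk feed the Qwen client configuration introduced by this commit. As a minimal in-crate sketch (illustrative only — the helper name is hypothetical; the `QwenConfig`/`QwenClient` types and fields are the ones shown in the `qwen.rs` diff below), the equivalent of `CODEGRAPH_QWEN_MAX_TOKENS=1024`, `CODEGRAPH_QWEN_TIMEOUT_SECS=0`, and `CODEGRAPH_QWEN_CONNECT_TIMEOUT_MS=5000` expressed directly on the config struct would look like:

```rust
use std::time::Duration;

use crate::qwen::{QwenClient, QwenConfig};

/// Hypothetical helper: build a Qwen client tuned for documentation generation.
fn docs_client() -> QwenClient {
    let config = QwenConfig {
        max_tokens: 1024,                              // CODEGRAPH_QWEN_MAX_TOKENS=1024
        request_timeout: None,                         // CODEGRAPH_QWEN_TIMEOUT_SECS=0 (no request deadline)
        connect_timeout: Duration::from_millis(5_000), // CODEGRAPH_QWEN_CONNECT_TIMEOUT_MS=5000
        ..QwenConfig::default()                        // model name, base URL, context window, temperature
    };
    QwenClient::new(config)
}
```

Leaving `request_timeout` as `None` matches the commit's intent: long generations may run to completion while the separate connect timeout still fails fast when the Ollama endpoint is unreachable.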

crates/codegraph-mcp/src/config_manager.rs

Lines changed: 38 additions & 10 deletions

```diff
@@ -48,6 +48,7 @@ pub struct QwenModelConfig {
     pub max_tokens: usize,
     pub temperature: f32,
     pub timeout_seconds: u64,
+    pub connect_timeout_ms: u64,
     pub enable_caching: bool,
 }
@@ -115,9 +116,10 @@ impl Default for QwenModelConfig {
             model_name: "qwen2.5-coder-14b-128k".to_string(),
             base_url: "http://localhost:11434".to_string(),
             context_window: 128000,
-            max_tokens: 8192,
+            max_tokens: 1024,
             temperature: 0.1,
-            timeout_seconds: 90,
+            timeout_seconds: 180,
+            connect_timeout_ms: 5_000,
             enable_caching: true,
         }
     }
@@ -251,13 +253,34 @@ impl ConfigManager {
             }
         }
 
+        if let Ok(max_tokens) = env::var("CODEGRAPH_QWEN_MAX_TOKENS") {
+            if let Ok(tokens) = max_tokens.parse::<usize>() {
+                config.qwen.max_tokens = tokens;
+                debug!("Override Qwen max tokens from environment");
+            }
+        }
+
         if let Ok(temperature) = env::var("CODEGRAPH_TEMPERATURE") {
             if let Ok(temp_num) = temperature.parse::<f32>() {
                 config.qwen.temperature = temp_num;
                 debug!("Override temperature from environment");
             }
         }
 
+        if let Ok(timeout) = env::var("CODEGRAPH_QWEN_TIMEOUT_SECS") {
+            if let Ok(secs) = timeout.parse::<u64>() {
+                config.qwen.timeout_seconds = secs;
+                debug!("Override Qwen request timeout from environment");
+            }
+        }
+
+        if let Ok(connect_timeout) = env::var("CODEGRAPH_QWEN_CONNECT_TIMEOUT_MS") {
+            if let Ok(ms) = connect_timeout.parse::<u64>() {
+                config.qwen.connect_timeout_ms = ms;
+                debug!("Override Qwen connect timeout from environment");
+            }
+        }
+
         // Cache configuration
         if let Ok(cache_size) = env::var("CODEGRAPH_CACHE_SIZE") {
             if let Ok(size_num) = cache_size.parse::<usize>() {
@@ -586,6 +609,9 @@ impl ConfigManager {
         CODEGRAPH_MODEL=qwen2.5-coder-14b-128k        # Ollama model name\n\
         CODEGRAPH_OLLAMA_URL=http://localhost:11434   # Ollama server URL\n\
         CODEGRAPH_CONTEXT_WINDOW=128000               # Context window size\n\
+        CODEGRAPH_QWEN_MAX_TOKENS=1024                # Max completion tokens (0 disables limit)\n\
+        CODEGRAPH_QWEN_TIMEOUT_SECS=180               # Request timeout before falling back (0 disables)\n\
+        CODEGRAPH_QWEN_CONNECT_TIMEOUT_MS=5000        # Connection timeout to Ollama\n\
         CODEGRAPH_TEMPERATURE=0.1                     # Generation temperature\n\n\
         ## Cache Configuration\n\
         CODEGRAPH_CACHE_SIZE=1000                     # Maximum cache entries\n\
@@ -613,14 +639,16 @@ impl ConfigManager {
 #[cfg(feature = "qwen-integration")]
 impl From<CodeGraphConfig> for crate::qwen::QwenConfig {
     fn from(config: CodeGraphConfig) -> Self {
-        Self {
-            model_name: config.qwen.model_name,
-            base_url: config.qwen.base_url,
-            context_window: config.qwen.context_window,
-            max_tokens: config.qwen.max_tokens,
-            temperature: config.qwen.temperature,
-            timeout: Duration::from_secs(config.qwen.timeout_seconds),
-        }
+        let mut cfg = crate::qwen::QwenConfig::default();
+        cfg.model_name = config.qwen.model_name;
+        cfg.base_url = config.qwen.base_url;
+        cfg.context_window = config.qwen.context_window;
+        cfg.max_tokens = config.qwen.max_tokens;
+        cfg.temperature = config.qwen.temperature;
+        cfg.request_timeout = (config.qwen.timeout_seconds > 0)
+            .then(|| Duration::from_secs(config.qwen.timeout_seconds));
+        cfg.connect_timeout = Duration::from_millis(config.qwen.connect_timeout_ms.max(1));
+        cfg
     }
 }
```
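
The rewritten `From<CodeGraphConfig>` impl is where the "0 disables" rule takes effect: a `timeout_seconds` of 0 becomes `None` (no request deadline), and `connect_timeout_ms` is floored at 1 ms before it reaches the HTTP client. A standalone sketch of that arithmetic (not part of the commit; it mirrors the two expressions above):

```rust
use std::time::Duration;

// Mirrors `(timeout_seconds > 0).then(...)` from the From impl above.
fn request_timeout(timeout_seconds: u64) -> Option<Duration> {
    (timeout_seconds > 0).then(|| Duration::from_secs(timeout_seconds))
}

// Mirrors `Duration::from_millis(connect_timeout_ms.max(1))` from the From impl above.
fn connect_timeout(connect_timeout_ms: u64) -> Duration {
    Duration::from_millis(connect_timeout_ms.max(1))
}

fn main() {
    assert_eq!(request_timeout(0), None);                             // 0 disables the deadline
    assert_eq!(request_timeout(180), Some(Duration::from_secs(180))); // default from QwenModelConfig
    assert_eq!(connect_timeout(0), Duration::from_millis(1));         // never pass a zero connect timeout
    assert_eq!(connect_timeout(5_000), Duration::from_millis(5_000)); // 5 s default
}
```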

crates/codegraph-mcp/src/official_server.rs

Lines changed: 21 additions & 1 deletion

```diff
@@ -681,6 +681,11 @@ impl CodeGraphMCPServer {
     ) -> Result<CallToolResult, McpError> {
         let request = params.0;
 
+        eprintln!(
+            "📘 Generating documentation for '{}' (style: {})",
+            request.target_name, request.style
+        );
+
         // Use the existing graph database from the server
         let config = codegraph_ai::rag::engine::RAGEngineConfig {
             max_results: 15, // More context for comprehensive documentation
@@ -710,6 +715,9 @@ impl CodeGraphMCPServer {
 
         // First, gather RAG insights so we always have citations/fallback ready
         let rag_result = rag_engine.answer(&doc_query).await;
+        if rag_result.is_ok() {
+            eprintln!("🗂 RAG context assembled for '{}'", request.target_name);
+        }
         let citations: Vec<serde_json::Value> = rag_result
             .as_ref()
             .ok()
@@ -731,14 +739,20 @@ impl CodeGraphMCPServer {
 
         // If Qwen is available, use it for richer documentation synthesis
         if let Some(ref qwen_client) = server_state.qwen_client {
-            let context_limit = ((qwen_client.config.context_window as f32) * 0.6) as usize;
+            let raw_limit = ((qwen_client.config.context_window as f32) * 0.5) as usize;
+            let context_limit = raw_limit.clamp(2048, 65536);
             if let Ok(context) = crate::server::build_comprehensive_context(
                 &server_state,
                 &doc_query,
                 context_limit.max(1024),
             )
             .await
             {
+                eprintln!(
+                    "🤖 Qwen synthesis in progress for '{}' (context ~{} chars)",
+                    request.target_name,
+                    context.len()
+                );
                 match qwen_client.analyze_codebase(&doc_query, &context).await {
                     Ok(doc_result) => {
                         let response = serde_json::json!({
@@ -759,6 +773,12 @@ impl CodeGraphMCPServer {
                             }
                         });
 
+                        eprintln!(
+                            "✅ Qwen documentation ready for '{}' in {}ms",
+                            request.target_name,
+                            doc_result.processing_time.as_millis()
+                        );
+
                         return Ok(CallToolResult::success(vec![Content::text(
                             serde_json::to_string_pretty(&response).unwrap_or_else(|_| {
                                 "Error formatting documentation response".to_string()
```

crates/codegraph-mcp/src/qwen.rs

Lines changed: 41 additions & 7 deletions

```diff
@@ -1,5 +1,5 @@
 use codegraph_core::{CodeGraphError, Result};
-use reqwest::Client;
+use reqwest::{Client, ClientBuilder};
 use serde::{Deserialize, Serialize};
 use std::time::{Duration, Instant};
 use tracing::{debug, info, warn};
@@ -12,20 +12,39 @@ pub struct QwenConfig {
     pub context_window: usize,
     pub max_tokens: usize,
     pub temperature: f32,
-    pub timeout: Duration,
+    pub request_timeout: Option<Duration>,
+    pub connect_timeout: Duration,
 }
 
 impl Default for QwenConfig {
     fn default() -> Self {
+        let request_timeout = std::env::var("CODEGRAPH_QWEN_TIMEOUT_SECS")
+            .ok()
+            .and_then(|raw| raw.parse::<u64>().ok())
+            .and_then(|secs| (secs > 0).then(|| Duration::from_secs(secs)));
+
+        let connect_timeout = std::env::var("CODEGRAPH_QWEN_CONNECT_TIMEOUT_MS")
+            .ok()
+            .and_then(|raw| raw.parse::<u64>().ok())
+            .map(Duration::from_millis)
+            .unwrap_or_else(|| Duration::from_secs(5));
+
+        let max_tokens = std::env::var("CODEGRAPH_QWEN_MAX_TOKENS")
+            .ok()
+            .and_then(|raw| raw.parse::<usize>().ok())
+            .filter(|value| *value > 0)
+            .unwrap_or(1024);
+
         Self {
             model_name: std::env::var("CODEGRAPH_MODEL").unwrap_or_else(|_| {
                 "hf.co/unsloth/Qwen2.5-Coder-14B-Instruct-128K-GGUF:Q4_K_M".to_string()
             }),
            base_url: "http://localhost:11434".to_string(),
             context_window: 128000,
-            max_tokens: 8192,
+            max_tokens,
             temperature: 0.1,
-            timeout: Duration::from_secs(90),
+            request_timeout,
+            connect_timeout,
         }
     }
 }
@@ -85,10 +104,20 @@ pub struct QwenClient {
 
 impl QwenClient {
     pub fn new(config: QwenConfig) -> Self {
-        Self {
-            client: Client::new(),
-            config,
+        let mut builder = ClientBuilder::new()
+            .pool_idle_timeout(None)
+            .tcp_keepalive(Some(Duration::from_secs(30)))
+            .connect_timeout(config.connect_timeout);
+
+        if let Some(timeout) = config.request_timeout {
+            builder = builder.timeout(timeout);
+        } else {
+            builder = builder.timeout(None);
         }
+
+        let client = builder.build().expect("Failed to build Qwen HTTP client");
+
+        Self { client, config }
     }
 
     /// Generate semantic analysis using Qwen2.5-Coder with optimized prompts
@@ -126,6 +155,11 @@ impl QwenClient {
             self.config.context_window
         );
 
+        eprintln!(
+            "Qwen documentation request started (max_tokens={} timeout={:?})",
+            self.config.max_tokens, self.config.request_timeout
+        );
+
         let response = self
             .client
             .post(&format!("{}/api/chat", self.config.base_url))
```
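
With these defaults, `QwenConfig::default()` now yields a capped completion length, no request deadline, and a five-second connection timeout when none of the `CODEGRAPH_QWEN_*` variables are set. A test-style sketch of that behavior (not part of the commit; it assumes it lives alongside `QwenConfig` in `qwen.rs` and that the variables are unset in the test environment):

```rust
#[cfg(test)]
mod qwen_defaults_sketch {
    use super::QwenConfig;
    use std::time::Duration;

    #[test]
    fn defaults_match_the_documented_values() {
        // Assumes CODEGRAPH_QWEN_MAX_TOKENS / _TIMEOUT_SECS / _CONNECT_TIMEOUT_MS are unset.
        let cfg = QwenConfig::default();
        assert_eq!(cfg.max_tokens, 1024);                        // capped completion length
        assert_eq!(cfg.request_timeout, None);                   // long generations may run to completion
        assert_eq!(cfg.connect_timeout, Duration::from_secs(5)); // fail fast if Ollama is unreachable
    }
}
```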
