
Commit d77ca5d

feat: add Qwen runtime tuning configuration and enhance Qwen client with connection timeouts

1 parent e5341d3
File tree: 4 files changed, +164 −18 lines

README.md

Lines changed: 64 additions & 0 deletions

````diff
@@ -1081,6 +1081,10 @@ export CODEGRAPH_LOG_LEVEL=debug
 export CODEGRAPH_DB_PATH=/custom/path/db
 export CODEGRAPH_EMBEDDING_MODEL=local
 export CODEGRAPH_HTTP_PORT=8080
+# Qwen runtime tuning (defaults shown)
+export CODEGRAPH_QWEN_MAX_TOKENS=1024          # Limit completion length for faster docs
+export CODEGRAPH_QWEN_TIMEOUT_SECS=180         # Fall back to RAG if Qwen exceeds this (0 disables)
+export CODEGRAPH_QWEN_CONNECT_TIMEOUT_MS=5000  # Abort if the Ollama endpoint cannot be reached quickly
 ```
 
 ### Embedding Model Configuration
@@ -1475,3 +1479,63 @@ This project is dual-licensed under MIT and Apache 2.0 licenses. See [LICENSE-MI
 <p align="center">
 Completely built with Ouroboros - The next-generation of coding agent systems
 </p>
+## ⚙️ Installation (Local)
+
+> **Note:** CodeGraph runs entirely local-first. These steps build the CLI with all AI/Qwen tooling enabled.
+
+### 1. Install dependencies
+
+```bash
+# macOS (Homebrew)
+brew install faiss
+
+# Rust toolchain
+curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
+```
+
+Ensure the `faiss` libs are visible to the linker (the install script sets sensible defaults):
+
+```bash
+export LIBRARY_PATH="/opt/homebrew/opt/faiss/lib:$LIBRARY_PATH"
+export LD_LIBRARY_PATH="/opt/homebrew/opt/faiss/lib:$LD_LIBRARY_PATH"
+export DYLD_LIBRARY_PATH="/opt/homebrew/opt/faiss/lib:$DYLD_LIBRARY_PATH"
+```
+
+### 2. Build + install the CLI
+
+Run the bundled installer from the repo root:
+
+```bash
+bash install-codegraph-osx.sh
+```
+
+This compiles the release binary with the following features:
+
+```
+ai-enhanced, qwen-integration, embeddings,
+faiss, embeddings-ollama, codegraph-vector/onnx
+```
+
+The binary is copied to `~/.local/bin/codegraph` (honoring `CODEGRAPH_INSTALL_DIR` if you set it). Make sure that directory is on your `PATH`:
+
+```bash
+export PATH="$HOME/.local/bin:$PATH"
+```
+
+### 3. (Optional) Keep a local copy of the release binary
+
+If you prefer to run it from the repo, grab the compiled binary and point `CODEGRAPH_BIN` at it:
+
+```bash
+cp target/release/codegraph dist/codegraph
+export CODEGRAPH_BIN="$(pwd)/dist/codegraph"
+```
+
+### 4. Verify the MCP tools
+
+```bash
+export NOTIFY_POLLING=true   # avoid macOS FSEvents issues
+python3 test_mcp_tools.py    # exercises all MCP tools
+```
+
+You should see the MCP handshake negotiate `protocolVersion: "2025-06-18"` and each tool (including `code_documentation`) return structured JSON.
````
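
The `CODEGRAPH_QWEN_*` variables documented in the first hunk feed the Qwen client configuration introduced by this commit. As a minimal in-crate sketch (illustrative only — the helper name is hypothetical; the `QwenConfig`/`QwenClient` types and fields are the ones shown in the `qwen.rs` diff below), the equivalent of `CODEGRAPH_QWEN_MAX_TOKENS=1024`, `CODEGRAPH_QWEN_TIMEOUT_SECS=0`, and `CODEGRAPH_QWEN_CONNECT_TIMEOUT_MS=5000` expressed directly on the config struct would look like:

```rust
use std::time::Duration;

use crate::qwen::{QwenClient, QwenConfig};

/// Hypothetical helper: build a Qwen client tuned for documentation generation.
fn docs_client() -> QwenClient {
    let config = QwenConfig {
        max_tokens: 1024,                              // CODEGRAPH_QWEN_MAX_TOKENS=1024
        request_timeout: None,                         // CODEGRAPH_QWEN_TIMEOUT_SECS=0 (no request deadline)
        connect_timeout: Duration::from_millis(5_000), // CODEGRAPH_QWEN_CONNECT_TIMEOUT_MS=5000
        ..QwenConfig::default()                        // model name, base URL, context window, temperature
    };
    QwenClient::new(config)
}
```

Leaving `request_timeout` as `None` matches the commit's intent: long generations may run to completion while the separate connect timeout still fails fast when the Ollama endpoint is unreachable.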

crates/codegraph-mcp/src/config_manager.rs

Lines changed: 38 additions & 10 deletions

```diff
@@ -48,6 +48,7 @@ pub struct QwenModelConfig {
     pub max_tokens: usize,
     pub temperature: f32,
     pub timeout_seconds: u64,
+    pub connect_timeout_ms: u64,
     pub enable_caching: bool,
 }
@@ -115,9 +116,10 @@ impl Default for QwenModelConfig {
             model_name: "qwen2.5-coder-14b-128k".to_string(),
             base_url: "http://localhost:11434".to_string(),
             context_window: 128000,
-            max_tokens: 8192,
+            max_tokens: 1024,
             temperature: 0.1,
-            timeout_seconds: 90,
+            timeout_seconds: 180,
+            connect_timeout_ms: 5_000,
             enable_caching: true,
         }
     }
@@ -251,13 +253,34 @@ impl ConfigManager {
             }
         }
 
+        if let Ok(max_tokens) = env::var("CODEGRAPH_QWEN_MAX_TOKENS") {
+            if let Ok(tokens) = max_tokens.parse::<usize>() {
+                config.qwen.max_tokens = tokens;
+                debug!("Override Qwen max tokens from environment");
+            }
+        }
+
         if let Ok(temperature) = env::var("CODEGRAPH_TEMPERATURE") {
             if let Ok(temp_num) = temperature.parse::<f32>() {
                 config.qwen.temperature = temp_num;
                 debug!("Override temperature from environment");
             }
         }
 
+        if let Ok(timeout) = env::var("CODEGRAPH_QWEN_TIMEOUT_SECS") {
+            if let Ok(secs) = timeout.parse::<u64>() {
+                config.qwen.timeout_seconds = secs;
+                debug!("Override Qwen request timeout from environment");
+            }
+        }
+
+        if let Ok(connect_timeout) = env::var("CODEGRAPH_QWEN_CONNECT_TIMEOUT_MS") {
+            if let Ok(ms) = connect_timeout.parse::<u64>() {
+                config.qwen.connect_timeout_ms = ms;
+                debug!("Override Qwen connect timeout from environment");
+            }
+        }
+
         // Cache configuration
         if let Ok(cache_size) = env::var("CODEGRAPH_CACHE_SIZE") {
             if let Ok(size_num) = cache_size.parse::<usize>() {
@@ -586,6 +609,9 @@ impl ConfigManager {
         CODEGRAPH_MODEL=qwen2.5-coder-14b-128k        # Ollama model name\n\
         CODEGRAPH_OLLAMA_URL=http://localhost:11434   # Ollama server URL\n\
         CODEGRAPH_CONTEXT_WINDOW=128000               # Context window size\n\
+        CODEGRAPH_QWEN_MAX_TOKENS=1024                # Max completion tokens (0 disables limit)\n\
+        CODEGRAPH_QWEN_TIMEOUT_SECS=180               # Request timeout before falling back (0 disables)\n\
+        CODEGRAPH_QWEN_CONNECT_TIMEOUT_MS=5000        # Connection timeout to Ollama\n\
         CODEGRAPH_TEMPERATURE=0.1                     # Generation temperature\n\n\
         ## Cache Configuration\n\
         CODEGRAPH_CACHE_SIZE=1000                     # Maximum cache entries\n\
@@ -613,14 +639,16 @@ impl ConfigManager {
 #[cfg(feature = "qwen-integration")]
 impl From<CodeGraphConfig> for crate::qwen::QwenConfig {
     fn from(config: CodeGraphConfig) -> Self {
-        Self {
-            model_name: config.qwen.model_name,
-            base_url: config.qwen.base_url,
-            context_window: config.qwen.context_window,
-            max_tokens: config.qwen.max_tokens,
-            temperature: config.qwen.temperature,
-            timeout: Duration::from_secs(config.qwen.timeout_seconds),
-        }
+        let mut cfg = crate::qwen::QwenConfig::default();
+        cfg.model_name = config.qwen.model_name;
+        cfg.base_url = config.qwen.base_url;
+        cfg.context_window = config.qwen.context_window;
+        cfg.max_tokens = config.qwen.max_tokens;
+        cfg.temperature = config.qwen.temperature;
+        cfg.request_timeout = (config.qwen.timeout_seconds > 0)
+            .then(|| Duration::from_secs(config.qwen.timeout_seconds));
+        cfg.connect_timeout = Duration::from_millis(config.qwen.connect_timeout_ms.max(1));
+        cfg
     }
 }
```
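
The rewritten `From<CodeGraphConfig>` impl is where the "0 disables" rule takes effect: a `timeout_seconds` of 0 becomes `None` (no request deadline), and `connect_timeout_ms` is floored at 1 ms before it reaches the HTTP client. A standalone sketch of that arithmetic (not part of the commit; it mirrors the two expressions above):

```rust
use std::time::Duration;

// Mirrors `(timeout_seconds > 0).then(...)` from the From impl above.
fn request_timeout(timeout_seconds: u64) -> Option<Duration> {
    (timeout_seconds > 0).then(|| Duration::from_secs(timeout_seconds))
}

// Mirrors `Duration::from_millis(connect_timeout_ms.max(1))` from the From impl above.
fn connect_timeout(connect_timeout_ms: u64) -> Duration {
    Duration::from_millis(connect_timeout_ms.max(1))
}

fn main() {
    assert_eq!(request_timeout(0), None);                             // 0 disables the deadline
    assert_eq!(request_timeout(180), Some(Duration::from_secs(180))); // default from QwenModelConfig
    assert_eq!(connect_timeout(0), Duration::from_millis(1));         // never pass a zero connect timeout
    assert_eq!(connect_timeout(5_000), Duration::from_millis(5_000)); // 5 s default
}
```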

crates/codegraph-mcp/src/official_server.rs

Lines changed: 21 additions & 1 deletion

```diff
@@ -681,6 +681,11 @@ impl CodeGraphMCPServer {
     ) -> Result<CallToolResult, McpError> {
         let request = params.0;
 
+        eprintln!(
+            "📘 Generating documentation for '{}' (style: {})",
+            request.target_name, request.style
+        );
+
         // Use the existing graph database from the server
         let config = codegraph_ai::rag::engine::RAGEngineConfig {
             max_results: 15, // More context for comprehensive documentation
@@ -710,6 +715,9 @@ impl CodeGraphMCPServer {
 
         // First, gather RAG insights so we always have citations/fallback ready
         let rag_result = rag_engine.answer(&doc_query).await;
+        if rag_result.is_ok() {
+            eprintln!("🗂 RAG context assembled for '{}'", request.target_name);
+        }
         let citations: Vec<serde_json::Value> = rag_result
             .as_ref()
             .ok()
@@ -731,14 +739,20 @@ impl CodeGraphMCPServer {
 
         // If Qwen is available, use it for richer documentation synthesis
         if let Some(ref qwen_client) = server_state.qwen_client {
-            let context_limit = ((qwen_client.config.context_window as f32) * 0.6) as usize;
+            let raw_limit = ((qwen_client.config.context_window as f32) * 0.5) as usize;
+            let context_limit = raw_limit.clamp(2048, 65536);
             if let Ok(context) = crate::server::build_comprehensive_context(
                 &server_state,
                 &doc_query,
                 context_limit.max(1024),
             )
             .await
             {
+                eprintln!(
+                    "🤖 Qwen synthesis in progress for '{}' (context ~{} chars)",
+                    request.target_name,
+                    context.len()
+                );
                 match qwen_client.analyze_codebase(&doc_query, &context).await {
                     Ok(doc_result) => {
                         let response = serde_json::json!({
@@ -759,6 +773,12 @@ impl CodeGraphMCPServer {
                             }
                         });
 
+                        eprintln!(
+                            "✅ Qwen documentation ready for '{}' in {}ms",
+                            request.target_name,
+                            doc_result.processing_time.as_millis()
+                        );
+
                         return Ok(CallToolResult::success(vec![Content::text(
                             serde_json::to_string_pretty(&response).unwrap_or_else(|_| {
                                 "Error formatting documentation response".to_string()
```

crates/codegraph-mcp/src/qwen.rs

Lines changed: 41 additions & 7 deletions

```diff
@@ -1,5 +1,5 @@
 use codegraph_core::{CodeGraphError, Result};
-use reqwest::Client;
+use reqwest::{Client, ClientBuilder};
 use serde::{Deserialize, Serialize};
 use std::time::{Duration, Instant};
 use tracing::{debug, info, warn};
@@ -12,20 +12,39 @@ pub struct QwenConfig {
     pub context_window: usize,
     pub max_tokens: usize,
     pub temperature: f32,
-    pub timeout: Duration,
+    pub request_timeout: Option<Duration>,
+    pub connect_timeout: Duration,
 }
 
 impl Default for QwenConfig {
     fn default() -> Self {
+        let request_timeout = std::env::var("CODEGRAPH_QWEN_TIMEOUT_SECS")
+            .ok()
+            .and_then(|raw| raw.parse::<u64>().ok())
+            .and_then(|secs| (secs > 0).then(|| Duration::from_secs(secs)));
+
+        let connect_timeout = std::env::var("CODEGRAPH_QWEN_CONNECT_TIMEOUT_MS")
+            .ok()
+            .and_then(|raw| raw.parse::<u64>().ok())
+            .map(Duration::from_millis)
+            .unwrap_or_else(|| Duration::from_secs(5));
+
+        let max_tokens = std::env::var("CODEGRAPH_QWEN_MAX_TOKENS")
+            .ok()
+            .and_then(|raw| raw.parse::<usize>().ok())
+            .filter(|value| *value > 0)
+            .unwrap_or(1024);
+
         Self {
             model_name: std::env::var("CODEGRAPH_MODEL").unwrap_or_else(|_| {
                 "hf.co/unsloth/Qwen2.5-Coder-14B-Instruct-128K-GGUF:Q4_K_M".to_string()
             }),
            base_url: "http://localhost:11434".to_string(),
             context_window: 128000,
-            max_tokens: 8192,
+            max_tokens,
             temperature: 0.1,
-            timeout: Duration::from_secs(90),
+            request_timeout,
+            connect_timeout,
         }
     }
 }
@@ -85,10 +104,20 @@ pub struct QwenClient {
 
 impl QwenClient {
     pub fn new(config: QwenConfig) -> Self {
-        Self {
-            client: Client::new(),
-            config,
+        let mut builder = ClientBuilder::new()
+            .pool_idle_timeout(None)
+            .tcp_keepalive(Some(Duration::from_secs(30)))
+            .connect_timeout(config.connect_timeout);
+
+        if let Some(timeout) = config.request_timeout {
+            builder = builder.timeout(timeout);
+        } else {
+            builder = builder.timeout(None);
         }
+
+        let client = builder.build().expect("Failed to build Qwen HTTP client");
+
+        Self { client, config }
     }
 
     /// Generate semantic analysis using Qwen2.5-Coder with optimized prompts
@@ -126,6 +155,11 @@ impl QwenClient {
             self.config.context_window
         );
 
+        eprintln!(
+            "Qwen documentation request started (max_tokens={} timeout={:?})",
+            self.config.max_tokens, self.config.request_timeout
+        );
+
         let response = self
             .client
             .post(&format!("{}/api/chat", self.config.base_url))
```
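
With these defaults, `QwenConfig::default()` now yields a capped completion length, no request deadline, and a five-second connection timeout when none of the `CODEGRAPH_QWEN_*` variables are set. A test-style sketch of that behavior (not part of the commit; it assumes it lives alongside `QwenConfig` in `qwen.rs` and that the variables are unset in the test environment):

```rust
#[cfg(test)]
mod qwen_defaults_sketch {
    use super::QwenConfig;
    use std::time::Duration;

    #[test]
    fn defaults_match_the_documented_values() {
        // Assumes CODEGRAPH_QWEN_MAX_TOKENS / _TIMEOUT_SECS / _CONNECT_TIMEOUT_MS are unset.
        let cfg = QwenConfig::default();
        assert_eq!(cfg.max_tokens, 1024);                        // capped completion length
        assert_eq!(cfg.request_timeout, None);                   // long generations may run to completion
        assert_eq!(cfg.connect_timeout, Duration::from_secs(5)); // fail fast if Ollama is unreachable
    }
}
```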
