Skip to content

Commit ad7b13d

Browse files
committed
feat: Add read-only mode for CodeGraph to enhance performance during queries
1 parent 1316b11 commit ad7b13d

File tree

5 files changed

+116
-5
lines changed

5 files changed

+116
-5
lines changed

README.md

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,17 +24,26 @@
2424

2525
## 🎯 Overview
2626

27-
CodeGraph is a powerful CLI tool that combines MCP (Model Context Protocol) server management with sophisticated code analysis capabilities. It provides a unified interface for indexing projects, managing embeddings, and running MCP servers with multiple transport options.
27+
CodeGraph is a powerful CLI tool that combines MCP (Model Context Protocol) server management with sophisticated code analysis capabilities. It provides a unified interface for indexing projects, managing embeddings, and running MCP servers with multiple transport options. All you need now is one or more agents to create your very own deep code and project knowledge synthesizer system!
2828

2929
### Key Capabilities
3030

3131
- **🔍 Advanced Code Analysis**: Parse and analyze code across multiple languages using Tree-sitter
3232
- **🚄 Dual Transport Support**: Run MCP servers with STDIO, HTTP, or both simultaneously
3333
- **🎯 Vector Search**: Semantic code search using FAISS-powered vector embeddings
3434
- **📊 Graph-Based Architecture**: Navigate code relationships with RocksDB-backed graph storage
35-
- **⚡ High Performance**: Optimized for large codebases with parallel processing
35+
- **⚡ High Performance**: Optimized for large codebases with parallel processing and batched embeddings
3636
- **🔧 Flexible Configuration**: Extensive configuration options for embedding models and performance tuning
3737

38+
## RAW PERFORMANCE ✨✨✨
39+
40+
170K lines of Rust code parsed in 0.49 sec! 21,024 embeddings in 3:24 min! Measured on an M3 Pro (32 GB) with Qdrant/all-MiniLM-L6-v2-onnx on CPU, no Metal acceleration used!
41+
42+
```bash
43+
Parsing completed: 353/353 files, 169397 lines in 0.49s (714.5 files/s, 342852 lines/s)
44+
[00:03:24] [########################################] 21024/21024 Embeddings complete
45+
```
46+
3847
## ✨ Features
3948
4049
### Core Features

crates/codegraph-graph/src/graph.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,19 @@ impl CodeGraph {
4545
})
4646
}
4747

48+
pub fn new_read_only() -> Result<Self> {
49+
let storage = Arc::new(HighPerformanceRocksDbStorage::new_read_only("./data/graph.db")?);
50+
51+
Ok(Self {
52+
storage,
53+
node_cache: Arc::new(DashMap::with_capacity(100_000)),
54+
edge_cache: Arc::new(DashMap::with_capacity(50_000)),
55+
query_optimizer: None,
56+
query_stats: Arc::new(RwLock::new(QueryStats::default())),
57+
path_cache: Arc::new(DashMap::with_capacity(10_000)),
58+
})
59+
}
60+
4861
pub fn new_with_cache() -> Result<Self> {
4962
let cache = GraphQueryCache::new();
5063
let cache_manager = CacheManager::new(cache, Duration::from_secs(60));

crates/codegraph-graph/src/storage.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,48 @@ impl HighPerformanceRocksDbStorage {
195195
Ok(storage)
196196
}
197197

198+
pub fn new_read_only<P: AsRef<Path>>(path: P) -> Result<Self> {
199+
let mut db_opts = Options::default();
200+
// Do not create missing; expect an existing DB schema
201+
db_opts.set_compression_type(DBCompressionType::Lz4);
202+
db_opts.set_bottommost_compression_type(DBCompressionType::Zstd);
203+
db_opts.set_use_direct_reads(false);
204+
db_opts.set_use_direct_io_for_flush_and_compaction(false);
205+
db_opts.set_allow_mmap_reads(true);
206+
db_opts.set_allow_mmap_writes(false);
207+
208+
// Open existing column families in read-only mode
209+
let cf_names = vec![NODES_CF, EDGES_CF, INDICES_CF, METADATA_CF];
210+
let db = DB::open_cf_for_read_only(&db_opts, &path, cf_names, false)
211+
.map_err(|e| CodeGraphError::Database(format!("Failed to open database (read-only): {}", e)))?;
212+
213+
let batching_config = BatchingConfig::default();
214+
let db_arc = Arc::new(db);
215+
let read_cache = Arc::new(DashMap::with_capacity(100_000));
216+
let read_coalescer = ReadCoalescer::new(
217+
db_arc.clone(),
218+
NODES_CF,
219+
read_cache.clone(),
220+
batching_config.clone(),
221+
);
222+
223+
let storage = Self {
224+
db: db_arc,
225+
db_path: path.as_ref().to_path_buf(),
226+
read_cache,
227+
edge_cache: Arc::new(DashMap::with_capacity(50_000)),
228+
edge_counter: AtomicU64::new(1),
229+
memory_tables: Arc::new(RwLock::new(HashMap::new())),
230+
batching_config: batching_config.clone(),
231+
read_coalescer,
232+
};
233+
234+
// Safe to attempt reading counters in read-only mode
235+
let _ = storage.initialize_counters();
236+
237+
Ok(storage)
238+
}
239+
198240
pub(crate) fn add_node_inner(&self, node: &CodeNode) -> Result<()> {
199241
let node_id = node.id;
200242
let serializable_node = SerializableCodeNode::from(node.clone());

crates/codegraph-mcp/src/bin/codegraph.rs

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ enum Commands {
211211
#[arg(long, default_value = "512")] max_seq_len: usize,
212212
#[arg(long, help = "Remove existing .codegraph before indexing")] clean: bool,
213213
#[arg(long, default_value = "json", value_parser = clap::builder::PossibleValuesParser::new(["human","json"]))] format: String,
214+
#[arg(long, help = "Open graph in read-only mode for perf queries")] graph_readonly: bool,
214215
},
215216
#[command(about = "Serve HTTP MCP endpoint")]
216217
#[cfg(feature = "server-http")]
@@ -538,7 +539,7 @@ async fn main() -> Result<()> {
538539
}
539540
}
540541
}
541-
Commands::Perf { path, langs, warmup, trials, queries, workers, batch_size, device, max_seq_len, clean, format } => {
542+
Commands::Perf { path, langs, warmup, trials, queries, workers, batch_size, device, max_seq_len, clean, format, graph_readonly } => {
542543
handle_perf(
543544
path,
544545
langs,
@@ -551,6 +552,7 @@ async fn main() -> Result<()> {
551552
max_seq_len,
552553
clean,
553554
format,
555+
graph_readonly,
554556
)
555557
.await?;
556558
}
@@ -1171,6 +1173,7 @@ async fn handle_perf(
11711173
max_seq_len: usize,
11721174
clean: bool,
11731175
format: String,
1176+
graph_readonly: bool,
11741177
) -> Result<()> {
11751178
use std::time::Instant;
11761179
use serde_json::json;
@@ -1197,6 +1200,10 @@ async fn handle_perf(
11971200
let t0 = Instant::now();
11981201
let stats = indexer.index_project(&path).await?;
11991202
let indexing_secs = t0.elapsed().as_secs_f64();
1203+
// Release RocksDB handle before running queries (which open their own graph handles)
1204+
drop(indexer);
1205+
// Give RocksDB a brief moment to release OS locks
1206+
tokio::time::sleep(std::time::Duration::from_millis(75)).await;
12001207

12011208
let qset = if let Some(q) = queries {
12021209
q
@@ -1236,7 +1243,30 @@ async fn handle_perf(
12361243
latencies_ms.iter().sum::<f64>() / latencies_ms.len() as f64
12371244
};
12381245

1239-
let graph = codegraph_graph::CodeGraph::new()?;
1246+
// Open graph with small retry to avoid transient lock contention
1247+
let graph = {
1248+
use std::time::Duration;
1249+
let mut attempts = 0;
1250+
loop {
1251+
let open_res = if graph_readonly {
1252+
codegraph_graph::CodeGraph::new_read_only()
1253+
} else {
1254+
codegraph_graph::CodeGraph::new()
1255+
};
1256+
match open_res {
1257+
Ok(g) => break g,
1258+
Err(e) => {
1259+
let msg = e.to_string();
1260+
if msg.contains("LOCK") && attempts < 10 {
1261+
tokio::time::sleep(Duration::from_millis(50)).await;
1262+
attempts += 1;
1263+
continue;
1264+
}
1265+
return Err(e.into());
1266+
}
1267+
}
1268+
}
1269+
};
12401270
let mut any_node: Option<codegraph_core::NodeId> = None;
12411271
if let Ok(ids_raw) = std::fs::read_to_string(".codegraph/faiss_ids.json") {
12421272
if let Ok(ids) = serde_json::from_str::<Vec<codegraph_core::NodeId>>(&ids_raw) {

crates/codegraph-mcp/src/server.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,24 @@ pub async fn bin_search_with_scores(
299299
scored.dedup_by_key(|(id, _)| *id);
300300
let top: Vec<(codegraph_core::NodeId, f32)> = scored.into_iter().take(limit).collect();
301301

302-
let graph = codegraph_graph::CodeGraph::new()?;
302+
let graph = {
303+
use std::time::Duration;
304+
let mut attempts = 0;
305+
loop {
306+
match codegraph_graph::CodeGraph::new_read_only() {
307+
Ok(g) => break g,
308+
Err(e) => {
309+
let msg = e.to_string();
310+
if msg.contains("LOCK") && attempts < 10 {
311+
tokio::time::sleep(Duration::from_millis(50)).await;
312+
attempts += 1;
313+
continue;
314+
}
315+
return Err(e.into());
316+
}
317+
}
318+
}
319+
};
303320
let mut out = Vec::new();
304321
for (id, score) in top {
305322
if let Some(node) = graph.get_node(id).await? {

0 commit comments

Comments
 (0)