Skip to content

Commit 720c810

Browse files
committed
feat: Add SurrealDB as selectable database backend with flexible schema
This commit adds comprehensive SurrealDB support as an alternative to RocksDB: ## Configuration System - Added DatabaseBackend enum to select between RocksDB and SurrealDB - Created SurrealDbConfig with support for: - Multiple connection types (file://, mem://, http://, ws://) - Namespace and database selection for multi-tenancy - Optional authentication (username/password) - Strict mode for schema enforcement - Auto-migration on startup - Updated Settings struct to use new DatabaseConfig structure - Maintained backward compatibility with legacy rocksdb config ## Storage Implementation (surrealdb_storage.rs) - Implemented GraphStore trait for full CRUD operations - Features: - In-memory caching with DashMap for performance - Flexible JSON-based node representation - Automatic schema initialization - Built-in migration runner - Conversion between CodeNode and SurrealDB format - Support for embeddings, metadata, and all node attributes ## Schema Manager (surrealdb_schema.rs) - Flexible schema definition system with: - TableSchema, FieldDefinition, and IndexDefinition types - Type-safe field type mappings to SurrealDB types - Dynamic field and index addition without downtime - Schema export/import functionality (JSON format) - Helper functions for standard node/edge schemas - Designed for easy schema evolution and modifications ## Migration System (surrealdb_migrations.rs) - Versioned migration framework with: - UP/DOWN migration support for rollbacks - Migration checksum verification for integrity - Migration status tracking and reporting - Automatic migration application - Template generation for new migrations - Includes default migrations for initial schema - Migration files stored in migrations/ directory ## Feature Flag & Dependencies - Added 'surrealdb' feature flag to Cargo.toml - Optional dependency on surrealdb v2.2 - Conditional compilation for zero overhead when not used ## Documentation & Examples - Comprehensive SURREALDB_GUIDE.md covering: - 
Installation and configuration - Schema management best practices - Migration creation and management - Usage examples and advanced queries - Performance optimization tips - Migration path from RocksDB - Troubleshooting guide - Example configuration file (surrealdb_example.toml) - Updated default.toml with new database structure ## Migration SQL - Initial schema migration (001_initial_schema.sql) - Creates nodes, edges, schema_versions, and metadata tables - Includes all necessary indexes for performance This implementation is designed to be: - Flexible: Easy to add new fields without migrations - Maintainable: Clear migration system for schema evolution - Production-ready: Strict mode, authentication, and validation - Performant: Caching, indexes, and batch operations - Well-documented: Comprehensive guide and examples
1 parent 03df311 commit 720c810

File tree

10 files changed

+2186
-3
lines changed

10 files changed

+2186
-3
lines changed

config/default.toml

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,29 @@ env = "development"
44
host = "0.0.0.0"
55
port = 3000
66

7-
[rocksdb]
7+
# Database configuration
8+
[database]
9+
# Backend options: "rocksdb" (default), "surrealdb"
10+
backend = "rocksdb"
11+
12+
[database.rocksdb]
813
path = "data/graph.db"
914
read_only = false
1015

16+
# Example SurrealDB configuration.
# The whole section, including the table header, is left commented out so the
# built-in defaults apply. To use it, uncomment the header together with the
# keys you want to set and switch `backend` above to "surrealdb".
# [database.surrealdb]
# connection = "file://data/surrealdb/graph.db"
# namespace = "codegraph"
# database = "graph"
# auto_migrate = true
# strict_mode = false
23+
24+
# Deprecated: Legacy rocksdb configuration (use database.rocksdb instead)
25+
# This is kept for backward compatibility
26+
# [rocksdb]
27+
# path = "data/graph.db"
28+
# read_only = false
29+
1130
[vector]
1231
dimension = 384
1332
index = "ivf_flat"

config/surrealdb_example.toml

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# CodeGraph Configuration with SurrealDB
2+
3+
[database]
4+
# Select SurrealDB as the database backend
5+
backend = "surrealdb"
6+
7+
# RocksDB configuration (not used when backend is surrealdb, but kept for backward compatibility)
8+
[database.rocksdb]
9+
path = "data/graph.db"
10+
read_only = false
11+
12+
# SurrealDB configuration
13+
[database.surrealdb]
14+
# Connection string options:
15+
# - Local file: "file://data/surrealdb/graph.db" (keep this separate from the RocksDB path above)
16+
# - Memory (testing): "mem://"
17+
# - Remote HTTP: "http://localhost:8000"
18+
# - Remote HTTPS: "https://example.com:8000"
19+
# - WebSocket: "ws://localhost:8000"
20+
connection = "file://data/surrealdb/graph.db"
21+
22+
# Namespace for multi-tenancy (default: "codegraph")
23+
namespace = "codegraph"
24+
25+
# Database name (default: "graph")
26+
database = "graph"
27+
28+
# Optional: Authentication credentials
29+
# username = "root"
30+
# password = "root" # Can also be set via CODEGRAPH__DATABASE__SURREALDB__PASSWORD env var
31+
32+
# Enable strict schema validation (default: false)
33+
# When true, SurrealDB will enforce the defined schema strictly
34+
# When false, allows for schema flexibility and easier migrations
35+
strict_mode = false
36+
37+
# Auto-apply migrations on startup (default: true)
38+
# When true, automatically runs pending migrations when connecting
39+
auto_migrate = true
40+
41+
# Example: Remote SurrealDB server configuration
42+
# [database.surrealdb]
43+
# connection = "https://your-surrealdb-server.com:8000"
44+
# namespace = "production"
45+
# database = "codegraph"
46+
# username = "admin"
47+
# # Password should be set via environment variable:
48+
# # CODEGRAPH__DATABASE__SURREALDB__PASSWORD=your_password
49+
# strict_mode = true
50+
# auto_migrate = false
51+
52+
[server]
53+
host = "0.0.0.0"
54+
port = 3000
55+
56+
[vector]
57+
dimension = 1024
58+
59+
[logging]
60+
level = "info"
61+
62+
[security]
63+
require_auth = false
64+
rate_limit_per_minute = 1200

crates/codegraph-core/src/config.rs

Lines changed: 121 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,96 @@ impl Default for RocksDbConfig {
5050
}
5151
}
5252

53+
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
54+
pub struct SurrealDbConfig {
55+
/// Connection string for SurrealDB (e.g., "file://data/graph.db" or "http://localhost:8000")
56+
pub connection: String,
57+
/// Namespace for multi-tenancy
58+
#[serde(default = "SurrealDbConfig::default_namespace")]
59+
pub namespace: String,
60+
/// Database name
61+
#[serde(default = "SurrealDbConfig::default_database")]
62+
pub database: String,
63+
/// Optional username for authentication
64+
#[serde(default)]
65+
pub username: Option<String>,
66+
/// Optional password for authentication
67+
#[serde(default, skip_serializing)]
68+
#[schemars(skip)]
69+
pub password: Option<SecretString>,
70+
/// Enable strict schema validation
71+
#[serde(default = "SurrealDbConfig::default_strict_mode")]
72+
pub strict_mode: bool,
73+
/// Auto-apply migrations on startup
74+
#[serde(default = "SurrealDbConfig::default_auto_migrate")]
75+
pub auto_migrate: bool,
76+
}
77+
78+
impl SurrealDbConfig {
79+
fn default_namespace() -> String {
80+
"codegraph".to_string()
81+
}
82+
83+
fn default_database() -> String {
84+
"graph".to_string()
85+
}
86+
87+
fn default_strict_mode() -> bool {
88+
false
89+
}
90+
91+
fn default_auto_migrate() -> bool {
92+
true
93+
}
94+
}
95+
96+
impl Default for SurrealDbConfig {
97+
fn default() -> Self {
98+
Self {
99+
connection: "file://data/graph.db".into(),
100+
namespace: Self::default_namespace(),
101+
database: Self::default_database(),
102+
username: None,
103+
password: None,
104+
strict_mode: Self::default_strict_mode(),
105+
auto_migrate: Self::default_auto_migrate(),
106+
}
107+
}
108+
}
109+
110+
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
111+
#[serde(rename_all = "snake_case")]
112+
pub enum DatabaseBackend {
113+
RocksDb,
114+
SurrealDb,
115+
}
116+
117+
impl Default for DatabaseBackend {
118+
fn default() -> Self {
119+
Self::RocksDb
120+
}
121+
}
122+
123+
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
124+
pub struct DatabaseConfig {
125+
#[serde(default)]
126+
pub backend: DatabaseBackend,
127+
#[serde(default)]
128+
pub rocksdb: RocksDbConfig,
129+
#[serde(default)]
130+
pub surrealdb: SurrealDbConfig,
131+
}
132+
133+
impl Default for DatabaseConfig {
134+
fn default() -> Self {
135+
Self {
136+
backend: DatabaseBackend::default(),
137+
rocksdb: RocksDbConfig::default(),
138+
surrealdb: SurrealDbConfig::default(),
139+
}
140+
}
141+
}
142+
53143
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
54144
pub struct VectorConfig {
55145
pub dimension: usize,
@@ -118,7 +208,10 @@ pub struct Settings {
118208
#[serde(default)]
119209
pub server: ServerConfig,
120210
#[serde(default)]
121-
pub rocksdb: RocksDbConfig,
211+
pub database: DatabaseConfig,
212+
/// Deprecated: Use database.rocksdb instead
213+
#[serde(default, skip_serializing_if = "Option::is_none")]
214+
pub rocksdb: Option<RocksDbConfig>,
122215
#[serde(default)]
123216
pub vector: VectorConfig,
124217
#[serde(default)]
@@ -134,7 +227,8 @@ impl Default for Settings {
134227
Self {
135228
env: Self::default_env(),
136229
server: ServerConfig::default(),
137-
rocksdb: RocksDbConfig::default(),
230+
database: DatabaseConfig::default(),
231+
rocksdb: None,
138232
vector: VectorConfig::default(),
139233
logging: LoggingConfig::default(),
140234
security: SecurityConfig::default(),
@@ -161,6 +255,31 @@ impl Settings {
161255
self.vector.dimension > 0 && self.vector.dimension <= 8192,
162256
"vector.dimension must be 1..=8192"
163257
);
258+
259+
// Validate database configuration
260+
match self.database.backend {
261+
DatabaseBackend::RocksDb => {
262+
anyhow::ensure!(
263+
!self.database.rocksdb.path.is_empty(),
264+
"database.rocksdb.path cannot be empty"
265+
);
266+
}
267+
DatabaseBackend::SurrealDb => {
268+
anyhow::ensure!(
269+
!self.database.surrealdb.connection.is_empty(),
270+
"database.surrealdb.connection cannot be empty"
271+
);
272+
anyhow::ensure!(
273+
!self.database.surrealdb.namespace.is_empty(),
274+
"database.surrealdb.namespace cannot be empty"
275+
);
276+
anyhow::ensure!(
277+
!self.database.surrealdb.database.is_empty(),
278+
"database.surrealdb.database cannot be empty"
279+
);
280+
}
281+
}
282+
164283
Ok(())
165284
}
166285
}

crates/codegraph-graph/Cargo.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,13 @@ num_cpus = "1.16"
3131
crossbeam-channel = "0.5"
3232
notify = { workspace = true }
3333

34+
# SurrealDB support (optional)
35+
surrealdb = { version = "2.2", optional = true }
36+
37+
[features]
38+
default = []
39+
surrealdb = ["dep:surrealdb"]
40+
3441
[dev-dependencies]
3542
tokio-test = { workspace = true }
3643
tempfile = { workspace = true }
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
-- Migration 001: Initial Schema
-- This migration creates the core tables for CodeGraph with SurrealDB.
-- Every statement uses IF NOT EXISTS, so re-running the script does not
-- redefine objects that are already present.

-- Nodes table: Stores code entities (functions, classes, variables, etc.)
DEFINE TABLE IF NOT EXISTS nodes SCHEMAFULL;
DEFINE FIELD IF NOT EXISTS id ON TABLE nodes TYPE string;
DEFINE FIELD IF NOT EXISTS name ON TABLE nodes TYPE string;
DEFINE FIELD IF NOT EXISTS node_type ON TABLE nodes TYPE option<string>;
DEFINE FIELD IF NOT EXISTS language ON TABLE nodes TYPE option<string>;
DEFINE FIELD IF NOT EXISTS content ON TABLE nodes TYPE option<string>;
DEFINE FIELD IF NOT EXISTS file_path ON TABLE nodes TYPE option<string>;
DEFINE FIELD IF NOT EXISTS start_line ON TABLE nodes TYPE option<number>;
DEFINE FIELD IF NOT EXISTS end_line ON TABLE nodes TYPE option<number>;
DEFINE FIELD IF NOT EXISTS embedding ON TABLE nodes TYPE option<array<float>>;
DEFINE FIELD IF NOT EXISTS complexity ON TABLE nodes TYPE option<float>;
DEFINE FIELD IF NOT EXISTS metadata ON TABLE nodes TYPE option<object>;
DEFINE FIELD IF NOT EXISTS created_at ON TABLE nodes TYPE datetime DEFAULT time::now();
DEFINE FIELD IF NOT EXISTS updated_at ON TABLE nodes TYPE datetime DEFAULT time::now();

-- Indexes for efficient queries on nodes
DEFINE INDEX IF NOT EXISTS idx_nodes_id ON TABLE nodes COLUMNS id UNIQUE;
DEFINE INDEX IF NOT EXISTS idx_nodes_name ON TABLE nodes COLUMNS name;
DEFINE INDEX IF NOT EXISTS idx_nodes_type ON TABLE nodes COLUMNS node_type;
DEFINE INDEX IF NOT EXISTS idx_nodes_language ON TABLE nodes COLUMNS language;
DEFINE INDEX IF NOT EXISTS idx_nodes_file_path ON TABLE nodes COLUMNS file_path;

-- Edges table: Stores relationships between nodes
DEFINE TABLE IF NOT EXISTS edges SCHEMAFULL;
DEFINE FIELD IF NOT EXISTS id ON TABLE edges TYPE string;
-- Endpoints are record links into `nodes`; `record<table>` is the current
-- SurrealQL spelling of the record-link type (the older `record(table)` form
-- is not used here).
DEFINE FIELD IF NOT EXISTS from ON TABLE edges TYPE record<nodes>;
DEFINE FIELD IF NOT EXISTS to ON TABLE edges TYPE record<nodes>;
DEFINE FIELD IF NOT EXISTS edge_type ON TABLE edges TYPE string;
DEFINE FIELD IF NOT EXISTS weight ON TABLE edges TYPE float DEFAULT 1.0;
DEFINE FIELD IF NOT EXISTS metadata ON TABLE edges TYPE option<object>;
DEFINE FIELD IF NOT EXISTS created_at ON TABLE edges TYPE datetime DEFAULT time::now();

-- Indexes for graph traversal on edges
DEFINE INDEX IF NOT EXISTS idx_edges_from ON TABLE edges COLUMNS from;
DEFINE INDEX IF NOT EXISTS idx_edges_to ON TABLE edges COLUMNS to;
DEFINE INDEX IF NOT EXISTS idx_edges_type ON TABLE edges COLUMNS edge_type;

-- Schema versions table: Tracks applied migrations
DEFINE TABLE IF NOT EXISTS schema_versions SCHEMAFULL;
DEFINE FIELD IF NOT EXISTS version ON TABLE schema_versions TYPE number;
DEFINE FIELD IF NOT EXISTS name ON TABLE schema_versions TYPE string;
DEFINE FIELD IF NOT EXISTS applied_at ON TABLE schema_versions TYPE datetime DEFAULT time::now();
DEFINE FIELD IF NOT EXISTS checksum ON TABLE schema_versions TYPE string;

-- One row per migration version
DEFINE INDEX IF NOT EXISTS idx_schema_version ON TABLE schema_versions COLUMNS version UNIQUE;

-- Metadata table: Stores system-level metadata
DEFINE TABLE IF NOT EXISTS metadata SCHEMAFULL;
DEFINE FIELD IF NOT EXISTS key ON TABLE metadata TYPE string;
DEFINE FIELD IF NOT EXISTS value ON TABLE metadata TYPE option<string | number | bool | object | array>;
DEFINE FIELD IF NOT EXISTS updated_at ON TABLE metadata TYPE datetime DEFAULT time::now();

-- One row per metadata key
DEFINE INDEX IF NOT EXISTS idx_metadata_key ON TABLE metadata COLUMNS key UNIQUE;

crates/codegraph-graph/src/lib.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,13 @@ pub mod traversal;
1919
pub mod update_scheduler;
2020
pub mod versioned_storage;
2121

22+
#[cfg(feature = "surrealdb")]
23+
pub mod surrealdb_storage;
24+
#[cfg(feature = "surrealdb")]
25+
pub mod surrealdb_schema;
26+
#[cfg(feature = "surrealdb")]
27+
pub mod surrealdb_migrations;
28+
2229
pub use cache::*;
2330
pub use delta::*;
2431
pub use delta_processor::*;
@@ -39,3 +46,10 @@ pub use transactional_graph::*;
3946
pub use traversal::*;
4047
pub use update_scheduler::*;
4148
pub use versioned_storage::*;
49+
50+
#[cfg(feature = "surrealdb")]
51+
pub use surrealdb_storage::*;
52+
#[cfg(feature = "surrealdb")]
53+
pub use surrealdb_schema::*;
54+
#[cfg(feature = "surrealdb")]
55+
pub use surrealdb_migrations::*;

0 commit comments

Comments
 (0)