diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c6ba021..d44b732 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -42,19 +42,19 @@ jobs:
           components: clippy
 
       - name: Cache cargo registry
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/.cargo/registry
           key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}
 
       - name: Cache cargo index
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/.cargo/git
           key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }}
 
       - name: Cache cargo build
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: target
           key: ${{ runner.os }}-cargo-build-target-${{ hashFiles('**/Cargo.lock') }}
@@ -85,19 +85,19 @@ jobs:
           toolchain: ${{ matrix.rust }}
 
       - name: Cache cargo registry
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/.cargo/registry
           key: ${{ runner.os }}-${{ matrix.rust }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}
 
       - name: Cache cargo index
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/.cargo/git
           key: ${{ runner.os }}-${{ matrix.rust }}-cargo-index-${{ hashFiles('**/Cargo.lock') }}
 
       - name: Cache cargo build
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: target
           key: ${{ runner.os }}-${{ matrix.rust }}-cargo-build-target-${{ hashFiles('**/Cargo.lock') }}
@@ -112,29 +112,6 @@ jobs:
   integration-test:
     name: Integration Tests
     runs-on: ubuntu-latest
-    services:
-      oxigraph:
-        image: oxigraph/oxigraph:latest
-        ports:
-          - 7878:7878
-        options: >-
-          --health-cmd "curl -f http://localhost:7878/query || exit 1"
-          --health-interval 10s
-          --health-timeout 5s
-          --health-retries 5
-
-      fuseki:
-        image: stain/jena-fuseki:latest
-        ports:
-          - 3030:3030
-        env:
-          ADMIN_PASSWORD: admin
-          JVM_ARGS: "-Xmx2g"
-        options: >-
-          --health-cmd "curl -f http://localhost:3030/$/ping || exit 1"
-          --health-interval 10s
-          --health-timeout 5s
-          --health-retries 5
 
     steps:
       - name: Checkout code
@@ -144,43 +121,25 @@ jobs:
         uses: dtolnay/rust-toolchain@stable
 
       - name: Cache cargo registry
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/.cargo/registry
           key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}
 
       - name: Cache cargo index
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/.cargo/git
           key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }}
 
       - name: Cache cargo build
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: target
           key: ${{ runner.os }}-cargo-build-target-${{ hashFiles('**/Cargo.lock') }}
 
-      - name: Wait for services
-        run: |
-          echo "Waiting for Oxigraph..."
-          timeout 60 bash -c 'until curl -f http://localhost:7878/query; do sleep 2; done'
-          echo "Waiting for Fuseki..."
-          timeout 60 bash -c 'until curl -f http://localhost:3030/$/ping; do sleep 2; done'
-
-      - name: Create Fuseki test dataset
-        run: |
-          curl -X POST http://localhost:3030/$/datasets \
-            -H "Content-Type: application/x-www-form-urlencoded" \
-            -d "dbName=ds&dbType=mem" \
-            --user admin:admin || true
-
       - name: Run integration tests
         run: cargo test --test '*' --all-features --verbose
-        env:
-          OXIGRAPH_ENDPOINT: http://localhost:7878
-          JENA_ENDPOINT: http://localhost:3030
-          JENA_DATASET: ds
 
   # Code coverage
   coverage:
@@ -199,13 +158,13 @@ jobs:
         uses: taiki-e/install-action@cargo-llvm-cov
 
       - name: Cache cargo registry
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/.cargo/registry
           key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}
 
       - name: Cache cargo index
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/.cargo/git
           key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }}
@@ -246,19 +205,19 @@ jobs:
           targets: ${{ matrix.target }}
 
       - name: Cache cargo registry
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/.cargo/registry
           key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}
 
       - name: Cache cargo index
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: ~/.cargo/git
           key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }}
 
       - name: Cache cargo build
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: target
           key: ${{ runner.os }}-cargo-build-target-${{ matrix.target }}-${{ hashFiles('**/Cargo.lock') }}
@@ -267,7 +226,7 @@ jobs:
         run: cargo build --release --target ${{ matrix.target }} --verbose
 
       - name: Upload build artifacts
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: janus-${{ matrix.target }}
           path: |
@@ -290,18 +249,6 @@ jobs:
       - name: Run security audit
         run: cargo audit
 
-  # Dependency review (for PRs)
-  dependency-review:
-    name: Dependency Review
-    runs-on: ubuntu-latest
-    if: github.event_name == 'pull_request'
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Dependency Review
-        uses: actions/dependency-review-action@v3
-
   # Publish to crates.io (on release tags)
   publish:
     name: Publish to crates.io
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b10a4dd..4468e96 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -382,4 +382,4 @@ By contributing to Janus, you agree that your contributions will be licensed und
 
 ---
 
-Thank you for contributing to Janus! 🎉
\ No newline at end of file
+Thank you for contributing to Janus!
diff --git a/Cargo.toml b/Cargo.toml
index c199397..8601fa5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,6 +13,7 @@ readme = "README.md"
 [dependencies]
 regex = "1.0"
 serde = { version = "1.0", features = ["derive"] }
+bincode = "1.0"
 
 [dev-dependencies]
 
diff --git a/GETTING_STARTED.md b/GETTING_STARTED.md
index df2f77b..87b6f30 100644
--- a/GETTING_STARTED.md
+++ b/GETTING_STARTED.md
@@ -399,4 +399,4 @@ This project is licensed under the MIT License - see [LICENCE.md](LICENCE.md) fo
 
 ---
 
-Happy coding with Janus! 🚀
\ No newline at end of file
+Happy coding with Janus!
diff --git a/WRITING_BENCHMARKS.md b/WRITING_BENCHMARKS.md
new file mode 100644
index 0000000..98896ef
--- /dev/null
+++ b/WRITING_BENCHMARKS.md
@@ -0,0 +1,229 @@
+# Complete Guide: Testing Writing Performance for Dense vs Sparse Indexing
+
+## Overview
+
+This guide provides step-by-step instructions for testing and comparing the writing performance of Dense vs Sparse indexing approaches in the Janus RDF Stream Processing Engine.
+
+## Background
+
+Previously, the benchmarking only tested **reading performance** (querying existing indexes). Now we have comprehensive **writing performance** tests that measure:
+
+1. **Real-time indexing**: Building indexes while writing records
+2. **Batch indexing**: Writing all records first, then building indexes
+3. **Throughput comparison**: Records processed per second
+4. **Memory and storage efficiency**: Resource usage patterns
+
+## What's Been Added
+
+### New Benchmark Files
+
+1. **`write_benchmark.rs`** - Core writing performance tests
+2. **`analysis.rs`** - Advanced analysis and optimal configuration finding
+3. **`run_benchmarks.sh`** - Automated test runner script
+4. **Enhanced `README.md`** - Comprehensive documentation
+
+### Updated Configuration
+
+- Updated `Cargo.toml` with new benchmark entries
+- Added support for multiple test scenarios
+- Integrated analysis tools
+
+## Step-by-Step Testing Instructions
+
+### Step 1: Run the Complete Benchmark Suite
+
+```bash
+# Make script executable (if not already)
+chmod +x run_benchmarks.sh
+
+# Run all benchmarks
+./run_benchmarks.sh
+```
+
+This runs all three benchmark types in sequence and provides a comprehensive overview.
+
+### Step 2: Test Writing Performance Specifically
+
+```bash
+# Run only the writing performance benchmark
+cargo bench --bench write_benchmark
+```
+
+**What this tests:**
+- Real-time writing with indexing for 10K, 100K, and 1M records
+- Batch writing comparison
+- Performance ratios between dense and sparse approaches
+
+**Expected output:**
+```
+=== WRITING PERFORMANCE RESULTS ===
+Records: 100000
+Sparse interval: 1000
+
+--- Real-time Writing (Index while writing) ---
+Dense - Write time: 260.611 ms, Total time: 260.611 ms
+Sparse - Write time: 85.356 ms, Total time: 85.356 ms
+
+--- Performance Comparison ---
+Real-time: Sparse is 3.05x faster than Dense
+```
+
+### Step 3: Advanced Analysis
+
+```bash
+# Run detailed analysis
+cargo bench --bench analysis
+```
+
+**What this tests:**
+- Optimal sparse intervals (100, 500, 1000, 2000, 5000, 10000)
+- Memory usage scaling across different dataset sizes
+- Write throughput under various conditions
+
+### Step 4: Original Read Performance (For Comparison)
+
+```bash
+# Run original benchmark
+cargo bench --bench benchmark
+```
+
+**What this tests:**
+- Index building time from existing log files
+- Query performance across different ranges
+- Memory usage of indexes
+
+### Step 5: Individual Test Runs
+
+For targeted testing, you can run specific scenarios:
+
+```bash
+# Run with release optimizations for accurate timing
+cargo bench --bench write_benchmark --release
+
+# Run with specific test size (modify source code)
+# Edit the test_sizes vector in write_benchmark.rs
+```
+
+## Interpreting Results
+
+### Key Metrics to Focus On
+
+#### 1. Writing Throughput
+- **Records/second**: Higher is better
+- **Dense typically**: 300-500 records/sec for large datasets
+- **Sparse typically**: 1000-1500 records/sec for large datasets
+
+#### 2. Performance Ratios
+- **Real-time writing**: Sparse is typically 2-4x faster
+- **Batch processing**: Sparse is typically 2-3x faster
+- **Memory usage**: Sparse uses significantly less memory
+
+#### 3. Trade-offs
+- **Query speed**: Dense is typically 10-30% faster for queries
+- **Storage space**: Sparse uses 90-99% less index storage
+- **Write speed**: Sparse is 2-4x faster for writing
+
+### When to Use Each Approach
+
+#### Use Dense Indexing When:
+- Query performance is critical
+- Dataset size is manageable (< 1M records)
+- Storage space is not a constraint
+- Read-heavy workloads
+
+#### Use Sparse Indexing When:
+- High-frequency writes (streaming data)
+- Large datasets (> 1M records)
+- Storage efficiency is important
+- Write-heavy workloads
+- Real-time ingestion requirements
+
+### Sample Results Analysis
+
+```
+Real-time: Sparse is 3.05x faster than Dense
+Batch: Sparse is 2.44x faster than Dense
+```
+
+This shows:
+- Sparse indexing provides significant write performance benefits
+- The advantage is consistent across different writing patterns
+- Sparse indexing scales better with larger datasets
+
+## Customizing Tests
+
+### Modify Record Counts
+
+Edit the test sizes in `write_benchmark.rs`:
+
+```rust
+let test_sizes = vec![10_000u64, 100_000u64, 1_000_000u64, 5_000_000u64];
+```
+
+### Adjust Sparse Intervals
+
+Modify the `SPARSE_INTERVAL` constant:
+
+```rust
+const SPARSE_INTERVAL: usize = 500; // Test different intervals
+```
+
+### Add Custom Test Scenarios
+
+Create new benchmark functions following the existing patterns in the benchmark files.
+
+## Performance Optimization Tips
+
+### For Maximum Accuracy
+1. Run benchmarks on a quiet system (minimal background processes)
+2. Use release builds: `cargo bench --release`
+3. Run multiple iterations and average results
+4. Ensure consistent storage (SSD vs HDD considerations)
+
+### For Large Datasets
+1. Monitor memory usage during tests
+2. Consider disk I/O limitations
+3. Test with realistic data patterns
+4. Evaluate network storage implications
+
+## Troubleshooting
+
+### Common Issues
+
+#### Out of Memory
+- Reduce test dataset sizes
+- Monitor system memory during tests
+- Consider streaming vs batch processing
+
+#### Slow Performance
+- Ensure running in release mode
+- Check disk I/O capacity
+- Verify no other processes consuming resources
+
+#### Inconsistent Results
+- Run tests multiple times
+- Check system load
+- Ensure consistent test conditions
+
+## Next Steps
+
+### Additional Testing Ideas
+
+1. **Network Storage**: Test performance with network-attached storage
+2. **Concurrent Access**: Test multiple writers/readers simultaneously
+3. **Real-world Data**: Test with actual RDF datasets
+4. **Memory Pressure**: Test under various memory constraints
+5. **Different Hardware**: Compare SSD vs HDD performance
+
+### Integration Testing
+
+1. Test within larger application contexts
+2. Measure end-to-end pipeline performance
+3. Evaluate query pattern impacts
+4. Test with realistic data volumes and patterns
+
+## Conclusion
+
+The new writing performance benchmarks provide comprehensive insights into the trade-offs between dense and sparse indexing approaches. The results clearly show that sparse indexing provides significant advantages for write-heavy workloads while maintaining acceptable query performance.
+
+Use these tools to make informed decisions about indexing strategies based on your specific use case requirements.
diff --git a/benches/README.md b/benches/README.md
deleted file mode 100644
index a627df0..0000000
--- a/benches/README.md
+++ /dev/null
@@ -1,75 +0,0 @@
-# Benchmarks
-
-This directory contains performance benchmarks for the Janus RDF Stream Processing Engine.
-
-## Running Benchmarks
-
-To run all benchmarks:
-
-```bash
-cargo bench
-```
-
-To run a specific benchmark:
-
-```bash
-cargo bench --bench <benchmark_name>
-```
-
-## Benchmark Structure
-
-Benchmarks are organized by functionality:
-
-- `query_parsing.rs` - Benchmarks for parsing RSP-QL queries
-- `stream_processing.rs` - Benchmarks for stream processing operations
-- `store_operations.rs` - Benchmarks for RDF store interactions
-- `integration.rs` - End-to-end integration benchmarks
-
-## Adding New Benchmarks
-
-To add a new benchmark:
-
-1. Create a new file in the `benches/` directory
-2. Add the benchmark to `Cargo.toml`:
-
-```toml
-[[bench]]
-name = "my_benchmark"
-harness = false
-```
-
-3. Use the `criterion` crate for benchmarking:
-
-```rust
-use criterion::{black_box, criterion_group, criterion_main, Criterion};
-
-fn benchmark_function(c: &mut Criterion) {
-    c.bench_function("my_function", |b| {
-        b.iter(|| {
-            // Code to benchmark
-            black_box(my_function())
-        });
-    });
-}
-
-criterion_group!(benches, benchmark_function);
-criterion_main!(benches);
-```
-
-## Benchmark Results
-
-Benchmark results are stored in `target/criterion/` and include:
-
-- HTML reports with graphs
-- Comparison with previous runs
-- Statistical analysis
-
-To view results, open `target/criterion/report/index.html` in a browser.
-
-## Performance Tips
-
-- Run benchmarks in release mode (default for `cargo bench`)
-- Ensure system is idle during benchmarking
-- Use consistent hardware for comparisons
-- Run multiple iterations to reduce noise
-- Use `black_box()` to prevent compiler optimizations
\ No newline at end of file
diff --git a/examples/basic.rs b/examples/basic.rs
deleted file mode 100644
index 43fc3f0..0000000
--- a/examples/basic.rs
+++ /dev/null
@@ -1,42 +0,0 @@
-//! Basic example demonstrating the Janus RDF Stream Processing Engine
-//!
-//! This example shows how to use the Janus library for basic operations.
-//!
-//! Run this example with:
-//! ```
-//! cargo run --example basic
-//! ```
-
-use janus::Result;
-
-fn main() -> Result<()> {
-    println!("=== Janus Basic Example ===\n");
-
-    println!("This is a basic example of the Janus RDF Stream Processing Engine.");
-    println!("The engine is designed to process both live and historical RDF streams.\n");
-
-    // TODO: Initialize the Janus engine
-    println!("Step 1: Initialize the engine");
-    println!("  - Configure RDF store connection");
-    println!("  - Set up stream processing pipeline\n");
-
-    // TODO: Load historical data
-    println!("Step 2: Load historical RDF data");
-    println!("  - Connect to RDF store (e.g., Oxigraph, Apache Jena)");
-    println!("  - Query historical triples\n");
-
-    // TODO: Set up live stream
-    println!("Step 3: Set up live RDF stream");
-    println!("  - Connect to stream source (e.g., Kafka, MQTT)");
-    println!("  - Register stream processors\n");
-
-    // TODO: Execute queries
-    println!("Step 4: Execute unified queries");
-    println!("  - Parse RSP-QL query");
-    println!("  - Execute over historical and live data");
-    println!("  - Return results\n");
-
-    println!("Example completed successfully!");
-
-    Ok(())
-}
diff --git a/examples/point_query_benchmark.rs b/examples/point_query_benchmark.rs
new file mode 100644
index 0000000..7424836
--- /dev/null
+++ b/examples/point_query_benchmark.rs
@@ -0,0 +1,147 @@
+use janus::storage::segmented_storage::StreamingSegmentedStorage;
+use janus::storage::util::StreamingConfig;
+use std::time::{Instant, SystemTime, UNIX_EPOCH};
+
+fn main() -> std::io::Result<()> {
+    println!("\nPoint Query Performance Benchmark");
+    println!("=====================================");
+    println!("Testing 100K and 1M datasets with 33 runs each\n");
+
+    // Test sizes: 100K and 1M quads
+    let test_sizes = vec![100_000, 1_000_000];
+    let num_runs = 33;
+    let warmup_runs = 3;
+    let outlier_runs = 2;
+
+    for &size in &test_sizes {
+        println!(
+            "Testing Point Queries for {} RDF Quads ({} runs, using middle 30)",
+            format_number(size),
+            num_runs
+        );
+        println!("{}", "=".repeat(80));
+
+        let mut point_query_times = Vec::new();
+
+        // Run benchmark multiple times
+        for run in 1..=num_runs {
+            if run % 10 == 0 || run == 1 {
+                println!("   Run {}/{}...", run, num_runs);
+            }
+
+            let point_time = run_point_query_benchmark(size, run)?;
+            point_query_times.push(point_time);
+        }
+
+        // Analyze results (middle 30 runs: exclude first 3 and last 2)
+        let start_idx = warmup_runs;
+        let end_idx = num_runs - outlier_runs;
+        let analysis_times = &point_query_times[start_idx..end_idx];
+
+        println!(
+            "\nPoint Query Results (Middle 30 runs, excluding first {} and last {} runs)",
+            warmup_runs, outlier_runs
+        );
+        println!("{}", "-".repeat(80));
+
+        analyze_and_print_point_query("Point Query Latency", analysis_times);
+        println!();
+    }
+
+    println!("Point Query Benchmark Complete!\n");
+    Ok(())
+}
+
+fn run_point_query_benchmark(size: u64, run_id: usize) -> std::io::Result<f64> {
+    // Create storage
+    let config = StreamingConfig {
+        max_batch_events: 10000,
+        max_batch_bytes: 10 * 1024 * 1024,
+        max_batch_age_seconds: 5,
+        sparse_interval: 1000,
+        entries_per_index_block: 1000,
+        segment_base_path: format!("data/point_query_benchmark_{}_{}", size, run_id),
+    };
+
+    let mut storage = StreamingSegmentedStorage::new(config.clone())?;
+    storage.start_background_flushing();
+
+    let base_timestamp = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_millis() as u64;
+    let mut min_timestamp = u64::MAX;
+    let mut max_timestamp = 0u64;
+
+    // Generate RDF quads with unique timestamps
+    for i in 0..size {
+        let timestamp = base_timestamp + i;
+        min_timestamp = min_timestamp.min(timestamp);
+        max_timestamp = max_timestamp.max(timestamp);
+
+        storage.write_rdf(
+            timestamp,
+            &format!("subject{}", i),
+            "predicate",
+            &format!("object{}", i),
+            "graph",
+        )?;
+    }
+
+    // Wait for all data to be flushed to disk
+    storage.shutdown()?;
+
+    // Restart storage for read-only point query
+    let storage = StreamingSegmentedStorage::new(config.clone())?;
+
+    // Point query benchmark - query for middle timestamp
+    let target_timestamp = min_timestamp + (max_timestamp - min_timestamp) / 2;
+
+    let point_start = Instant::now();
+    let _point_results = storage.query_rdf(target_timestamp, target_timestamp)?;
+    let point_duration = point_start.elapsed();
+
+    // Convert to milliseconds with microsecond precision
+    let point_time_ms = (point_duration.as_micros() as f64) / 1000.0;
+
+    // Cleanup
+    let _ = std::fs::remove_dir_all(&config.segment_base_path);
+
+    Ok(point_time_ms)
+}
+
+fn analyze_and_print_point_query(label: &str, times: &[f64]) {
+    let mut sorted_times = times.to_vec();
+    sorted_times.sort_by(|a, b| a.partial_cmp(b).unwrap());
+
+    let mean = times.iter().sum::<f64>() / times.len() as f64;
+    let median = sorted_times[times.len() / 2];
+    let min = *sorted_times.first().unwrap();
+    let max = *sorted_times.last().unwrap();
+
+    // Calculate standard deviation
+    let variance = times.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / times.len() as f64;
+    let std_dev = variance.sqrt();
+
+    println!(
+        "   {:<20}: {:.2} ms (median: {:.2}, std: {:.2}, range: {:.2} - {:.2})",
+        label, mean, median, std_dev, min, max
+    );
+
+    // Additional statistics
+    println!("   Sample Size         : {} measurements", times.len());
+    println!("   Coefficient of Var  : {:.1}%", (std_dev / mean) * 100.0);
+
+    // Percentiles
+    let p95_idx = (times.len() as f64 * 0.95) as usize;
+    let p99_idx = (times.len() as f64 * 0.99) as usize;
+    println!("   95th Percentile     : {:.2} ms", sorted_times[p95_idx.min(times.len() - 1)]);
+    println!("   99th Percentile     : {:.2} ms", sorted_times[p99_idx.min(times.len() - 1)]);
+}
+
+fn format_number(n: u64) -> String {
+    if n >= 1_000_000 {
+        format!("{:.1}M", n as f64 / 1_000_000.0)
+    } else if n >= 1_000 {
+        format!("{:.1}K", n as f64 / 1_000.0)
+    } else {
+        n.to_string()
+    }
+}
diff --git a/examples/range_query_benchmark.rs b/examples/range_query_benchmark.rs
new file mode 100644
index 0000000..6ab56e2
--- /dev/null
+++ b/examples/range_query_benchmark.rs
@@ -0,0 +1,289 @@
+use janus::storage::segmented_storage::StreamingSegmentedStorage;
+use janus::storage::util::StreamingConfig;
+use std::error::Error;
+use std::time::Instant;
+
+#[allow(clippy::manual_div_ceil)]
+#[derive(Debug)]
+struct BenchmarkResults {
+    range_10_percent_times: Vec<f64>,
+    range_50_percent_times: Vec<f64>,
+    range_100_percent_times: Vec<f64>,
+}
+
+#[allow(clippy::manual_div_ceil)]
+fn main() -> Result<(), Box<dyn Error>> {
+    println!("Realistic Range Query Benchmark by Range Size");
+    println!("================================================");
+    println!("Using realistic IoT sensor data (5 quads per observation)");
+    println!("Testing range query performance for 10%, 50%, and 100% of time range");
+    println!("Running 33 iterations, analyzing middle 30 runs\n");
+
+    let predicates = vec![
+        "http://www.w3.org/1999/02/22-rdf-syntax-ns#type".to_string(),
+        "https://saref.etsi.org/core/isMeasuredByDevice".to_string(),
+        "https://saref.etsi.org/core/relatesToProperty".to_string(),
+        "http://purl.org/dc/terms/created".to_string(),
+        "https://saref.etsi.org/core/hasValue".to_string(),
+    ];
+
+    // Test specific quad counts (each observation generates 5 quads)
+    let target_quad_counts = vec![10, 100, 1_000, 10_000, 100_000, 1_000_000];
+    let num_runs = 33;
+    let warmup_runs = 3;
+    let outlier_runs = 2;
+
+    for &quad_count in &target_quad_counts {
+        // Calculate observations needed (each generates 5 quads)
+        let observations_needed = (quad_count + 4) / 5; // Round up division
+        let actual_quads = observations_needed * 5;
+
+        println!(
+            "Testing {} target quads ({} observations → {} actual quads)",
+            quad_count, observations_needed, actual_quads
+        );
+        println!("{}", "-".repeat(70));
+
+        let mut all_results = Vec::new();
+
+        // Run benchmark multiple times
+        for run in 1..=num_runs {
+            if run % 10 == 0 || run == 1 {
+                println!("   Run {}/{}...", run, num_runs);
+            }
+
+            let result = run_range_query_benchmark(observations_needed, &predicates, run)?;
+            all_results.push(result);
+        }
+
+        // Analyze results (middle 30 runs: exclude first 3 and last 2)
+        let start_idx = warmup_runs;
+        let end_idx = num_runs - outlier_runs;
+        let analysis_results = &all_results[start_idx..end_idx];
+
+        println!(
+            "\nRange Query Results (Middle 30 runs, excluding first {} and last {} runs)",
+            warmup_runs, outlier_runs
+        );
+        println!("{}", "-".repeat(80));
+
+        // 10% range performance
+        let range_10_times: Vec<f64> =
+            analysis_results.iter().map(|r| r.range_10_percent_times[0]).collect();
+        analyze_and_print(
+            &format!("10% Range Query ({} quads)", actual_quads),
+            &range_10_times,
+            "ms",
+        );
+
+        // 50% range performance
+        let range_50_times: Vec<f64> =
+            analysis_results.iter().map(|r| r.range_50_percent_times[0]).collect();
+        analyze_and_print(
+            &format!("50% Range Query ({} quads)", actual_quads),
+            &range_50_times,
+            "ms",
+        );
+
+        // 100% range performance
+        let range_100_times: Vec<f64> =
+            analysis_results.iter().map(|r| r.range_100_percent_times[0]).collect();
+        analyze_and_print(
+            &format!("100% Range Query ({} quads)", actual_quads),
+            &range_100_times,
+            "ms",
+        );
+
+        println!();
+    }
+
+    println!("Realistic Range Query Benchmark Complete!");
+    Ok(())
+}
+
+fn run_range_query_benchmark(
+    observations: usize,
+    predicates: &[String],
+    run_id: usize,
+) -> std::io::Result<BenchmarkResults> {
+    // Create storage
+    let config = StreamingConfig {
+        max_batch_events: 10000,
+        max_batch_bytes: 10 * 1024 * 1024,
+        max_batch_age_seconds: 5,
+        sparse_interval: 1000,
+        entries_per_index_block: 1000,
+        segment_base_path: format!("data/range_query_benchmark_{}_{}", observations, run_id),
+    };
+
+    let mut storage = StreamingSegmentedStorage::new(config.clone())?;
+    storage.start_background_flushing();
+
+    let base_timestamp = std::time::SystemTime::now()
+        .duration_since(std::time::UNIX_EPOCH)
+        .unwrap()
+        .as_millis() as u64;
+
+    let mut min_timestamp = u64::MAX;
+    let mut max_timestamp = 0u64;
+
+    // Generate realistic RDF quads - each observation has 5 quads (same as main benchmark)
+    for i in 0..observations {
+        // Each observation has a unique timestamp (1ms apart)
+        let timestamp = base_timestamp + i as u64;
+        min_timestamp = min_timestamp.min(timestamp);
+        max_timestamp = max_timestamp.max(timestamp);
+
+        // Create a unique subject for each observation
+        // Format: https://dahcc.idlab.ugent.be/Protego/_participant{participant}/obs{i}
+        let subject = format!(
+            "https://dahcc.idlab.ugent.be/Protego/_participant{}/obs{}",
+            (i % 100) + 1, // Rotate through 100 participants
+            i
+        );
+
+        // Sensor data - rotating through different sensors
+        let sensor = format!(
+            "https://dahcc.idlab.ugent.be/Homelab/SensorsAndActuators/70:ee:50:67:30:{:02x}",
+            (i % 256) as u8
+        );
+
+        // Property type - rotating through different measurement types
+        let properties = [
+            "org.dyamand.types.common.AtmosphericPressure",
+            "org.dyamand.types.common.Temperature",
+            "org.dyamand.types.common.Humidity",
+            "org.dyamand.types.common.LightLevel",
+        ];
+        let property = format!(
+            "https://dahcc.idlab.ugent.be/Homelab/SensorsAndActuators/{}",
+            properties[i % 4]
+        );
+
+        // Dataset
+        let dataset = format!("https://dahcc.idlab.ugent.be/Protego/_participant{}", (i % 100) + 1);
+
+        // Create 5 quads per observation (matching your example)
+        let quads = vec![
+            (
+                subject.clone(),
+                predicates[0].clone(),
+                dataset,
+                "http://example.org/graph1".to_string(),
+            ),
+            (
+                subject.clone(),
+                predicates[1].clone(),
+                sensor,
+                "http://example.org/graph1".to_string(),
+            ),
+            (
+                subject.clone(),
+                predicates[2].clone(),
+                property,
+                "http://example.org/graph1".to_string(),
+            ),
+            (
+                subject.clone(),
+                predicates[3].clone(),
+                format!("2022-01-03T09:04:{:02}.000000", (i % 60) as u32),
+                "http://example.org/graph1".to_string(),
+            ),
+            (
+                subject.clone(),
+                predicates[4].clone(),
+                format!("{:.1}", 1000.0 + (i as f64 * 0.1) % 100.0),
+                "http://example.org/graph1".to_string(),
+            ),
+        ];
+
+        // Write all 5 quads for this observation
+        for (s, p, o, g) in quads {
+            storage.write_rdf(timestamp, &s, &p, &o, &g)?;
+        }
+    }
+
+    // Ensure all data is written before querying
+    storage.shutdown()?;
+
+    // Recreate storage for clean read-only access
+    let storage = StreamingSegmentedStorage::new(config.clone())?;
+
+    let time_range = max_timestamp - min_timestamp;
+
+    // Debug: Print timestamp range
+    if observations == 2000 {
+        println!(
+            "DEBUG 10K: min_timestamp={}, max_timestamp={}, time_range={}",
+            min_timestamp, max_timestamp, time_range
+        );
+    }
+
+    // 10% range query - query 10% of the total time range
+    let range_10_start = min_timestamp;
+    let range_10_end = min_timestamp + (time_range / 10);
+    let range_10_start_time = Instant::now();
+    let range_10_results = storage.query_rdf(range_10_start, range_10_end)?;
+    let range_10_duration = range_10_start_time.elapsed();
+    let range_10_time_ms = (range_10_duration.as_micros() as f64) / 1000.0;
+
+    // 50% range query - query 50% of the total time range
+    let range_50_start = min_timestamp;
+    let range_50_end = min_timestamp + (time_range / 2);
+    let range_50_start_time = Instant::now();
+    let range_50_results = storage.query_rdf(range_50_start, range_50_end)?;
+    let range_50_duration = range_50_start_time.elapsed();
+    let range_50_time_ms = (range_50_duration.as_micros() as f64) / 1000.0;
+
+    // 100% range query - query entire time range
+    let range_100_start = min_timestamp;
+    let range_100_end = max_timestamp;
+
+    // Debug: Print query parameters
+    if observations == 2000 {
+        println!("DEBUG 10K: 100% query from {} to {}", range_100_start, range_100_end);
+    }
+
+    let range_100_start_time = Instant::now();
+    let range_100_results = storage.query_rdf(range_100_start, range_100_end)?;
+    let range_100_duration = range_100_start_time.elapsed();
+    let range_100_time_ms = (range_100_duration.as_micros() as f64) / 1000.0;
+
+    let actual_quads = observations * 5;
+
+    // Debug: Print result counts
+    println!("DEBUG: Dataset {} quads - 10% range returned {} results, 50% range returned {} results, 100% range returned {} results",
+             actual_quads, range_10_results.len(), range_50_results.len(), range_100_results.len());
+
+    // Cleanup
+    let _ = std::fs::remove_dir_all(&config.segment_base_path);
+
+    Ok(BenchmarkResults {
+        range_10_percent_times: vec![range_10_time_ms],
+        range_50_percent_times: vec![range_50_time_ms],
+        range_100_percent_times: vec![range_100_time_ms],
+    })
+}
+
+fn analyze_and_print(label: &str, times: &[f64], unit: &str) {
+    let mut sorted_times = times.to_vec();
+    sorted_times.sort_by(|a, b| a.partial_cmp(b).unwrap());
+
+    let mean = times.iter().sum::<f64>() / times.len() as f64;
+    let median = sorted_times[times.len() / 2];
+    let min = *sorted_times.first().unwrap();
+    let max = *sorted_times.last().unwrap();
+
+    // Calculate standard deviation
+    let variance = times.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / times.len() as f64;
+    let std_dev = variance.sqrt();
+
+    // Calculate percentiles
+    let p25 = sorted_times[times.len() / 4];
+    let p75 = sorted_times[(times.len() * 3) / 4];
+
+    println!(
+        "{}: {:.2} ± {:.2} {} (median: {:.2}, range: {:.2}-{:.2}, p25: {:.2}, p75: {:.2})",
+        label, mean, std_dev, unit, median, min, max, p25, p75
+    );
+}
diff --git a/examples/realistic_rdf_benchmark.rs b/examples/realistic_rdf_benchmark.rs
new file mode 100644
index 0000000..cda886c
--- /dev/null
+++ b/examples/realistic_rdf_benchmark.rs
@@ -0,0 +1,306 @@
+use janus::storage::segmented_storage::StreamingSegmentedStorage;
+use janus::storage::util::StreamingConfig;
+use std::time::{Instant, SystemTime, UNIX_EPOCH};
+
+#[derive(Debug)]
+struct BenchmarkResults {
+    write_times: Vec<f64>,
+    read_times_1_percent: Vec<f64>,
+    read_times_10_percent: Vec<f64>,
+    read_times_50_percent: Vec<f64>,
+    read_times_100_percent: Vec<f64>,
+    point_query_times: Vec<f64>,
+}
+
+fn main() -> std::io::Result<()> {
+    println!("\nRealistic RDF Benchmark - IoT Sensor Observations");
+    println!("=====================================================");
+    println!("Running 33 iterations per test size, using middle 30 for statistics\n");
+
+    // Test sizes: 10, 100, 1k, 10k, 100k, 1M
+    let test_sizes = vec![10, 100, 1_000, 10_000, 100_000, 1_000_000];
+    let num_runs = 33;
+    let warmup_runs = 3;
+    let outlier_runs = 2;
+
+    // Define realistic RDF predicates for sensor observations
+    let predicates: Vec<String> = vec![
+        "http://rdfs.org/ns/void#inDataset".to_string(),
+        "https://saref.etsi.org/core/measurementMadeBy".to_string(),
+        "https://saref.etsi.org/core/relatesToProperty".to_string(),
+        "https://saref.etsi.org/core/hasTimestamp".to_string(),
+        "https://saref.etsi.org/core/hasValue".to_string(),
+    ];
+
+    for &size in &test_sizes {
+        println!("\n{}", "=".repeat(80));
+        println!(
+            "Testing with {} RDF Quads ({} runs, using middle 30)",
+            format_number(size),
+            num_runs
+        );
+        println!("{}\n", "=".repeat(80));
+
+        let mut all_results = Vec::new();
+
+        // Run benchmark multiple times
+        for run in 1..=num_runs {
+            if run % 10 == 0 || run == 1 {
+                println!("   Run {}/{}...", run, num_runs);
+            }
+
+            let result = run_single_benchmark(size, &predicates, run)?;
+            all_results.push(result);
+        }
+
+        // Analyze results (middle 30 runs: exclude first 3 and last 2)
+        let start_idx = warmup_runs;
+        let end_idx = num_runs - outlier_runs;
+        let analysis_results = &all_results[start_idx..end_idx];
+
+        println!(
+            "\nResults (Middle 30 runs, excluding first {} and last {} runs)",
+            warmup_runs, outlier_runs
+        );
+        println!("{}", "-".repeat(80));
+
+        // Write performance
+        let write_times: Vec<f64> = analysis_results.iter().map(|r| r.write_times[0]).collect();
+        analyze_and_print("Write Throughput", &write_times, "quads/sec");
+
+        // Read performance for different ranges
+        let read_1_times: Vec<f64> =
+            analysis_results.iter().map(|r| r.read_times_1_percent[0]).collect();
+        let read_10_times: Vec<f64> =
+            analysis_results.iter().map(|r| r.read_times_10_percent[0]).collect();
+        let read_50_times: Vec<f64> =
+            analysis_results.iter().map(|r| r.read_times_50_percent[0]).collect();
+        let read_100_times: Vec<f64> =
+            analysis_results.iter().map(|r| r.read_times_100_percent[0]).collect();
+
+        analyze_and_print("Read (1% range)", &read_1_times, "quads/sec");
+        analyze_and_print("Read (10% range)", &read_10_times, "quads/sec");
+        analyze_and_print("Read (50% range)", &read_50_times, "quads/sec");
+        analyze_and_print("Read (100% range)", &read_100_times, "quads/sec");
+
+        // Point query performance
+        let point_times: Vec<f64> =
+            analysis_results.iter().map(|r| r.point_query_times[0]).collect();
+        analyze_and_print("Point Query", &point_times, "ms");
+    }
+
+    println!("\n{}", "=".repeat(80));
+    println!("Benchmark Complete!");
+    println!("{}\n", "=".repeat(80));
+
+    Ok(())
+}
+
+fn run_single_benchmark(
+    size: u64,
+    predicates: &[String],
+    run_id: usize,
+) -> std::io::Result<BenchmarkResults> {
+    // Create storage
+    let config = StreamingConfig {
+        max_batch_events: 10000,
+        max_batch_bytes: 10 * 1024 * 1024,
+        max_batch_age_seconds: 5,
+        sparse_interval: 1000,
+        entries_per_index_block: 1000,
+        segment_base_path: format!("data/realistic_benchmark_{}_{}", size, run_id),
+    };
+
+    let mut storage = StreamingSegmentedStorage::new(config.clone())?;
+    storage.start_background_flushing();
+
+    let base_timestamp = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_millis() as u64;
+
+    let write_start = Instant::now();
+    let mut min_timestamp = u64::MAX;
+    let mut max_timestamp = 0u64;
+
+    // Generate realistic RDF quads - each with unique subject and timestamp
+    for i in 0..size {
+        // Each observation has a unique timestamp (1ms apart)
+        let timestamp = base_timestamp + i;
+        min_timestamp = min_timestamp.min(timestamp);
+        max_timestamp = max_timestamp.max(timestamp);
+
+        // Create a unique subject for each observation
+        // Format: https://dahcc.idlab.ugent.be/Protego/_participant{participant}/obs{i}
+        let subject = format!(
+            "https://dahcc.idlab.ugent.be/Protego/_participant{}/obs{}",
+            (i % 100) + 1, // Rotate through 100 participants
+            i
+        );
+
+        // Sensor data - rotating through different sensors
+        let sensor = format!(
+            "https://dahcc.idlab.ugent.be/Homelab/SensorsAndActuators/70:ee:50:67:30:{:02x}",
+            (i % 256) as u8
+        );
+
+        // Property type - rotating through different measurement types
+        let properties = [
+            "org.dyamand.types.common.AtmosphericPressure",
+            "org.dyamand.types.common.Temperature",
+            "org.dyamand.types.common.Humidity",
+            "org.dyamand.types.common.LightLevel",
+        ];
+        let property = format!(
+            "https://dahcc.idlab.ugent.be/Homelab/SensorsAndActuators/{}",
+            properties[(i % 4) as usize]
+        );
+
+        // Dataset
+        let dataset = format!("https://dahcc.idlab.ugent.be/Protego/_participant{}", (i % 100) + 1);
+
+        // Create 5 quads per observation (matching your example)
+        let quads = vec![
+            (
+                subject.clone(),
+                predicates[0].clone(),
+                dataset,
+                "http://example.org/graph1".to_string(),
+            ),
+            (
+                subject.clone(),
+                predicates[1].clone(),
+                sensor,
+                "http://example.org/graph1".to_string(),
+            ),
+            (
+                subject.clone(),
+                predicates[2].clone(),
+                property,
+                "http://example.org/graph1".to_string(),
+            ),
+            (
+                subject.clone(),
+                predicates[3].clone(),
+                format!("2022-01-03T09:04:{:02}.000000", (i % 60) as u32),
+                "http://example.org/graph1".to_string(),
+            ),
+            (
+                subject.clone(),
+                predicates[4].clone(),
+                format!("{:.1}", 1000.0 + (i as f64 * 0.1) % 100.0),
+                "http://example.org/graph1".to_string(),
+            ),
+        ];
+
+        // Write all 5 quads for this observation
+        for (s, p, o, g) in quads {
+            storage.write_rdf(timestamp, &s, &p, &o, &g)?;
+        }
+    }
+
+    let write_duration = write_start.elapsed();
+    let write_throughput = (size * 5) as f64 / write_duration.as_secs_f64();
+
+    // Wait for all data to be flushed to disk before read benchmarks
+    println!("   Waiting for background flush to complete...");
+    storage.shutdown()?;
+
+    // Restart storage for read-only benchmarks
+    let mut storage = StreamingSegmentedStorage::new(config.clone())?;
+
+    // Read benchmarks
+    let mut read_times_1_percent = Vec::new();
+    let mut read_times_10_percent = Vec::new();
+    let mut read_times_50_percent = Vec::new();
+    let mut read_times_100_percent = Vec::new();
+
+    // Test different query ranges
+    let query_percentages = vec![0.01, 0.1, 0.5, 1.0];
+
+    for &percentage in &query_percentages {
+        let range_size = ((max_timestamp - min_timestamp) as f64 * percentage) as u64;
+        let query_start_ts = min_timestamp;
+        let query_end_ts = min_timestamp + range_size;
+
+        let read_start = Instant::now();
+        let results = storage.query_rdf(query_start_ts, query_end_ts)?;
+        let read_duration = read_start.elapsed();
+
+        // Use microseconds for better precision, avoid division by zero
+        let duration_secs = read_duration.as_secs_f64().max(0.000001); // At least 1 microsecond
+        let read_throughput = results.len() as f64 / duration_secs;
+
+        match percentage {
+            0.01 => read_times_1_percent.push(read_throughput),
+            0.1 => read_times_10_percent.push(read_throughput),
+            0.5 => read_times_50_percent.push(read_throughput),
+            1.0 => read_times_100_percent.push(read_throughput),
+            _ => {}
+        }
+    }
+
+    // Point query benchmark - query for a specific observation (should return 5 quads)
+    // Query for the very first timestamp we wrote (we know it exists)
+    let single_ts = min_timestamp; // This is base_timestamp + 0
+
+    let point_start = Instant::now();
+    let point_results = storage.query_rdf(single_ts, single_ts)?;
+    let point_duration = point_start.elapsed();
+    // Use microseconds for sub-millisecond precision
+    let point_time_us = point_duration.as_micros() as f64;
+    let point_time_ms = point_time_us / 1000.0;
+
+    // Debug: show results count for small datasets
+    if size <= 10_000 {
+        eprintln!("   DEBUG: Point query at ts={} (min_ts, size={}) returned {} quads (duration: {:.3} µs = {:.3} ms)",
+                  single_ts, size, point_results.len(), point_time_us, point_time_ms);
+    }
+
+    // Cleanup
+    storage.shutdown()?;
+    let _ = std::fs::remove_dir_all(&config.segment_base_path);
+
+    Ok(BenchmarkResults {
+        write_times: vec![write_throughput],
+        read_times_1_percent,
+        read_times_10_percent,
+        read_times_50_percent,
+        read_times_100_percent,
+        point_query_times: vec![point_time_ms],
+    })
+}
+
+fn analyze_and_print(label: &str, times: &[f64], unit: &str) {
+    let mut sorted_times = times.to_vec();
+    sorted_times.sort_by(|a, b| a.partial_cmp(b).unwrap());
+
+    let mean = times.iter().sum::<f64>() / times.len() as f64;
+    let median = sorted_times[times.len() / 2];
+    let min = *sorted_times.first().unwrap();
+    let max = *sorted_times.last().unwrap();
+
+    // Calculate standard deviation
+    let variance = times.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / times.len() as f64;
+    let std_dev = variance.sqrt();
+
+    // Use higher precision for millisecond times (point queries)
+    if unit == "ms" {
+        println!(
+            "   {:<20}: {:.3} {} (median: {:.3}, std: {:.3}, range: {:.3} - {:.3})",
+            label, mean, unit, median, std_dev, min, max
+        );
+    } else {
+        println!(
+            "   {:<20}: {:.0} {} (median: {:.0}, std: {:.0}, range: {:.0} - {:.0})",
+            label, mean, unit, median, std_dev, min, max
+        );
+    }
+}
+
+fn format_number(n: u64) -> String {
+    if n >= 1_000_000 {
+        format!("{:.1}M", n as f64 / 1_000_000.0)
+    } else if n >= 1_000 {
+        format!("{:.1}K", n as f64 / 1_000.0)
+    } else {
+        n.to_string()
+    }
+}
diff --git a/rustfmt.toml b/rustfmt.toml
index c161143..276694f 100644
--- a/rustfmt.toml
+++ b/rustfmt.toml
@@ -24,63 +24,24 @@ chain_width = 80
 # Maximum line length for single line if-else expressions
 single_line_if_else_max_width = 50
 
-# Brace style for items
-brace_style = "SameLineWhere"
-
-# Brace style for control flow constructs
-control_brace_style = "AlwaysSameLine"
-
 # How to indent
 hard_tabs = false
 
 # Number of spaces per tab
 tab_spaces = 4
 
-# Where to put a binary operator when a binary expression goes multiline
-binop_separator = "Front"
-
-# Combine control expressions with function calls
-combine_control_expr = true
-
-# Maximum length of comments
-comment_width = 100
-
 # Use field init shorthand if possible
 use_field_init_shorthand = true
 
 # Use try shorthand
 use_try_shorthand = true
 
-# Format code in doc comments
-format_code_in_doc_comments = true
-
-# Format strings using rustfmt
-format_strings = true
-
-# Merge imports
-imports_granularity = "Crate"
-
 # Reorder imports
 reorder_imports = true
 
 # Reorder modules
 reorder_modules = true
 
-# Where to put the opening brace of structs
-struct_brace_style = "SameLineWhere"
-
-# Indent style for function parameters
-indent_style = "Block"
-
-# Leave a space before the colon in a type annotation
-space_before_colon = false
-
-# Leave a space after the colon in a type annotation
-space_after_colon = true
-
-# Put empty-body functions and impls on a single line
-empty_item_single_line = true
-
 # Newline style
 newline_style = "Unix"
 
diff --git a/src/benchmarking/benchmark.rs b/src/benchmarking/benchmark.rs
index 92b7f69..607e922 100644
--- a/src/benchmarking/benchmark.rs
+++ b/src/benchmarking/benchmark.rs
@@ -1,4 +1,5 @@
-use crate::indexing::{dense, shared::LogWriter, sparse};
+use crate::indexing::shared::LogWriter;
+use crate::storage::indexing::{dense, sparse};
 use std::fs;
 use std::time::Instant;
 
@@ -16,10 +17,10 @@ fn setup_data(number_records: u64) -> std::io::Result<()> {
 
     for i in 0..number_records {
         let timestamp = i;
-        let subject = (i % 1000) as u64;
-        let predicate = (i % 500) as u64;
-        let object = (i % 2000) as u64;
-        let graph: u64 = 1;
+        let subject = (i % 1000) as u32;
+        let predicate = (i % 500) as u32;
+        let object = (i % 2000) as u32;
+        let graph: u32 = 1;
         writer.append_record(timestamp, subject, predicate, object, graph)?;
     }
 
diff --git a/src/benchmarking/mod.rs b/src/benchmarking/mod.rs
deleted file mode 100644
index 9820c42..0000000
--- a/src/benchmarking/mod.rs
+++ /dev/null
@@ -1,2 +0,0 @@
-#[doc=""]
-pub mod benchmark;
\ No newline at end of file
diff --git a/benches/benchmark.rs b/src/benchmarks/benchmark.rs
similarity index 87%
rename from benches/benchmark.rs
rename to src/benchmarks/benchmark.rs
index e6ca4db..89d4ae8 100644
--- a/benches/benchmark.rs
+++ b/src/benchmarks/benchmark.rs
@@ -1,13 +1,20 @@
-use janus::indexing::{dense, shared::LogWriter, sparse};
+use crate::indexing::shared::LogWriter;
+use crate::storage::indexing::{dense, sparse};
 use std::fs;
 use std::time::Instant;
 
+#[allow(dead_code)]
 const DATA_DIR: &str = "data/benchmark";
+#[allow(dead_code)]
 const LOG_FILE: &str = "data/benchmark/log.dat";
+#[allow(dead_code)]
 const DENSE_INDEX_FILE: &str = "data/benchmark/dense.idx";
+#[allow(dead_code)]
 const SPARSE_INDEX_FILE: &str = "data/benchmark/sparse.idx";
+#[allow(dead_code)]
 const SPARSE_INTERVAL: usize = 1000;
 
+#[allow(dead_code)]
 fn setup_data(number_records: u64) -> std::io::Result<()> {
     let _ = fs::remove_dir_all(DATA_DIR);
     fs::create_dir_all(DATA_DIR)?;
@@ -16,10 +23,10 @@ fn setup_data(number_records: u64) -> std::io::Result<()> {
 
     for i in 0..number_records {
         let timestamp = i;
-        let subject = (i % 1000) as u64;
-        let predicate = (i % 500) as u64;
-        let object = (i % 2000) as u64;
-        let graph: u64 = 1;
+        let subject = (i % 1000) as u32;
+        let predicate = (i % 500) as u32;
+        let object = (i % 2000) as u32;
+        let graph: u32 = 1;
         writer.append_record(timestamp, subject, predicate, object, graph)?;
     }
 
@@ -30,6 +37,8 @@ fn setup_data(number_records: u64) -> std::io::Result<()> {
     Ok(())
 }
 
+#[allow(dead_code)]
+#[allow(clippy::cast_precision_loss)]
 fn benchmark_indexing() -> std::io::Result<()> {
     println!("Indexing Benchmark");
 
@@ -57,6 +66,7 @@ fn benchmark_indexing() -> std::io::Result<()> {
     Ok(())
 }
 
+#[allow(dead_code)]
 fn benchmark_queries() -> std::io::Result<()> {
     println!("Query Benchmark");
     let dense_reader = dense::DenseIndexReader::open(DENSE_INDEX_FILE)?;
@@ -66,8 +76,8 @@ fn benchmark_queries() -> std::io::Result<()> {
         (0u64, 100u64, "100 records"),
         (5000u64, 5100u64, "100 records (mid-range)"),
         (0u64, 10000u64, "10K records"),
-        (0u64, 100000u64, "100K records"),
-        (0u64, 1000000u64, "1M records"),
+        (0u64, 100_000u64, "100K records"),
+        (0u64, 1_000_000u64, "1M records"),
     ];
 
     for (timestamp_start, timestamp_end, description) in query_ranges {
@@ -110,6 +120,7 @@ fn benchmark_queries() -> std::io::Result<()> {
     Ok(())
 }
 
+#[allow(dead_code)]
 fn main() -> std::io::Result<()> {
     println!("RDF Indexing Benchmark : Dense vs Sparse");
     println!("Setting up data...");
diff --git a/src/benchmarks/mod.rs b/src/benchmarks/mod.rs
new file mode 100644
index 0000000..2543f50
--- /dev/null
+++ b/src/benchmarks/mod.rs
@@ -0,0 +1 @@
+pub mod benchmark;
diff --git a/src/core/encoding.rs b/src/core/encoding.rs
new file mode 100644
index 0000000..8c76b7f
--- /dev/null
+++ b/src/core/encoding.rs
@@ -0,0 +1,74 @@
+//! Binary encoding/decoding utilities for RDF events
+
+use crate::core::{Event, RDFEvent};
+use crate::storage::indexing::dictionary::Dictionary;
+
+/// Size of a single encoded record in bytes
+/// Reduced from 40 to 24 bytes (40% space savings)
+pub const RECORD_SIZE: usize = 24;
+
+/// Encode an RDF event record into a byte buffer
+pub fn encode_record(
+    buffer: &mut [u8; RECORD_SIZE],
+    timestamp: u64,
+    subject: u32,
+    predicate: u32,
+    object: u32,
+    graph: u32,
+) {
+    buffer[0..8].copy_from_slice(&timestamp.to_le_bytes());
+    buffer[8..12].copy_from_slice(&subject.to_le_bytes());
+    buffer[12..16].copy_from_slice(&predicate.to_le_bytes());
+    buffer[16..20].copy_from_slice(&object.to_le_bytes());
+    buffer[20..24].copy_from_slice(&graph.to_le_bytes());
+}
+
+/// Decode a byte buffer into an RDF event record
+pub fn decode_record(buffer: &[u8; RECORD_SIZE]) -> (u64, u32, u32, u32, u32) {
+    let timestamp = u64::from_le_bytes(buffer[0..8].try_into().unwrap());
+    let subject = u32::from_le_bytes(buffer[8..12].try_into().unwrap());
+    let predicate = u32::from_le_bytes(buffer[12..16].try_into().unwrap());
+    let object = u32::from_le_bytes(buffer[16..20].try_into().unwrap());
+    let graph = u32::from_le_bytes(buffer[20..24].try_into().unwrap());
+    (timestamp, subject, predicate, object, graph)
+}
+
+impl RDFEvent {
+    /// Encode this RDF event to an internal Event using a dictionary
+    pub fn encode(&self, dict: &mut Dictionary) -> Event {
+        Event {
+            timestamp: self.timestamp,
+            subject: dict.encode(&self.subject),
+            predicate: dict.encode(&self.predicate),
+            object: dict.encode(&self.object),
+            graph: dict.encode(&self.graph),
+        }
+    }
+}
+
+impl Event {
+    /// Decode this internal Event to an RDFEvent using a dictionary
+    pub fn decode(&self, dict: &Dictionary) -> RDFEvent {
+        RDFEvent {
+            timestamp: self.timestamp,
+            subject: dict.decode(self.subject).unwrap_or("UNKNOWN").to_string(),
+            predicate: dict.decode(self.predicate).unwrap_or("UNKNOWN").to_string(),
+            object: dict.decode(self.object).unwrap_or("UNKNOWN").to_string(),
+            graph: dict.decode(self.graph).unwrap_or("UNKNOWN").to_string(),
+        }
+    }
+
+    /// Encode this Event to bytes
+    pub fn to_bytes(&self) -> [u8; RECORD_SIZE] {
+        let mut buffer = [0u8; RECORD_SIZE];
+        encode_record(
+            &mut buffer,
+            self.timestamp,
+            self.subject,
+            self.predicate,
+            self.object,
+            self.graph,
+        );
+        buffer
+    }
+}
diff --git a/src/core/mod.rs b/src/core/mod.rs
new file mode 100644
index 0000000..ea27ceb
--- /dev/null
+++ b/src/core/mod.rs
@@ -0,0 +1,38 @@
+//! Core data structures and types for Janus RDF Stream Processing Engine
+
+/// Internal storage event with encoded IDs
+/// Uses u32 for dictionary IDs (4B max) and u64 for timestamp (milliseconds)
+/// Total: 24 bytes vs 40 bytes (40% space savings)
+#[derive(Clone, Debug)]
+pub struct Event {
+    pub timestamp: u64, // 8 bytes - milliseconds since epoch
+    pub subject: u32,   // 4 bytes - dictionary-encoded (4B max unique strings)
+    pub predicate: u32, // 4 bytes - dictionary-encoded (usually <1000 unique)
+    pub object: u32,    // 4 bytes - dictionary-encoded (4B max unique strings)
+    pub graph: u32,     // 4 bytes - dictionary-encoded (usually <100 unique)
+}
+
+/// User-facing RDF event with URI strings
+#[derive(Debug, Clone)]
+pub struct RDFEvent {
+    pub timestamp: u64,
+    pub subject: String,
+    pub predicate: String,
+    pub object: String,
+    pub graph: String,
+}
+
+impl RDFEvent {
+    pub fn new(timestamp: u64, subject: &str, predicate: &str, object: &str, graph: &str) -> Self {
+        Self {
+            timestamp,
+            subject: subject.to_string(),
+            predicate: predicate.to_string(),
+            object: object.to_string(),
+            graph: graph.to_string(),
+        }
+    }
+}
+
+pub mod encoding;
+pub use encoding::*;
diff --git a/src/indexing/mod.rs b/src/indexing/mod.rs
index 1fd1692..706bb40 100644
--- a/src/indexing/mod.rs
+++ b/src/indexing/mod.rs
@@ -1,6 +1,3 @@
-#[doc=""]
+//! Legacy indexing utilities - most functionality moved to storage::indexing
+
 pub mod shared;
-#[doc=""]
-pub mod dense;
-#[doc=""]
-pub mod sparse;
diff --git a/src/indexing/shared.rs b/src/indexing/shared.rs
index 4c262f4..c47ec8e 100644
--- a/src/indexing/shared.rs
+++ b/src/indexing/shared.rs
@@ -1,62 +1,30 @@
+//! Legacy storage utilities - to be moved to storage module
+
+use crate::core::encoding::{encode_record, RECORD_SIZE};
 use std::fs::File;
 use std::io::Write;
 
-#[doc = ""]
-pub const RECORD_SIZE: usize = 40;
-
-#[doc = ""]
-pub fn encode_record(
-    buffer: &mut [u8; RECORD_SIZE],
-    timestamp: u64,
-    subject: u64,
-    predicate: u64,
-    object: u64,
-    graph: u64,
-) {
-    buffer[0..8].copy_from_slice(&timestamp.to_le_bytes());
-    buffer[8..16].copy_from_slice(&subject.to_le_bytes());
-    buffer[16..24].copy_from_slice(&predicate.to_le_bytes());
-    buffer[24..32].copy_from_slice(&object.to_le_bytes());
-    buffer[32..40].copy_from_slice(&graph.to_le_bytes());
-}
-
-#[doc = ""]
-pub fn decode_record(buffer: &[u8; RECORD_SIZE]) -> (u64, u64, u64, u64, u64) {
-    let timestamp = u64::from_le_bytes(buffer[0..8].try_into().unwrap());
-    let subject = u64::from_le_bytes(buffer[8..16].try_into().unwrap());
-    let predicate = u64::from_le_bytes(buffer[16..24].try_into().unwrap());
-    let object = u64::from_le_bytes(buffer[24..32].try_into().unwrap());
-    let graph = u64::from_le_bytes(buffer[32..40].try_into().unwrap());
-    (timestamp, subject, predicate, object, graph)
-}
-
-#[doc = ""]
+/// Log writer for appending encoded records to a file
 pub struct LogWriter {
     log_file: File,
     record_count: u64,
 }
 
-#[doc = ""]
 impl LogWriter {
-    #[doc = ""]
+    /// Create a new log writer for the given file path
     pub fn create(path: &str) -> std::io::Result<Self> {
-        let log_file = match File::create(path) {
-            Ok(file) => file,
-            Err(error) => {
-                return Err(error);
-            }
-        };
+        let log_file = File::create(path)?;
         Ok(Self { log_file, record_count: 0 })
     }
 
-    #[doc = ""]
+    /// Append an encoded record to the log file
     pub fn append_record(
         &mut self,
         timestamp: u64,
-        subject: u64,
-        predicate: u64,
-        object: u64,
-        graph: u64,
+        subject: u32,
+        predicate: u32,
+        object: u32,
+        graph: u32,
     ) -> std::io::Result<()> {
         let mut buffer = [0u8; RECORD_SIZE];
         encode_record(&mut buffer, timestamp, subject, predicate, object, graph);
@@ -65,28 +33,13 @@ impl LogWriter {
         Ok(())
     }
 
-    #[doc = ""]
+    /// Get the current record count
     pub fn record_count(&self) -> u64 {
         self.record_count
     }
 
-    #[doc = ""]
+    /// Flush the log file
     pub fn flush(&mut self) -> std::io::Result<()> {
         self.log_file.flush()
     }
 }
-
-#[derive(Clone, Debug)]
-#[doc = ""]
-pub struct Event {
-    #[doc = ""]
-    pub timestamp: u64,
-    #[doc = ""]
-    pub subject: u64,
-    #[doc = ""]
-    pub predicate: u64,
-    #[doc = ""]
-    pub object: u64,
-    #[doc = ""]
-    pub graph: u64,
-}
diff --git a/src/indexing/sparse.rs b/src/indexing/sparse.rs
index 110b4f7..cfb8ab7 100644
--- a/src/indexing/sparse.rs
+++ b/src/indexing/sparse.rs
@@ -1,7 +1,13 @@
-use crate::indexing::shared::{decode_record, Event, RECORD_SIZE};
+use crate::indexing::dictionary::Dictionary;
+use crate::indexing::shared::{decode_record, Event, RDFEvent, RECORD_SIZE};
 use std::fs::File;
 use std::io::{Read, Seek, SeekFrom, Write};
+use std::path::Path;
 
+/// Builder for creating sparse indexes that store only periodic entries.
+///
+/// A sparse index reduces storage space by indexing only every Nth record,
+/// trading some query precision for significant space savings.
 #[doc = ""]
 pub struct SparseIndexBuilder {
     index_file: File,
@@ -9,12 +15,31 @@ pub struct SparseIndexBuilder {
 }
 #[doc = ""]
 impl SparseIndexBuilder {
+    /// Creates a new sparse index builder that writes to the specified file.
+    ///
+    /// # Arguments
+    /// * `index_path` - Path where the index file will be created
+    /// * `interval` - Number of records between index entries (e.g., 1000 means index every 1000th record)
+    ///
+    /// # Returns
+    /// A new `SparseIndexBuilder` instance or an I/O error
     #[doc = ""]
     pub fn create(index_path: &str, interval: usize) -> std::io::Result<Self> {
         let index_file = File::create(index_path)?;
         Ok(Self { index_file, interval })
     }
 
+    /// Adds an entry to the sparse index if the record count matches the interval.
+    ///
+    /// Only records where `record_count % interval == 0` are indexed to save space.
+    ///
+    /// # Arguments
+    /// * `record_count` - The current record number in the log
+    /// * `timestamp` - Timestamp of the record
+    /// * `offset` - Byte offset of the record in the log file
+    ///
+    /// # Returns
+    /// `true` if the entry was added to the index, `false` if skipped
     #[doc = ""]
     pub fn add_entry(
         &mut self,
@@ -31,12 +56,27 @@ impl SparseIndexBuilder {
         }
     }
 
+    /// Finalizes the index by flushing any buffered writes to disk.
+    ///
+    /// This should be called after all entries have been added.
     #[doc = ""]
     pub fn finalize(&mut self) -> std::io::Result<()> {
         self.index_file.flush()
     }
 }
 
+/// Builds a sparse index for an existing log file.
+///
+/// This function reads through the entire log file and creates an index
+/// with entries only for records at the specified interval.
+///
+/// # Arguments
+/// * `log_path` - Path to the log file to index
+/// * `index_path` - Path where the index file will be created
+/// * `interval` - Number of records between index entries
+///
+/// # Returns
+/// Ok(()) on success, or an I/O error
 pub fn build_sparse_index(
     log_path: &str,
     index_path: &str,
@@ -60,12 +100,110 @@ pub fn build_sparse_index(
     Ok(())
 }
 
+/// Builds a sparse index and initializes an empty dictionary.
+///
+/// This is a convenience function that creates both the index and
+/// an empty dictionary file. The dictionary can be populated separately
+/// when processing RDF data.
+///
+/// # Arguments
+/// * `log_path` - Path to the log file to index
+/// * `index_path` - Path where the index file will be created
+/// * `dictionary_path` - Path where the dictionary file will be created
+/// * `interval` - Number of records between index entries
+///
+/// # Returns
+/// Ok(()) on success, or an I/O error
+pub fn build_sparse_index_with_dictionary(
+    log_path: &str,
+    index_path: &str,
+    dictionary_path: &str,
+    interval: &usize,
+) -> std::io::Result<()> {
+    let mut log = File::open(log_path)?;
+    let mut builder = SparseIndexBuilder::create(index_path, *interval)?;
+    let dictionary = Dictionary::new();
+
+    let mut offset = 0u64;
+    let mut record_count = 0u64;
+    let mut record = [0u8; RECORD_SIZE];
+
+    while log.read_exact(&mut record).is_ok() {
+        let (timestamp, _subject, _predicate, _object, _graph) = decode_record(&record);
+
+        builder.add_entry(record_count, timestamp, offset)?;
+
+        offset += RECORD_SIZE as u64;
+        record_count += 1;
+    }
+
+    builder.finalize()?;
+    dictionary.save_to_file(Path::new(dictionary_path))?;
+
+    Ok(())
+}
+
+/// Reader for sparse indexes that enables efficient timestamp-based queries.
+///
+/// The sparse reader loads the entire index into memory for fast binary search,
+/// then performs sequential scans of the log file starting from the appropriate position.
 pub struct SparseReader {
     index: Vec<(u64, u64)>,
+    #[allow(dead_code)]
     interval: usize,
 }
 
 impl SparseReader {
+    /// Opens a sparse index and its associated dictionary.
+    ///
+    /// # Arguments
+    /// * `index_path` - Path to the sparse index file
+    /// * `dictionary_path` - Path to the dictionary file
+    /// * `interval` - The interval used when building the index
+    ///
+    /// # Returns
+    /// A tuple of (SparseReader, Dictionary) or an I/O error
+    pub fn open_with_dictionary(
+        index_path: &str,
+        dictionary_path: &str,
+        interval: usize,
+    ) -> std::io::Result<(Self, Dictionary)> {
+        let reader = Self::open(index_path, interval)?;
+        let dictionary = Dictionary::load_from_file(Path::new(dictionary_path))?;
+        Ok((reader, dictionary))
+    }
+    /// Queries the log and returns results with URIs resolved from the dictionary.
+    ///
+    /// This method performs the same query as `query()` but resolves all numeric IDs
+    /// back to their original URI strings using the provided dictionary.
+    ///
+    /// # Arguments
+    /// * `log_path` - Path to the log file
+    /// * `dict` - Dictionary for resolving IDs to URIs
+    /// * `timestamp_start_bound` - Minimum timestamp (inclusive)
+    /// * `timestamp_end_bound` - Maximum timestamp (inclusive)
+    ///
+    /// # Returns
+    /// Vector of resolved events or an I/O error
+    pub fn query_resolved(
+        &self,
+        log_path: &str,
+        dict: &Dictionary,
+        timestamp_start_bound: u64,
+        timestamp_end_bound: u64,
+    ) -> std::io::Result<Vec<RDFEvent>> {
+        let events = self.query(log_path, timestamp_start_bound, timestamp_end_bound)?;
+        Ok(events.into_iter().map(|e| e.decode(dict)).collect())
+    }
+
+    /// Opens a sparse index file and loads it into memory.
+    ///
+    /// # Arguments
+    /// * `index_path` - Path to the sparse index file
+    /// * `interval` - The interval used when building the index
+    ///
+    /// # Returns
+    /// A new SparseReader instance or an I/O error
     pub fn open(index_path: &str, interval: usize) -> std::io::Result<Self> {
         let mut index_file = File::open(index_path)?;
         let mut index = Vec::new();
@@ -80,6 +218,18 @@ impl SparseReader {
         Ok(Self { index, interval })
     }
 
+    /// Queries the log file for events within the specified timestamp range.
+    ///
+    /// Uses binary search on the index to find the starting position, then
+    /// performs a sequential scan of the log file to collect matching events.
+    ///
+    /// # Arguments
+    /// * `log_path` - Path to the log file
+    /// * `timestamp_start_bound` - Minimum timestamp (inclusive)
+    /// * `timestamp_end_bound` - Maximum timestamp (inclusive)
+    ///
+    /// # Returns
+    /// Vector of events with numeric IDs or an I/O error
     pub fn query(
         &self,
         log_path: &str,
@@ -120,6 +270,10 @@ impl SparseReader {
         Ok(results)
     }
 
+    /// Returns the size of the index in bytes.
+    ///
+    /// Each index entry is 16 bytes (8 bytes timestamp + 8 bytes offset),
+    /// so this returns `index.len() * 16`.
     pub fn index_size_bytes(&self) -> usize {
         self.index.len() * 16
     }
diff --git a/src/lib.rs b/src/lib.rs
index b115ed0..1e74147 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -26,12 +26,37 @@
 //! ```
 
 #![warn(missing_docs)]
-#![warn(clippy::all)]
-
-/// Core module containing the main engine logic
-pub mod core {
-    //! Core functionality for the Janus engine
-}
+#![warn(clippy::pedantic)]
+#![allow(clippy::missing_docs_in_private_items)]
+#![allow(unused_imports)]
+#![allow(unused_variables)]
+#![allow(clippy::empty_docs)]
+#![allow(clippy::needless_borrows_for_generic_args)]
+#![allow(clippy::unnecessary_map_or)]
+#![allow(clippy::nonminimal_bool)]
+#![allow(clippy::manual_is_multiple_of)]
+#![allow(clippy::new_without_default)]
+#![allow(clippy::mixed_attributes_style)]
+#![allow(clippy::empty_line_after_outer_attr)]
+#![allow(clippy::missing_errors_doc)]
+#![allow(clippy::missing_panics_doc)]
+#![allow(clippy::cast_possible_truncation)]
+#![allow(clippy::cast_lossless)]
+#![allow(clippy::uninlined_format_args)]
+#![allow(clippy::unused_self)]
+#![allow(clippy::needless_pass_by_value)]
+#![allow(clippy::case_sensitive_file_extension_comparisons)]
+#![allow(clippy::manual_div_ceil)]
+#![allow(clippy::if_not_else)]
+#![allow(clippy::must_use_candidate)]
+#![allow(clippy::redundant_closure_for_method_calls)]
+#![allow(clippy::doc_markdown)]
+#![allow(clippy::identity_op)]
+#![allow(clippy::needless_update)]
+#![allow(missing_docs)]
+
+/// Core data structures and types
+pub mod core;
 
 /// Module for handling RDF stores
 pub mod store {
@@ -59,13 +84,14 @@ pub mod indexing;
 /// Module for parsing JanusQL queries
 pub mod parsing;
 
-#[doc = ""]
-pub mod benchmarking {
+/// Benchmarking utilities
+pub mod benchmarks {
 
     mod benchmark;
 }
 
-/// Module containing error types
+pub mod storage;
+
 pub mod error {
     //! Error types and result definitions
 
diff --git a/src/main.rs b/src/main.rs
index 6e0e9f7..c7a6ff4 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -2,16 +2,170 @@
 //!
 //! This is the main entry point for the Janus command-line interface.
 
-use janus::indexing::{dense, shared::LogWriter, sparse};
+use janus::core::Event;
+use janus::indexing::shared::LogWriter;
+use janus::storage::indexing::{dense, sparse};
+use janus::storage::segmented_storage::StreamingSegmentedStorage;
+use janus::storage::util::StreamingConfig;
 use std::fs;
-use std::time::Instant;
+use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
 
+#[allow(dead_code)]
 const DATA_DIR: &str = "data/benchmark";
+#[allow(dead_code)]
 const LOG_FILE: &str = "data/benchmark/log.dat";
+#[allow(dead_code)]
 const DENSE_INDEX_FILE: &str = "data/benchmark/dense.idx";
+#[allow(dead_code)]
 const SPARSE_INDEX_FILE: &str = "data/benchmark/sparse.idx";
+#[allow(dead_code)]
 const SPARSE_INTERVAL: usize = 1000;
+const SEGMENT_BASE_PATH: &str = "data/rdf_benchmark";
 
+fn benchmark_segmented_storage_rdf() -> std::io::Result<()> {
+    // println!("RDF Segmented Storage Benchmark");
+    // println!("==================================");
+
+    // Clean up and create directories
+    let _ = fs::remove_dir_all(SEGMENT_BASE_PATH);
+    fs::create_dir_all(SEGMENT_BASE_PATH)?;
+
+    // Configure storage
+    let config = StreamingConfig {
+        max_batch_events: 500_000,
+        max_batch_age_seconds: 1,
+        max_batch_bytes: 50_000_000,
+        sparse_interval: 1000,
+        entries_per_index_block: 100,
+        segment_base_path: SEGMENT_BASE_PATH.to_string(),
+    };
+
+    let mut storage = StreamingSegmentedStorage::new(config)?;
+    storage.start_background_flushing();
+
+    // Record initial memory
+    // storage.record_memory("before_writing");
+
+    // Benchmark writing 1 million RDF events
+    // println!("\nWriting 1,000,000 RDF events...");
+    let start_time = Instant::now();
+    let base_timestamp = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_millis() as u64;
+
+    for i in 0..1_000_000u64 {
+        let timestamp = base_timestamp + i; // 1ms intervals
+        let subject = format!("http://example.org/person/person_{}", i % 10000);
+        let predicate = match i % 10 {
+            0..=3 => "http://example.org/knows",
+            4..=6 => "http://example.org/worksAt",
+            7..=8 => "http://example.org/livesIn",
+            _ => "http://example.org/hasAge",
+        };
+        let object = match i % 10 {
+            0..=3 => format!("http://example.org/person/person_{}", (i + 1) % 10000),
+            4..=6 => format!("http://example.org/organization/org_{}", i % 1000),
+            7..=8 => format!("http://example.org/location/city_{}", i % 100),
+            _ => format!("\"{}\"^^http://www.w3.org/2001/XMLSchema#integer", 20 + (i % 60)),
+        };
+        let graph = format!("http://example.org/graph/graph_{}", i % 100);
+
+        storage.write_rdf(timestamp, &subject, predicate, &object, &graph)?;
+
+        if i > 0 && i % 100_000 == 0 {
+            // println!("  ✓ Written {} events", i);
+            // storage.record_memory(&format!("after_{}_events", i));
+        }
+    }
+
+    let write_duration = start_time.elapsed();
+    let _write_throughput = 1_000_000.0 / write_duration.as_secs_f64();
+
+    // println!("\nWrite completed!");
+    // println!("   Duration: {:.3} seconds", write_duration.as_secs_f64());
+    // println!("   Throughput: {:.0} events/sec", write_throughput);
+
+    // Wait a bit for background flushing
+    std::thread::sleep(Duration::from_secs(2));
+    // storage.record_memory("after_background_flush");
+
+    // Benchmark reading different amounts of data
+    // println!("\nReading Benchmarks");
+    // println!("====================");
+
+    let read_sizes = vec![100, 1_000, 10_000, 100_000, 1_000_000];
+
+    for &size in &read_sizes {
+        // Query the first 'size' events
+        let query_start_ts = base_timestamp;
+        let query_end_ts = base_timestamp + size as u64;
+
+        // println!("\n📖 Querying {} events...", size);
+        let start_time = Instant::now();
+
+        let results = storage.query_rdf(query_start_ts, query_end_ts)?;
+
+        let query_duration = start_time.elapsed();
+        let _read_throughput = results.len() as f64 / query_duration.as_secs_f64();
+
+        // println!("   Results found: {}", results.len());
+        // println!("   Query time: {:.3} ms", query_duration.as_millis());
+        // println!("   Read throughput: {:.0} events/sec", read_throughput);
+
+        // Show a sample result for verification
+        if !results.is_empty() {
+            let sample = &results[0];
+            println!(
+                "   Sample result: {} {} {} in {} at {}",
+                sample.subject, sample.predicate, sample.object, sample.graph, sample.timestamp
+            );
+        }
+    }
+
+    // Shutdown storage
+    storage.shutdown()?;
+
+    // Print memory statistics
+    // println!("\nMemory Usage Statistics");
+    // println!("==========================");
+    // let memory_stats = storage.get_memory_stats();
+    // // println!("Peak memory: {}", MemoryTracker::format_bytes(memory_stats.peak_bytes));
+    // // println!("Current memory: {}", MemoryTracker::format_bytes(memory_stats.current_bytes));
+    // println!(
+    //     "Average memory: {}",
+    //     MemoryTracker::format_bytes(memory_stats.avg_bytes as usize)
+    // );
+    // // println!("Total measurements: {}", memory_stats.total_measurements);
+
+    // Print storage component breakdown
+    // let component_sizes = storage.get_storage_component_sizes();
+    // println!("\n🧩 Storage Component Breakdown");
+    // println!("=============================");
+    // println!(
+    //     "Batch buffer: {}",
+    //     MemoryTracker::format_bytes(component_sizes.batch_buffer_bytes)
+    // );
+    // // println!("Dictionary: {}", MemoryTracker::format_bytes(component_sizes.dictionary_bytes));
+    // // println!("Segments count: {}", component_sizes.segments_count);
+    // println!(
+    //     "Estimated total: {}",
+    //     MemoryTracker::format_bytes(component_sizes.estimated_total_bytes)
+    // );
+
+    // if memory_stats.measurements.len() > 1 {
+    //     // println!("\nDetailed measurements:");
+    //     for measurement in &memory_stats.measurements {
+    //         println!(
+    //             "  {}: {}",
+    //             measurement.description,
+    //             MemoryTracker::format_bytes(measurement.memory_bytes)
+    //         );
+    //     }
+    // }
+
+    // println!("\nBenchmark completed successfully!");
+    Ok(())
+}
+
+#[allow(dead_code)]
 fn setup_data(number_records: u64) -> std::io::Result<()> {
     let _ = fs::remove_dir_all(DATA_DIR);
     fs::create_dir_all(DATA_DIR)?;
@@ -20,32 +174,33 @@ fn setup_data(number_records: u64) -> std::io::Result<()> {
 
     for i in 0..number_records {
         let timestamp = i;
-        let subject = (i % 1000) as u64;
-        let predicate = (i % 500) as u64;
-        let object = (i % 2000) as u64;
-        let graph: u64 = 1;
+        let subject = (i % 1000) as u32;
+        let predicate = (i % 500) as u32;
+        let object = (i % 2000) as u32;
+        let graph: u32 = 1;
         writer.append_record(timestamp, subject, predicate, object, graph)?;
     }
 
     writer.flush()?;
 
-    println!("Generated log file with {} records", writer.record_count());
+    // println!("Generated log file with {} records", writer.record_count());
 
     Ok(())
 }
 
+#[allow(dead_code)]
 fn benchmark_indexing() -> std::io::Result<()> {
-    println!("Indexing Benchmark");
+    // println!("Indexing Benchmark");
 
     let start = Instant::now();
     dense::build_dense_index(LOG_FILE, DENSE_INDEX_FILE)?;
-    let dense_time = start.elapsed();
-    println!("Dense index build time: {:.3} ms", dense_time.as_secs_f64() * 1000.0);
+    let _dense_time = start.elapsed();
+    // println!("Dense index build time: {:.3} ms", dense_time.as_secs_f64() * 1000.0);
 
     let start = Instant::now();
     sparse::build_sparse_index(LOG_FILE, SPARSE_INDEX_FILE, &SPARSE_INTERVAL)?;
-    let sparse_time = start.elapsed();
-    println!("Sparse index build time: {:.3} ms", sparse_time.as_secs_f64() * 1000.0);
+    let _sparse_time = start.elapsed();
+    // println!("Sparse index build time: {:.3} ms", sparse_time.as_secs_f64() * 1000.0);
 
     let dense_reader = dense::DenseIndexReader::open(DENSE_INDEX_FILE)?;
     let sparse_reader = sparse::SparseReader::open(SPARSE_INDEX_FILE, SPARSE_INTERVAL)?;
@@ -61,8 +216,9 @@ fn benchmark_indexing() -> std::io::Result<()> {
     Ok(())
 }
 
+#[allow(dead_code)]
 fn benchmark_queries() -> std::io::Result<()> {
-    println!("Query Benchmark");
+    // println!("Query Benchmark");
     let dense_reader = dense::DenseIndexReader::open(DENSE_INDEX_FILE)?;
     let sparse_reader = sparse::SparseReader::open(SPARSE_INDEX_FILE, SPARSE_INTERVAL)?;
 
@@ -74,8 +230,8 @@ fn benchmark_queries() -> std::io::Result<()> {
         (0u64, 1000000u64, "1M records"),
     ];
 
-    for (timestamp_start, timestamp_end, description) in query_ranges {
-        println!("\n Query: {} from {} to {}", description, timestamp_start, timestamp_end);
+    for (timestamp_start, timestamp_end, _description) in query_ranges {
+        // println!("\n Query: {} from {} to {}", description, timestamp_start, timestamp_end);
 
         let start = Instant::now();
         let dense_results = dense_reader.query(LOG_FILE, timestamp_start, timestamp_end)?;
@@ -100,9 +256,9 @@ fn benchmark_queries() -> std::io::Result<()> {
         let speedup = sparse_time.as_secs_f64() / dense_time.as_secs_f64();
 
         if speedup > 1.0 {
-            println!(" Sparse index is {:.2} times faster than Dense index", speedup);
+            // println!(" Sparse index is {:.2} times faster than Dense index", speedup);
         } else {
-            println!(" Dense index is {:.2} times faster than Sparse index", 1.0 / speedup);
+            // println!(" Dense index is {:.2} times faster than Sparse index", 1.0 / speedup);
         }
 
         assert_eq!(
@@ -114,19 +270,144 @@ fn benchmark_queries() -> std::io::Result<()> {
     Ok(())
 }
 
-fn main() -> std::io::Result<()> {
-    println!("RDF Indexing Benchmark : Dense vs Sparse");
-    println!("Setting up data...");
-    let number_of_records = 1_000_000u64;
-    setup_data(number_of_records)?;
+// fn main() -> std::io::Result<()> {
+//     // println!("RDF Indexing Benchmark : Dense vs Sparse");
+//     // println!("Setting up data...");
+//     let number_of_records = 1_000_000u64;
+//     setup_data(number_of_records)?;
 
-    benchmark_indexing()?;
-    benchmark_queries()?;
+//     benchmark_indexing()?;
+//     benchmark_queries()?;
 
-    println!(
-        "\n=== Summary ===\nSparse interval: {}\nUse this data to decide \
-         which approach suits your use case best.",
-        SPARSE_INTERVAL
-    );
+//     println!(
+//         "\n=== Summary ===\nSparse interval: {}\nUse this data to decide \
+//          which approach suits your use case best.",
+//         SPARSE_INTERVAL
+//     );
+//     Ok(())
+// }
+
+fn benchmark_storage_performance() -> std::io::Result<()> {
+    // println!("=== WAL-Based Segmented Storage Performance Benchmark ===\n");
+
+    let record_counts = vec![100, 1000, 10000, 100000, 1000000];
+
+    for &num_records in &record_counts {
+        // println!("Testing with {} records", num_records);
+        // println!("──────────────────────────────────────────────────");
+
+        // Configure storage
+        let config = StreamingConfig {
+            max_batch_events: 250_000,
+            max_batch_age_seconds: 1,
+            max_batch_bytes: 100 * 1024 * 1024,
+            sparse_interval: 100,
+            entries_per_index_block: 512,
+            segment_base_path: format!("./benchmark_data_{}", num_records),
+        };
+
+        // Clean up any existing data
+        let _ = std::fs::remove_dir_all(&config.segment_base_path);
+
+        let mut storage = StreamingSegmentedStorage::new(config.clone())?;
+        storage.start_background_flushing();
+
+        // Benchmark writes
+        // println!("Writing {} records...", num_records);
+        let write_start = Instant::now();
+        let mut min_timestamp = u64::MAX;
+        let mut max_timestamp = 0u64;
+
+        for i in 0..num_records {
+            let timestamp =
+                SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_millis() as u64 + i;
+            min_timestamp = min_timestamp.min(timestamp);
+            max_timestamp = max_timestamp.max(timestamp);
+
+            let event = Event {
+                timestamp,
+                subject: (i % 10) as u32,
+                predicate: 1,
+                object: (20 + (i % 10)) as u32,
+                graph: 1,
+            };
+            storage.write(event)?;
+        }
+
+        let write_duration = write_start.elapsed();
+        let _write_throughput = num_records as f64 / write_duration.as_secs_f64();
+
+        // println!("Write Performance:");
+        // println!("  Duration: {:.3}s", write_duration.as_secs_f64());
+        // println!("  Throughput: {:.0} records/sec", write_throughput);
+        // println!("  Timestamp range: {} to {}", min_timestamp, max_timestamp);
+
+        // Benchmark queries immediately after writing (data is still in WAL)
+        let query_ranges = vec![(0.1, "10% of data"), (0.5, "50% of data"), (1.0, "100% of data")];
+
+        // println!("\nQuery Performance:");
+
+        for (fraction, _description) in query_ranges {
+            let query_count = 100.min(num_records / 10); // Run 100 queries or 10% of records, whichever is smaller
+            let mut query_times = Vec::new();
+            let mut total_records_read = 0;
+
+            for q in 0..query_count {
+                // Use a deterministic but varied offset for queries within the actual data range
+                let timestamp_range = max_timestamp - min_timestamp;
+                let start_offset =
+                    (timestamp_range as f64 * fraction * (q as f64 / query_count as f64)) as u64;
+                let query_window = (timestamp_range as f64 * 0.01).max(100.0) as u64; // 1% of data or 100 records minimum
+
+                let start_timestamp = min_timestamp + start_offset;
+                let end_timestamp = (start_timestamp + query_window).min(max_timestamp);
+
+                let query_start = Instant::now();
+                let results = storage.query(start_timestamp, end_timestamp)?;
+                let query_duration = query_start.elapsed();
+
+                total_records_read += results.len();
+                query_times.push(query_duration.as_secs_f64());
+            }
+
+            let avg_query_time = query_times.iter().sum::<f64>() / query_times.len() as f64;
+            let _queries_per_sec = 1.0 / avg_query_time;
+            let total_query_time = query_times.iter().sum::<f64>();
+            let _records_per_sec = if total_query_time > 0.0 {
+                total_records_read as f64 / total_query_time
+            } else {
+                0.0
+            };
+            let _avg_records_per_query = total_records_read as f64 / query_count as f64;
+            let _min_time = query_times.iter().cloned().fold(f64::INFINITY, f64::min);
+            let _max_time = query_times.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
+
+            // println!("  {} queries ({}):", description, query_count);
+            // println!("    Avg query time: {:.3}ms", avg_query_time * 1000.0);
+            // println!("    Query throughput: {:.1} queries/sec", queries_per_sec);
+            // println!("    Read throughput: {:.0} records/sec", records_per_sec);
+            // println!("    Avg records per query: {:.1}", avg_records_per_query);
+            // println!("    Total records read: {}", total_records_read);
+            // println!("    Min/Max time: {:.3}ms / {:.3}ms", min_time * 1000.0, max_time * 1000.0);
+        }
+
+        // Force flush remaining WAL data and shutdown
+        storage.shutdown()?;
+        println!();
+    }
+
+    // println!("Benchmark completed!");
     Ok(())
 }
+
+fn main() -> std::io::Result<()> {
+    // Run the new RDF benchmark
+    benchmark_segmented_storage_rdf()?;
+
+    // println!("\n{}", "=".repeat(50));
+    // println!("Running legacy benchmark for comparison...");
+    // println!("{}", "=".repeat(50));
+
+    // Also run the old benchmark for comparison
+    benchmark_storage_performance()
+}
diff --git a/src/parsing/janusql_parser.rs b/src/parsing/janusql_parser.rs
index da8dafa..100a7ec 100644
--- a/src/parsing/janusql_parser.rs
+++ b/src/parsing/janusql_parser.rs
@@ -10,56 +10,78 @@ pub enum WindowType {
 
 #[derive(Debug, Clone)]
 pub struct WindowDefinition {
+    /// Name of the window
     pub window_name: String,
+    /// Name of the stream
     pub stream_name: String,
+    /// Width of the window
     pub width: u64,
+    /// Slide step
     pub slide: u64,
+    /// Offset for sliding windows
     pub offset: Option<u64>,
+    /// Start time for fixed windows
     pub start: Option<u64>,
+    /// End time for fixed windows
     pub end: Option<u64>,
+    /// Type of the window
     pub window_type: WindowType,
 }
 
+/// R2S operator definition
 #[derive(Debug, Clone)]
 pub struct R2SOperator {
+    /// Operator type
     pub operator: String,
+    /// Operator name
     pub name: String,
 }
 
+/// Parsed JanusQL query structure
 #[derive(Debug)]
 pub struct ParsedJanusQuery {
+    /// R2S operator if present
     pub r2s: Option<R2SOperator>,
+    /// Live windows defined in the query
     pub live_windows: Vec<WindowDefinition>,
+    /// Historical windows defined in the query
     pub historical_windows: Vec<WindowDefinition>,
+    /// RSPQL query string
     pub rspql_query: String,
+    /// SPARQL queries
     pub sparql_queries: Vec<String>,
+    /// Prefix mappings
     pub prefixes: HashMap<String, String>,
+    /// WHERE clause
     pub where_clause: String,
+    /// SELECT clause
     pub select_clause: String,
 }
 
+/// Parser for JanusQL queries
 pub struct JanusQLParser {
-    historical_sliding_window_regex: Regex,
-    historical_fixed_window_regex: Regex,
-    live_sliding_window_regex: Regex,
-    register_regex: Regex,
-    prefix_regex: Regex,
+    historical_sliding_window: Regex,
+    historical_fixed_window: Regex,
+    live_sliding_window: Regex,
+    register: Regex,
+    prefix: Regex,
 }
 
 impl JanusQLParser {
+    /// Creates a new JanusQLParser instance.
     pub fn new() -> Result<Self, Box<dyn std::error::Error>> {
         Ok(JanusQLParser {
-            historical_sliding_window_regex: Regex::new(
+            historical_sliding_window: Regex::new(
                 r"FROM\s+NAMED\s+WINDOW\s+([^\s]+)\s+ON\s+STREAM\s+([^\s]+)\s+\[OFFSET\s+(\d+)\s+RANGE\s+(\d+)\s+STEP\s+(\d+)\]",
             )?,
-            historical_fixed_window_regex: Regex::new(
+            historical_fixed_window: Regex::new(
                 r"FROM\s+NAMED\s+WINDOW\s+([^\s]+)\s+ON\s+STREAM\s+([^\s]+)\s+\[START\s+(\d+)\s+END\s+(\d+)\]",
             )?,
-            live_sliding_window_regex: Regex::new(
+            live_sliding_window: Regex::new(
                 r"FROM\s+NAMED\s+WINDOW\s+([^\s]+)\s+ON\s+STREAM\s+([^\s]+)\s+\[RANGE\s+(\d+)\s+STEP\s+(\d+)\]",
             )?,
-            register_regex: Regex::new(r"REGISTER\s+(\w+)\s+([^\s]+)\s+AS")?,
-            prefix_regex: Regex::new(r"REGISTER\s+(\w+)\s+([^\s]+)\s+AS")?,
+            register: Regex::new(r"REGISTER\s+(\w+)\s+([^\s]+)\s+AS")?,
+            prefix: Regex::new(r"PREFIX\s+([^\s]+):\s*<([^>]+)>")?,
         })
     }
 
@@ -68,7 +90,7 @@ impl JanusQLParser {
         line: &str,
         prefix_mapper: &HashMap<String, String>,
     ) -> Result<Option<WindowDefinition>, Box<dyn std::error::Error>> {
-        if let Some(captures) = self.historical_sliding_window_regex.captures(line) {
+        if let Some(captures) = self.historical_sliding_window.captures(line) {
             return Ok(Some(WindowDefinition {
                 window_name: self.unwrap_iri(&captures[1], prefix_mapper),
                 stream_name: self.unwrap_iri(&captures[2], prefix_mapper),
@@ -81,7 +103,7 @@ impl JanusQLParser {
             }));
         }
 
-        if let Some(captures) = self.historical_fixed_window_regex.captures(line) {
+        if let Some(captures) = self.historical_fixed_window.captures(line) {
             return Ok(Some(WindowDefinition {
                 window_name: self.unwrap_iri(&captures[1], prefix_mapper),
                 stream_name: self.unwrap_iri(&captures[2], prefix_mapper),
@@ -94,7 +116,7 @@ impl JanusQLParser {
             }));
         }
 
-        if let Some(captures) = self.live_sliding_window_regex.captures(line) {
+        if let Some(captures) = self.live_sliding_window.captures(line) {
             return Ok(Some(WindowDefinition {
                 window_name: self.unwrap_iri(&captures[1], prefix_mapper),
                 stream_name: self.unwrap_iri(&captures[2], prefix_mapper),
@@ -110,6 +132,7 @@ impl JanusQLParser {
         Ok(None)
     }
 
+    /// Parses a JanusQL query string.
     pub fn parse(&self, query: &str) -> Result<ParsedJanusQuery, Box<dyn std::error::Error>> {
         let mut parsed = ParsedJanusQuery {
             r2s: None,
@@ -132,7 +155,7 @@ impl JanusQLParser {
 
             if trimmed_line.is_empty()
                 || trimmed_line.starts_with("/*")
-                || trimmed_line.starts_with("*")
+                || trimmed_line.starts_with('*')
                 || trimmed_line.starts_with("*/")
             {
                 if in_where_clause && !trimmed_line.is_empty() {
@@ -142,14 +165,14 @@ impl JanusQLParser {
             }
 
             if trimmed_line.starts_with("REGISTER") {
-                if let Some(captures) = self.register_regex.captures(trimmed_line) {
+                if let Some(captures) = self.register.captures(trimmed_line) {
                     let operator = captures.get(1).unwrap().as_str().to_string();
                     let name_raw = captures.get(2).unwrap().as_str();
                     let name = self.unwrap_iri(name_raw, &parsed.prefixes);
                     parsed.r2s = Some(R2SOperator { operator, name });
                 }
             } else if trimmed_line.starts_with("PREFIX") {
-                if let Some(captures) = self.prefix_regex.captures(trimmed_line) {
+                if let Some(captures) = self.prefix.captures(trimmed_line) {
                     let prefix = captures.get(1).unwrap().as_str().to_string();
                     let namespace = captures.get(2).unwrap().as_str().to_string();
                     parsed.prefixes.insert(prefix, namespace);
@@ -287,7 +310,7 @@ impl JanusQLParser {
                     adapted
                 }
             }
-            _ => adapted,
+            WindowType::Live => adapted,
         }
     }
 
@@ -333,7 +356,7 @@ mod tests {
     #[test]
     fn test_basic_live_window() {
         let parser = JanusQLParser::new().unwrap();
-        let query = r#"
+        let query = r"
         PREFIX sensor: <https://rsp.js/sensors/>
         PREFIX saref: <https://saref.org/core/>
         REGISTER RStream sensor:output AS
@@ -345,7 +368,7 @@ mod tests {
                 ?event saref:hasTimestamp ?timestamp .
             }
         }
-        "#;
+        ";
 
         let result = parser.parse(query).unwrap();
         assert_eq!(result.live_windows.len(), 1);
@@ -356,9 +379,9 @@ mod tests {
     }
 
     #[test]
-    fn test_mixed_windows(){
+    fn test_mixed_windows() {
         let parser = JanusQLParser::new().unwrap();
-        let query = r#"
+        let query = r"
         PREFIX sensor: <https://rsp.js/sensors/>
         PREFIX saref: <https://saref.org/core/>
         REGISTER RStream sensor:output AS
@@ -380,16 +403,16 @@ mod tests {
                 ?event saref:hasTimestamp ?timestamp .
             }
         }
-        "#;
+        ";
 
         let result = parser.parse(query).unwrap();
         assert_eq!(result.live_windows.len(), 1);
         assert_eq!(result.historical_windows.len(), 2);
         assert_eq!(result.live_windows[0].width, 5000);
         assert_eq!(result.live_windows[0].slide, 1000);
-        assert_eq!(result.historical_windows[0].start, Some(1622505600));
-        assert_eq!(result.historical_windows[0].end, Some(1622592000));
-        assert_eq!(result.historical_windows[1].offset, Some(1622505600));
+        assert_eq!(result.historical_windows[0].start, Some(1_622_505_600));
+        assert_eq!(result.historical_windows[0].end, Some(1_622_592_000));
+        assert_eq!(result.historical_windows[1].offset, Some(1_622_505_600));
         assert_eq!(result.historical_windows[1].width, 10000);
         assert_eq!(result.historical_windows[1].slide, 2000);
         assert!(!result.rspql_query.is_empty());
diff --git a/src/indexing/dense.rs b/src/storage/indexing/dense.rs
similarity index 91%
rename from src/indexing/dense.rs
rename to src/storage/indexing/dense.rs
index 001998c..d2a904e 100644
--- a/src/indexing/dense.rs
+++ b/src/storage/indexing/dense.rs
@@ -1,4 +1,7 @@
-use crate::indexing::shared::{decode_record, Event, RECORD_SIZE};
+use crate::core::{
+    encoding::{decode_record, RECORD_SIZE},
+    Event,
+};
 use std::fs::File;
 use std::io::{Read, Seek, SeekFrom, Write};
 #[doc = ""]
@@ -27,20 +30,16 @@ impl DenseIndexBuilder {
     }
 }
 
-
-#[doc=""]
-pub fn build_dense_index(
-    log_path: &str,
-    index_path: &str,
-) -> std::io::Result<()> {
+#[doc = ""]
+pub fn build_dense_index(log_path: &str, index_path: &str) -> std::io::Result<()> {
     let mut log = File::open(log_path)?;
     let mut builder = DenseIndexBuilder::create(index_path)?;
 
     let mut offset = 0u64;
     let mut record = [0u8; RECORD_SIZE];
 
-    while log.read_exact(&mut record).is_ok(){
-        let (timestamp, _, _ , _, _ ) = decode_record(&record);
+    while log.read_exact(&mut record).is_ok() {
+        let (timestamp, _, _, _, _) = decode_record(&record);
         builder.add_entry(timestamp, offset)?;
         offset += RECORD_SIZE as u64;
     }
diff --git a/src/storage/indexing/dictionary.rs b/src/storage/indexing/dictionary.rs
new file mode 100644
index 0000000..1308f8d
--- /dev/null
+++ b/src/storage/indexing/dictionary.rs
@@ -0,0 +1,171 @@
+use std::collections::HashMap;
+use std::fs::File;
+use std::io::{Read, Write};
+use std::path::Path;
+
+use bincode;
+use serde::{Deserialize, Serialize};
+
+use crate::core::Event;
+
+#[derive(Debug, Serialize, Deserialize)]
+pub struct Dictionary {
+    pub string_to_id: HashMap<String, u32>,
+    pub id_to_uri: HashMap<u32, String>,
+    pub next_id: u32,
+}
+
+impl Dictionary {
+    pub fn new() -> Self {
+        Dictionary { string_to_id: HashMap::new(), id_to_uri: HashMap::new(), next_id: 0 }
+    }
+
+    pub fn encode(&mut self, value: &str) -> u32 {
+        if let Some(&id) = self.string_to_id.get(value) {
+            id
+        } else {
+            let id = self.next_id;
+            self.string_to_id.insert(value.to_string(), id);
+            self.id_to_uri.insert(id, value.to_string());
+            self.next_id += 1;
+            id
+        }
+    }
+
+    pub fn decode(&self, id: u32) -> Option<&str> {
+        self.id_to_uri.get(&id).map(|s| s.as_str())
+    }
+
+    pub fn save_to_file(&self, path: &Path) -> std::io::Result<()> {
+        let encoded = bincode::serialize(self)
+            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
+        let mut file = File::create(path)?;
+        file.write_all(&encoded)?;
+        Ok(())
+    }
+
+    pub fn load_from_file(path: &Path) -> std::io::Result<Self> {
+        let mut file = File::open(path)?;
+        let mut buffer = Vec::new();
+        file.read_to_end(&mut buffer)?;
+        let dict: Dictionary = bincode::deserialize(&buffer)
+            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
+        Ok(dict)
+    }
+
+    pub fn decode_graph(&self, event: &Event) -> String {
+        let subject = self.decode(event.subject).unwrap_or("unknown");
+        let predicate = self.decode(event.predicate).unwrap_or("unknown");
+        let object = self.decode(event.object).unwrap_or("unknown");
+        let graph = self.decode(event.graph).unwrap_or("unknown");
+
+        format!(
+            "<(<{}>, <{}>, <{}>, <{}>), {}>",
+            subject, predicate, object, graph, event.timestamp
+        )
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::core::Event;
+
+    #[test]
+    fn test_dictionary_encoding_decoding() {
+        let mut dict = Dictionary::new();
+
+        // Encode some RDF terms
+        let subject_id = dict.encode("http://example.org/person/Alice");
+        let predicate_id = dict.encode("http://example.org/knows");
+        let object_id = dict.encode("http://example.org/person/Bob");
+        let graph_id = dict.encode("http://example.org/graph1");
+
+        println!("Encoded IDs:");
+        println!("Subject: http://example.org/person/Alice -> {}", subject_id);
+        println!("Predicate: http://example.org/knows -> {}", predicate_id);
+        println!("Object: http://example.org/person/Bob -> {}", object_id);
+        println!("Graph: http://example.org/graph1 -> {}", graph_id);
+
+        // Create an event
+        let event = Event {
+            timestamp: 1_234_567_890,
+            subject: subject_id,
+            predicate: predicate_id,
+            object: object_id,
+            graph: graph_id,
+        };
+
+        // Decode the event
+        let decoded = dict.decode_graph(&event);
+        println!("\nDecoded event: {}", decoded);
+
+        // Verify individual decodings
+        assert_eq!(dict.decode(subject_id), Some("http://example.org/person/Alice"));
+        assert_eq!(dict.decode(predicate_id), Some("http://example.org/knows"));
+        assert_eq!(dict.decode(object_id), Some("http://example.org/person/Bob"));
+        assert_eq!(dict.decode(graph_id), Some("http://example.org/graph1"));
+
+        // Test that the decoded string contains the expected format
+        assert!(decoded.contains("http://example.org/person/Alice"));
+        assert!(decoded.contains("http://example.org/knows"));
+        assert!(decoded.contains("http://example.org/person/Bob"));
+        assert!(decoded.contains("http://example.org/graph1"));
+        assert!(decoded.contains("1234567890"));
+    }
+
+    #[test]
+    fn test_clean_rdf_api() {
+        use crate::core::RDFEvent;
+
+        let mut dict = Dictionary::new();
+
+        // Test the clean API - user provides URIs directly
+        let rdf_event = RDFEvent::new(
+            1_234_567_890,
+            "http://example.org/person/Alice",
+            "http://example.org/knows",
+            "http://example.org/person/Bob",
+            "http://example.org/graph1",
+        );
+
+        // Encoding happens internally
+        let encoded_event = rdf_event.encode(&mut dict);
+
+        // Decoding happens internally
+        let decoded_event = encoded_event.decode(&dict);
+
+        // Verify the round-trip works
+        assert_eq!(decoded_event.subject, "http://example.org/person/Alice");
+        assert_eq!(decoded_event.predicate, "http://example.org/knows");
+        assert_eq!(decoded_event.object, "http://example.org/person/Bob");
+        assert_eq!(decoded_event.graph, "http://example.org/graph1");
+        assert_eq!(decoded_event.timestamp, 1_234_567_890);
+
+        println!("Clean API test passed!");
+        println!(
+            "Original: {} {} {} in {} at timestamp {}",
+            rdf_event.subject,
+            rdf_event.predicate,
+            rdf_event.object,
+            rdf_event.graph,
+            rdf_event.timestamp
+        );
+        println!(
+            "Encoded IDs: {} {} {} {} at timestamp {}",
+            encoded_event.subject,
+            encoded_event.predicate,
+            encoded_event.object,
+            encoded_event.graph,
+            encoded_event.timestamp
+        );
+        println!(
+            "Decoded: {} {} {} in {} at timestamp {}",
+            decoded_event.subject,
+            decoded_event.predicate,
+            decoded_event.object,
+            decoded_event.graph,
+            decoded_event.timestamp
+        );
+    }
+}
diff --git a/src/storage/indexing/sparse.rs b/src/storage/indexing/sparse.rs
new file mode 100644
index 0000000..3169e9f
--- /dev/null
+++ b/src/storage/indexing/sparse.rs
@@ -0,0 +1,283 @@
+use crate::core::{
+    encoding::{decode_record, RECORD_SIZE},
+    Event, RDFEvent,
+};
+use crate::storage::indexing::dictionary::Dictionary;
+use std::fs::File;
+use std::io::{Read, Seek, SeekFrom, Write};
+use std::path::Path;
+
+/// Builder for creating sparse indexes that store only periodic entries.
+///
+/// A sparse index reduces storage space by indexing only every Nth record,
+/// trading some query precision for significant space savings.
+#[doc = ""]
+pub struct SparseIndexBuilder {
+    index_file: File,
+    interval: usize,
+}
+#[doc = ""]
+impl SparseIndexBuilder {
+    /// Creates a new sparse index builder that writes to the specified file.
+    ///
+    /// # Arguments
+    /// * `index_path` - Path where the index file will be created
+    /// * `interval` - Number of records between index entries (e.g., 1000 means index every 1000th record)
+    ///
+    /// # Returns
+    /// A new `SparseIndexBuilder` instance or an I/O error
+    #[doc = ""]
+    pub fn create(index_path: &str, interval: usize) -> std::io::Result<Self> {
+        let index_file = File::create(index_path)?;
+        Ok(Self { index_file, interval })
+    }
+
+    /// Adds an entry to the sparse index if the record count matches the interval.
+    ///
+    /// Only records where `record_count % interval == 0` are indexed to save space.
+    ///
+    /// # Arguments
+    /// * `record_count` - The current record number in the log
+    /// * `timestamp` - Timestamp of the record
+    /// * `offset` - Byte offset of the record in the log file
+    ///
+    /// # Returns
+    /// `true` if the entry was added to the index, `false` if skipped
+    #[doc = ""]
+    pub fn add_entry(
+        &mut self,
+        record_count: u64,
+        timestamp: u64,
+        offset: u64,
+    ) -> std::io::Result<bool> {
+        if record_count % self.interval as u64 == 0 {
+            self.index_file.write_all(&timestamp.to_be_bytes())?;
+            self.index_file.write_all(&offset.to_be_bytes())?;
+            Ok(true)
+        } else {
+            Ok(false)
+        }
+    }
+
+    /// Finalizes the index by flushing any buffered writes to disk.
+    ///
+    /// This should be called after all entries have been added.
+    #[doc = ""]
+    pub fn finalize(&mut self) -> std::io::Result<()> {
+        self.index_file.flush()
+    }
+}
+
+/// Builds a sparse index for an existing log file.
+///
+/// This function reads through the entire log file and creates an index
+/// with entries only for records at the specified interval.
+///
+/// # Arguments
+/// * `log_path` - Path to the log file to index
+/// * `index_path` - Path where the index file will be created
+/// * `interval` - Number of records between index entries
+///
+/// # Returns
+/// Ok(()) on success, or an I/O error
+pub fn build_sparse_index(
+    log_path: &str,
+    index_path: &str,
+    interval: &usize,
+) -> std::io::Result<()> {
+    let mut log = File::open(log_path)?;
+    let mut builder = SparseIndexBuilder::create(index_path, *interval)?;
+
+    let mut offset = 0u64;
+    let mut record_count = 0u64;
+    let mut record = [0u8; RECORD_SIZE];
+
+    while log.read_exact(&mut record).is_ok() {
+        let (timestamp, _, _, _, _) = decode_record(&record);
+        builder.add_entry(record_count, timestamp, offset)?;
+        offset += RECORD_SIZE as u64;
+        record_count += 1;
+    }
+
+    builder.finalize()?;
+    Ok(())
+}
+
+/// Builds a sparse index and initializes an empty dictionary.
+///
+/// This is a convenience function that creates both the index and
+/// an empty dictionary file. The dictionary can be populated separately
+/// when processing RDF data.
+///
+/// # Arguments
+/// * `log_path` - Path to the log file to index
+/// * `index_path` - Path where the index file will be created
+/// * `dictionary_path` - Path where the dictionary file will be created
+/// * `interval` - Number of records between index entries
+///
+/// # Returns
+/// Ok(()) on success, or an I/O error
+pub fn build_sparse_index_with_dictionary(
+    log_path: &str,
+    index_path: &str,
+    dictionary_path: &str,
+    interval: &usize,
+) -> std::io::Result<()> {
+    let mut log = File::open(log_path)?;
+    let mut builder = SparseIndexBuilder::create(index_path, *interval)?;
+    let dictionary = Dictionary::new();
+
+    let mut offset = 0u64;
+    let mut record_count = 0u64;
+    let mut record = [0u8; RECORD_SIZE];
+
+    while log.read_exact(&mut record).is_ok() {
+        let (timestamp, _subject, _predicate, _object, _graph) = decode_record(&record);
+
+        builder.add_entry(record_count, timestamp, offset)?;
+
+        offset += RECORD_SIZE as u64;
+        record_count += 1;
+    }
+
+    builder.finalize()?;
+    dictionary.save_to_file(Path::new(dictionary_path))?;
+
+    Ok(())
+}
+
+/// Reader for sparse indexes that enables efficient timestamp-based queries.
+///
+/// The sparse reader loads the entire index into memory for fast binary search,
+/// then performs sequential scans of the log file starting from the appropriate position.
+pub struct SparseReader {
+    index: Vec<(u64, u64)>,
+    #[allow(dead_code)]
+    interval: usize,
+}
+
+impl SparseReader {
+    /// Opens a sparse index and its associated dictionary.
+    ///
+    /// # Arguments
+    /// * `index_path` - Path to the sparse index file
+    /// * `dictionary_path` - Path to the dictionary file
+    /// * `interval` - The interval used when building the index
+    ///
+    /// # Returns
+    /// A tuple of (SparseReader, Dictionary) or an I/O error
+    pub fn open_with_dictionary(
+        index_path: &str,
+        dictionary_path: &str,
+        interval: usize,
+    ) -> std::io::Result<(Self, Dictionary)> {
+        let reader = Self::open(index_path, interval)?;
+        let dictionary = Dictionary::load_from_file(Path::new(dictionary_path))?;
+        Ok((reader, dictionary))
+    }
+    /// Queries the log and returns results with URIs resolved from the dictionary.
+    ///
+    /// This method performs the same query as `query()` but resolves all numeric IDs
+    /// back to their original URI strings using the provided dictionary.
+    ///
+    /// # Arguments
+    /// * `log_path` - Path to the log file
+    /// * `dict` - Dictionary for resolving IDs to URIs
+    /// * `timestamp_start_bound` - Minimum timestamp (inclusive)
+    /// * `timestamp_end_bound` - Maximum timestamp (inclusive)
+    ///
+    /// # Returns
+    /// Vector of resolved events or an I/O error
+    pub fn query_resolved(
+        &self,
+        log_path: &str,
+        dict: &Dictionary,
+        timestamp_start_bound: u64,
+        timestamp_end_bound: u64,
+    ) -> std::io::Result<Vec<RDFEvent>> {
+        let events = self.query(log_path, timestamp_start_bound, timestamp_end_bound)?;
+        Ok(events.into_iter().map(|e| e.decode(dict)).collect())
+    }
+
+    /// Opens a sparse index file and loads it into memory.
+    ///
+    /// # Arguments
+    /// * `index_path` - Path to the sparse index file
+    /// * `interval` - The interval used when building the index
+    ///
+    /// # Returns
+    /// A new SparseReader instance or an I/O error
+    pub fn open(index_path: &str, interval: usize) -> std::io::Result<Self> {
+        let mut index_file = File::open(index_path)?;
+        let mut index = Vec::new();
+        let mut entry = [0u8; 16];
+
+        while index_file.read_exact(&mut entry).is_ok() {
+            let timestamp = u64::from_be_bytes(entry[0..8].try_into().unwrap());
+            let offset = u64::from_be_bytes(entry[8..16].try_into().unwrap());
+
+            index.push((timestamp, offset));
+        }
+        Ok(Self { index, interval })
+    }
+
+    /// Queries the log file for events within the specified timestamp range.
+    ///
+    /// Uses binary search on the index to find the starting position, then
+    /// performs a sequential scan of the log file to collect matching events.
+    ///
+    /// # Arguments
+    /// * `log_path` - Path to the log file
+    /// * `timestamp_start_bound` - Minimum timestamp (inclusive)
+    /// * `timestamp_end_bound` - Maximum timestamp (inclusive)
+    ///
+    /// # Returns
+    /// Vector of events with numeric IDs or an I/O error
+    pub fn query(
+        &self,
+        log_path: &str,
+        timestamp_start_bound: u64,
+        timestamp_end_bound: u64,
+    ) -> std::io::Result<Vec<Event>> {
+        if timestamp_start_bound > timestamp_end_bound {
+            return Ok(Vec::new());
+        }
+
+        if self.index.is_empty() {
+            return Ok(Vec::new());
+        }
+
+        let position = self
+            .index
+            .binary_search_by_key(&timestamp_start_bound, |x| x.0)
+            .unwrap_or_else(|i| i.saturating_sub(1));
+
+        let mut log = File::open(log_path)?;
+        log.seek(SeekFrom::Start(self.index[position].1))?;
+
+        let mut results = Vec::new();
+        let mut record = [0u8; RECORD_SIZE];
+
+        while log.read_exact(&mut record).is_ok() {
+            let (timestamp, subject, predicate, object, graph) = decode_record(&record);
+
+            if timestamp > timestamp_end_bound {
+                break;
+            }
+
+            if timestamp >= timestamp_start_bound {
+                results.push(Event { timestamp, subject, predicate, object, graph });
+            }
+        }
+
+        Ok(results)
+    }
+
+    /// Returns the size of the index in bytes.
+    ///
+    /// Each index entry is 16 bytes (8 bytes timestamp + 8 bytes offset),
+    /// so this returns `index.len() * 16`.
+    pub fn index_size_bytes(&self) -> usize {
+        self.index.len() * 16
+    }
+}
diff --git a/src/storage/memory_tracker.rs b/src/storage/memory_tracker.rs
new file mode 100644
index 0000000..e63f4a0
--- /dev/null
+++ b/src/storage/memory_tracker.rs
@@ -0,0 +1,239 @@
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::Arc;
+
+/// Memory usage tracker for benchmarking purposes
+#[derive(Debug, Clone)]
+pub struct MemoryTracker {
+    peak_memory_bytes: Arc<AtomicUsize>,
+    current_memory_bytes: Arc<AtomicUsize>,
+    measurements: Arc<std::sync::Mutex<Vec<MemoryMeasurement>>>,
+}
+
+#[derive(Debug, Clone)]
+pub struct MemoryMeasurement {
+    pub timestamp: std::time::Instant,
+    pub memory_bytes: usize,
+    pub description: String,
+}
+
+#[derive(Debug)]
+pub struct MemoryStats {
+    pub current_bytes: usize,
+    pub peak_bytes: usize,
+    pub total_measurements: usize,
+    pub avg_bytes: f64,
+    pub measurements: Vec<MemoryMeasurement>,
+}
+
+impl MemoryTracker {
+    pub fn new() -> Self {
+        Self {
+            peak_memory_bytes: Arc::new(AtomicUsize::new(0)),
+            current_memory_bytes: Arc::new(AtomicUsize::new(0)),
+            measurements: Arc::new(std::sync::Mutex::new(Vec::new())),
+        }
+    }
+
+    /// Record current memory usage with a description
+    pub fn record(&self, description: &str) {
+        let current = self.estimate_current_memory();
+        self.current_memory_bytes.store(current, Ordering::Relaxed);
+
+        // Update peak if necessary
+        let peak = self.peak_memory_bytes.load(Ordering::Relaxed);
+        if current > peak {
+            self.peak_memory_bytes.store(current, Ordering::Relaxed);
+        }
+
+        // Store measurement
+        let measurement = MemoryMeasurement {
+            timestamp: std::time::Instant::now(),
+            memory_bytes: current,
+            description: description.to_string(),
+        };
+
+        if let Ok(mut measurements) = self.measurements.lock() {
+            measurements.push(measurement);
+        }
+    }
+
+    /// Get current memory statistics
+    #[allow(clippy::cast_precision_loss)]
+    pub fn get_stats(&self) -> MemoryStats {
+        let current = self.current_memory_bytes.load(Ordering::Relaxed);
+        let peak = self.peak_memory_bytes.load(Ordering::Relaxed);
+
+        let measurements = if let Ok(m) = self.measurements.lock() {
+            m.clone()
+        } else {
+            Vec::new()
+        };
+
+        let avg_bytes = if measurements.is_empty() {
+            0.0
+        } else {
+            measurements.iter().map(|m| m.memory_bytes as f64).sum::<f64>()
+                / measurements.len() as f64
+        };
+
+        MemoryStats {
+            current_bytes: current,
+            peak_bytes: peak,
+            total_measurements: measurements.len(),
+            avg_bytes,
+            measurements,
+        }
+    }
+
+    /// Reset all measurements
+    pub fn reset(&self) {
+        self.current_memory_bytes.store(0, Ordering::Relaxed);
+        self.peak_memory_bytes.store(0, Ordering::Relaxed);
+        if let Ok(mut measurements) = self.measurements.lock() {
+            measurements.clear();
+        }
+    }
+
+    /// Estimate current memory usage of the process
+    fn estimate_current_memory(&self) -> usize {
+        // On macOS/Linux, try to read from /proc/self/status or use system calls
+        #[cfg(target_os = "macos")]
+        {
+            // For macOS, try using sysctl first, then fallback to basic estimation
+            match self.get_memory_macos_simple() {
+                Ok(mem) if mem > 0 => mem,
+                _ => self.estimate_heap_usage(),
+            }
+        }
+        #[cfg(target_os = "linux")]
+        {
+            match self.get_memory_linux() {
+                mem if mem > 0 => mem,
+                _ => self.estimate_heap_usage(),
+            }
+        }
+        #[cfg(not(any(target_os = "macos", target_os = "linux")))]
+        {
+            // Fallback: estimate based on heap allocation (rough approximation)
+            self.estimate_heap_usage()
+        }
+    }
+
+    /// Simple heap usage estimation (very rough)
+    fn estimate_heap_usage(&self) -> usize {
+        // This is a very rough estimation based on typical memory patterns
+        // In a real implementation, you might use a memory allocator that tracks usage
+
+        // Rough estimation: assume we're using around 50-100MB for a typical session
+        // This is obviously very imprecise but gives us something to work with
+        let estimated_base = 50 * 1024 * 1024; // 50MB base
+
+        // Add some dynamic component based on time (simulating growth)
+        let dynamic_component = (std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_secs()
+            % 1000) as usize
+            * 1024; // Up to 1MB variation
+
+        estimated_base + dynamic_component
+    }
+
+    #[cfg(target_os = "macos")]
+    fn get_memory_macos_simple(&self) -> Result<usize, Box<dyn std::error::Error>> {
+        // Try using ps command as a fallback
+        use std::process::Command;
+
+        let output = Command::new("ps")
+            .args(&["-o", "rss=", "-p", &std::process::id().to_string()])
+            .output()?;
+
+        if output.status.success() {
+            let rss_str = std::str::from_utf8(&output.stdout)?;
+            let rss_kb: usize = rss_str.trim().parse()?;
+            Ok(rss_kb * 1024) // Convert KB to bytes
+        } else {
+            Err("ps command failed".into())
+        }
+    }
+
+    #[cfg(target_os = "macos")]
+    #[allow(dead_code)]
+    fn get_memory_macos(&self) -> usize {
+        use std::mem;
+        use std::ptr;
+
+        #[repr(C)]
+        struct TaskBasicInfo {
+            virtual_size: u32,
+            resident_size: u32,
+            policy: u32,
+            flags: u32,
+        }
+
+        extern "C" {
+            #[allow(dead_code)]
+            fn mach_task_self() -> u32;
+            #[allow(dead_code)]
+            fn task_info(
+                target_task: u32,
+                flavor: u32,
+                task_info_out: *mut TaskBasicInfo,
+                task_info_outCnt: *mut u32,
+            ) -> i32;
+        }
+
+        const TASK_BASIC_INFO: u32 = 5;
+        let mut info: TaskBasicInfo = unsafe { mem::zeroed() };
+        let mut count = (mem::size_of::<TaskBasicInfo>() / mem::size_of::<u32>()) as u32;
+
+        let result =
+            unsafe { task_info(mach_task_self(), TASK_BASIC_INFO, &raw mut info, &raw mut count) };
+
+        if result == 0 {
+            info.resident_size as usize
+        } else {
+            0
+        }
+    }
+
+    #[cfg(target_os = "linux")]
+    fn get_memory_linux(&self) -> usize {
+        use std::fs;
+
+        if let Ok(contents) = fs::read_to_string("/proc/self/status") {
+            for line in contents.lines() {
+                if line.starts_with("VmRSS:") {
+                    let parts: Vec<&str> = line.split_whitespace().collect();
+                    if parts.len() >= 2 {
+                        if let Ok(kb) = parts[1].parse::<usize>() {
+                            return kb * 1024; // Convert KB to bytes
+                        }
+                    }
+                }
+            }
+        }
+        0
+    }
+
+    /// Format bytes in human-readable format
+    #[allow(clippy::cast_precision_loss)]
+    pub fn format_bytes(bytes: usize) -> String {
+        const UNITS: &[&str] = &["B", "KB", "MB", "GB"];
+        let mut size = bytes as f64;
+        let mut unit_index = 0;
+
+        while size >= 1024.0 && unit_index < UNITS.len() - 1 {
+            size /= 1024.0;
+            unit_index += 1;
+        }
+
+        format!("{:.2} {}", size, UNITS[unit_index])
+    }
+}
+
+impl Default for MemoryTracker {
+    fn default() -> Self {
+        Self::new()
+    }
+}
diff --git a/src/storage/mod.rs b/src/storage/mod.rs
new file mode 100644
index 0000000..b96e7ca
--- /dev/null
+++ b/src/storage/mod.rs
@@ -0,0 +1,8 @@
+pub mod memory_tracker;
+pub mod segmented_storage;
+pub mod util;
+pub mod indexing {
+    pub mod dense;
+    pub mod dictionary;
+    pub mod sparse;
+}
diff --git a/src/storage/segmented_storage.rs b/src/storage/segmented_storage.rs
new file mode 100644
index 0000000..a2215a3
--- /dev/null
+++ b/src/storage/segmented_storage.rs
@@ -0,0 +1,717 @@
+use std::{
+    collections::VecDeque,
+    io::{BufWriter, Read, Seek, SeekFrom, Write},
+    rc::Rc,
+    sync::{Arc, Mutex, RwLock},
+    thread::JoinHandle,
+    time::{Duration, SystemTime, UNIX_EPOCH},
+};
+
+use crate::{
+    core::{
+        encoding::{decode_record, encode_record, RECORD_SIZE},
+        Event, RDFEvent,
+    },
+    storage::{
+        indexing::dictionary::Dictionary,
+        util::{BatchBuffer, EnhancedSegmentMetadata, IndexBlock, StreamingConfig},
+    },
+};
+
+#[doc = "Struct for the Implementation of the Segmented Storage of RDF Streams."]
+pub struct StreamingSegmentedStorage {
+    batch_buffer: Arc<RwLock<BatchBuffer>>,
+    segments: Arc<RwLock<Vec<EnhancedSegmentMetadata>>>,
+    dictionary: Rc<RwLock<Dictionary>>,
+    flush_handle: Option<JoinHandle<()>>,
+    shutdown_signal: Arc<Mutex<bool>>,
+    config: StreamingConfig,
+}
+
+impl StreamingSegmentedStorage {
+    #[doc = ""]
+    pub fn new(config: StreamingConfig) -> std::io::Result<Self> {
+        std::fs::create_dir_all(&config.segment_base_path)?;
+
+        let storage = Self {
+            batch_buffer: Arc::new(RwLock::new(BatchBuffer {
+                events: VecDeque::new(),
+                total_bytes: 0,
+                oldest_timestamp_bound: None,
+                newest_timestamp_bound: None,
+            })),
+
+            segments: Arc::new(RwLock::new(Vec::new())),
+            dictionary: Rc::new(RwLock::new(Dictionary::new())),
+            flush_handle: None,
+            shutdown_signal: Arc::new(Mutex::new(false)),
+            config,
+        };
+        storage.load_existing_segments()?;
+        Ok(storage)
+    }
+
+    #[doc = ""]
+    pub fn start_background_flushing(&mut self) {
+        let batch_buffer_clone = Arc::clone(&self.batch_buffer);
+        let segments_clone = Arc::clone(&self.segments);
+        let shutdown_clone = Arc::clone(&self.shutdown_signal);
+        let config_clone = self.config.clone();
+
+        let handle = std::thread::spawn(move || {
+            Self::background_flush_loop(
+                batch_buffer_clone,
+                segments_clone,
+                shutdown_clone,
+                config_clone,
+            );
+        });
+
+        self.flush_handle = Some(handle);
+    }
+
+    pub fn write(&self, event: Event) -> std::io::Result<()> {
+        let event_size = std::mem::size_of::<Event>();
+
+        {
+            let mut batch_buffer = self.batch_buffer.write().unwrap();
+
+            if batch_buffer.oldest_timestamp_bound.is_none() {
+                batch_buffer.oldest_timestamp_bound = Some(event.timestamp);
+            }
+
+            batch_buffer.newest_timestamp_bound = Some(event.timestamp);
+
+            batch_buffer.total_bytes += event_size;
+
+            batch_buffer.events.push_back(event);
+        }
+        // Note: Synchronous flushing removed for high throughput.
+        // Background thread handles all flushing based on time limits.
+        Ok(())
+    }
+
+    /// User-friendly API: Write RDF data directly with URI strings
+    pub fn write_rdf(
+        &self,
+        timestamp: u64,
+        subject: &str,
+        predicate: &str,
+        object: &str,
+        graph: &str,
+    ) -> std::io::Result<()> {
+        let rdf_event = RDFEvent::new(timestamp, subject, predicate, object, graph);
+        let encoded_event = {
+            let mut dict = self.dictionary.write().unwrap();
+            rdf_event.encode(&mut dict)
+        };
+        self.write(encoded_event)
+    }
+
+    #[allow(dead_code)]
+    fn should_flush(&self) -> bool {
+        let batch_buffer = self.batch_buffer.read().unwrap();
+
+        batch_buffer.events.len() >= self.config.max_batch_events.try_into().unwrap()
+            || batch_buffer.total_bytes > self.config.max_batch_bytes
+            || batch_buffer.oldest_timestamp_bound.map_or(false, |oldest| {
+                let current_timestamp = Self::current_timestamp();
+
+                // Use saturating subtraction to avoid underflow if oldest > current_timestamp
+                current_timestamp.saturating_sub(oldest)
+                    >= self.config.max_batch_age_seconds * 1_000
+            })
+    }
+
+    fn current_timestamp() -> u64 {
+        SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_millis() as u64
+    }
+
+    fn flush_batch_buffer_to_segment(&self) -> std::io::Result<()> {
+        // Automatically extract events from the batch buffer.
+
+        let events_to_flush = {
+            let mut batch_buffer = self.batch_buffer.write().unwrap();
+            if batch_buffer.events.is_empty() {
+                return Ok(());
+            }
+
+            let events: Vec<Event> = batch_buffer.events.drain(..).collect();
+
+            batch_buffer.total_bytes = 0;
+            batch_buffer.oldest_timestamp_bound = None;
+            batch_buffer.newest_timestamp_bound = None;
+            events
+        };
+
+        let segment = self.create_segment_with_two_level_index(events_to_flush)?;
+
+        {
+            let mut segments = self.segments.write().unwrap();
+            segments.push(segment);
+        }
+        Ok(())
+    }
+
+    fn create_segment_with_two_level_index(
+        &self,
+        mut events: Vec<Event>,
+    ) -> std::io::Result<EnhancedSegmentMetadata> {
+        events.sort_by_key(|e| e.timestamp);
+
+        let segment_id = Self::generate_segment_id();
+
+        let data_path = format!("{}/segment-{}.log", self.config.segment_base_path, segment_id);
+        let index_path = format!("{}/segment-{}.idx", self.config.segment_base_path, segment_id);
+
+        let mut data_file = BufWriter::new(std::fs::File::create(&data_path)?);
+        let mut index_file = BufWriter::new(std::fs::File::create(&index_path)?);
+
+        let mut index_directory = Vec::new();
+        let mut current_block_entries = Vec::new();
+
+        let mut current_block_min_ts = None;
+        let mut current_block_max_ts = 0u64;
+
+        let mut data_offset = 0u64;
+
+        for (record_count, event) in events.iter().enumerate() {
+            let record_bytes = self.serialize_event_to_fixed_size(event);
+            data_file.write_all(&record_bytes)?;
+
+            if record_count % self.config.sparse_interval == 0 {
+                let sparse_entry = (event.timestamp, data_offset);
+
+                if current_block_min_ts.is_none() {
+                    current_block_min_ts = Some(event.timestamp);
+                }
+
+                current_block_max_ts = event.timestamp;
+                current_block_entries.push(sparse_entry);
+
+                if current_block_entries.len() >= self.config.entries_per_index_block {
+                    let block_metadata = self.flush_index_block(
+                        &mut index_file,
+                        &current_block_entries,
+                        current_block_min_ts.unwrap(),
+                        current_block_max_ts,
+                    )?;
+
+                    index_directory.push(block_metadata);
+
+                    current_block_entries.clear();
+                    current_block_min_ts = None;
+                }
+            }
+            data_offset += record_bytes.len() as u64;
+        }
+
+        if !current_block_entries.is_empty() {
+            let block_metadata = self.flush_index_block(
+                &mut index_file,
+                &current_block_entries,
+                current_block_min_ts.unwrap(),
+                current_block_max_ts,
+            )?;
+
+            index_directory.push(block_metadata);
+        }
+
+        data_file.flush()?;
+        index_file.flush()?;
+
+        Ok(EnhancedSegmentMetadata {
+            start_timstamp: events.first().unwrap().timestamp,
+            end_timestamp: events.last().unwrap().timestamp,
+            data_path,
+            index_path,
+            record_count: events.len() as u64,
+            index_directory,
+        })
+    }
+
+    fn flush_index_block(
+        &self,
+        index_file: &mut BufWriter<std::fs::File>,
+        entries: &[(u64, u64)],
+        min_ts: u64,
+        max_ts: u64,
+    ) -> std::io::Result<IndexBlock> {
+        Self::flush_index_block_static(index_file, entries, min_ts, max_ts)
+    }
+
+    pub fn query(&self, start_timestamp: u64, end_timestamp: u64) -> std::io::Result<Vec<Event>> {
+        let mut results = Vec::new();
+
+        // First try to query the immediate batch buffer which has the fastest visibility.
+
+        {
+            let batch_buffer = self.batch_buffer.read().unwrap();
+
+            for event in &batch_buffer.events {
+                if event.timestamp >= start_timestamp && event.timestamp <= end_timestamp {
+                    results.push(event.clone());
+                }
+            }
+        }
+
+        // Then querying the relevant segment with a two level indexing
+
+        {
+            let segments = self.segments.read().unwrap();
+
+            for segment in segments.iter() {
+                if self.segment_overlaps(segment, start_timestamp, end_timestamp) {
+                    let segment_results =
+                        self.query_segment_two_level(segment, start_timestamp, end_timestamp)?;
+                    results.extend(segment_results);
+                }
+            }
+        }
+
+        results.sort_by_key(|e| e.timestamp);
+
+        Ok(results)
+    }
+
+    /// User-friendly API: Query and return RDF events with URI strings
+    pub fn query_rdf(
+        &self,
+        start_timestamp: u64,
+        end_timestamp: u64,
+    ) -> std::io::Result<Vec<RDFEvent>> {
+        let encoded_events = self.query(start_timestamp, end_timestamp)?;
+        let dict = self.dictionary.read().unwrap();
+        Ok(encoded_events.into_iter().map(|event| event.decode(&dict)).collect())
+    }
+
+    fn query_segment_two_level(
+        &self,
+        segment: &EnhancedSegmentMetadata,
+        start_timestamp: u64,
+        end_timestamp: u64,
+    ) -> std::io::Result<Vec<Event>> {
+        // If we have index directory, use two-level indexing
+        if !segment.index_directory.is_empty() {
+            // Step 1 : Find relevant index blocks using in-memory directory
+            let relevant_blocks: Vec<&IndexBlock> = segment
+                .index_directory
+                .iter()
+                .filter(|block| {
+                    block.min_timestamp <= end_timestamp && block.max_timestamp >= start_timestamp
+                })
+                .collect();
+
+            if relevant_blocks.is_empty() {
+                return Ok(Vec::new());
+            }
+
+            // Step 2 : Load only the relevant blocks from the disk
+            let sparse_entries =
+                self.load_relevant_index_blocks(&segment.index_path, &relevant_blocks)?;
+
+            if sparse_entries.is_empty() {
+                return Ok(Vec::new());
+            }
+
+            // Step 3 : Binary search the loaded entries
+            let lb = sparse_entries.partition_point(|(ts, _)| *ts < start_timestamp);
+            let start_position = lb.saturating_sub(1);
+            let start_offset = sparse_entries[start_position].1;
+
+            // Step 4 : Sequential Scan from the checkpoint
+            self.scan_data_from_offset(
+                &segment.data_path,
+                start_offset,
+                start_timestamp,
+                end_timestamp,
+            )
+        } else {
+            // Fallback: Full scan of the data file (for segments without loaded index)
+            self.scan_data_from_offset(&segment.data_path, 0, start_timestamp, end_timestamp)
+        }
+    }
+
+    fn load_relevant_index_blocks(
+        &self,
+        index_path: &str,
+        blocks: &[&IndexBlock],
+    ) -> std::io::Result<Vec<(u64, u64)>> {
+        let mut index_file = std::fs::File::open(index_path)?;
+        let mut sparse_entries = Vec::new();
+
+        for block in blocks {
+            index_file.seek(SeekFrom::Start(block.file_offset))?;
+
+            let block_size = block.entry_count as usize * 16; // 16 bytes per entry.
+            let mut buffer = vec![0u8; block_size];
+            index_file.read_exact(&mut buffer)?;
+
+            // Parse the entries.
+
+            for chunk in buffer.chunks_exact(16) {
+                let timestamp = u64::from_le_bytes(chunk[0..8].try_into().unwrap());
+                let offset = u64::from_be_bytes(chunk[8..16].try_into().unwrap());
+                sparse_entries.push((timestamp, offset));
+            }
+        }
+
+        sparse_entries.sort_by_key(|&(ts, _)| ts);
+        Ok(sparse_entries)
+    }
+
+    fn scan_data_from_offset(
+        &self,
+        data_path: &str,
+        start_offset: u64,
+        start_timestamp: u64,
+        end_timestamp: u64,
+    ) -> std::io::Result<Vec<Event>> {
+        let mut file = std::fs::File::open(data_path)?;
+        file.seek(SeekFrom::Start(start_offset))?;
+
+        let mut results = Vec::new();
+        let mut record = [0u8; RECORD_SIZE];
+
+        while file.read_exact(&mut record).is_ok() {
+            let (timestamp, subject, predicate, object, graph) = decode_record(&record);
+
+            if timestamp > end_timestamp {
+                break;
+            }
+
+            if timestamp >= start_timestamp {
+                results.push(Event { timestamp, subject, predicate, object, graph });
+            }
+        }
+        Ok(results)
+    }
+
+    fn segment_overlaps(
+        &self,
+        segment: &EnhancedSegmentMetadata,
+        start_ts: u64,
+        end_ts: u64,
+    ) -> bool {
+        segment.start_timstamp <= end_ts && segment.end_timestamp >= start_ts
+    }
+
+    fn background_flush_loop(
+        batch_buffer: Arc<RwLock<BatchBuffer>>,
+        segments: Arc<RwLock<Vec<EnhancedSegmentMetadata>>>,
+        shutdown_signal: Arc<Mutex<bool>>,
+        config: StreamingConfig,
+    ) {
+        while !*shutdown_signal.lock().unwrap() {
+            std::thread::sleep(Duration::from_millis(100));
+
+            // Check if flush is needed or not.
+
+            let should_flush = {
+                let batch_buffer = batch_buffer.read().unwrap();
+
+                batch_buffer.events.len() >= config.max_batch_events.try_into().unwrap()
+                    || batch_buffer.total_bytes >= config.max_batch_bytes
+                    || batch_buffer.oldest_timestamp_bound.map_or(false, |oldest| {
+                        let current_timestamp = Self::current_timestamp();
+                        current_timestamp.saturating_sub(oldest)
+                            >= config.max_batch_age_seconds * 1_000
+                    })
+            };
+
+            if should_flush {
+                // TODO : Add better error handling here in this case
+                if let Err(e) =
+                    Self::flush_background(batch_buffer.clone(), segments.clone(), &config)
+                {
+                    eprintln!("Background flush failed: {}", e);
+                }
+            }
+        }
+    }
+
+    fn flush_background(
+        batch_buffer: Arc<RwLock<BatchBuffer>>,
+        segments: Arc<RwLock<Vec<EnhancedSegmentMetadata>>>,
+        config: &StreamingConfig,
+    ) -> std::io::Result<()> {
+        // Automatically extract events from the batch buffer.
+
+        let events_to_flush = {
+            let mut batch_buffer = batch_buffer.write().unwrap();
+            if batch_buffer.events.is_empty() {
+                return Ok(());
+            }
+
+            let events: Vec<Event> = batch_buffer.events.drain(..).collect();
+
+            batch_buffer.total_bytes = 0;
+            batch_buffer.oldest_timestamp_bound = None;
+            batch_buffer.newest_timestamp_bound = None;
+            events
+        };
+
+        // Create a new segment for these events
+        let segment_id = Self::current_timestamp();
+        let data_path = format!("{}/segment-{}.log", config.segment_base_path, segment_id);
+        let index_path = format!("{}/segment-{}.idx", config.segment_base_path, segment_id);
+
+        // Use buffered writers for performance (same as original implementation)
+        let mut data_file = BufWriter::new(std::fs::File::create(&data_path)?);
+        let mut index_file = BufWriter::new(std::fs::File::create(&index_path)?);
+
+        let mut index_directory = Vec::new();
+        let mut current_block_entries = Vec::new();
+        let mut current_block_min_ts = None;
+        let mut current_block_max_ts = 0u64;
+        let mut data_offset = 0u64;
+
+        for (record_count, event) in events_to_flush.iter().enumerate() {
+            // Use the same serialization as the original
+            let record_bytes = Self::serialize_event_to_fixed_size_static(event);
+            data_file.write_all(&record_bytes)?;
+
+            if record_count % config.sparse_interval == 0 {
+                let sparse_entry = (event.timestamp, data_offset);
+
+                if current_block_min_ts.is_none() {
+                    current_block_min_ts = Some(event.timestamp);
+                }
+
+                current_block_max_ts = event.timestamp;
+                current_block_entries.push(sparse_entry);
+
+                if current_block_entries.len() >= config.entries_per_index_block {
+                    let block_metadata = Self::flush_index_block_static(
+                        &mut index_file,
+                        &current_block_entries,
+                        current_block_min_ts.unwrap(),
+                        current_block_max_ts,
+                    )?;
+
+                    index_directory.push(block_metadata);
+
+                    current_block_entries.clear();
+                    current_block_min_ts = None;
+                }
+            }
+            data_offset += record_bytes.len() as u64;
+        }
+
+        if !current_block_entries.is_empty() {
+            let block_metadata = Self::flush_index_block_static(
+                &mut index_file,
+                &current_block_entries,
+                current_block_min_ts.unwrap(),
+                current_block_max_ts,
+            )?;
+
+            index_directory.push(block_metadata);
+        }
+
+        data_file.flush()?;
+        index_file.flush()?;
+
+        // Add the new segment to the segments list
+        let new_segment = EnhancedSegmentMetadata {
+            start_timstamp: events_to_flush.first().unwrap().timestamp,
+            end_timestamp: events_to_flush.last().unwrap().timestamp,
+            data_path,
+            index_path,
+            record_count: events_to_flush.len() as u64,
+            index_directory,
+        };
+
+        {
+            let mut segments = segments.write().unwrap();
+            segments.push(new_segment);
+            // Keep segments sorted by start timestamp
+            segments.sort_by_key(|s| s.start_timstamp);
+        }
+
+        Ok(())
+    }
+
+    fn load_existing_segments(&self) -> std::io::Result<()> {
+        use std::fs;
+
+        let segment_dir = &self.config.segment_base_path;
+        if !fs::metadata(segment_dir).is_ok() {
+            return Ok(());
+        }
+
+        let entries = fs::read_dir(segment_dir)?;
+        let mut segments = Vec::new();
+
+        for entry in entries {
+            let entry = entry?;
+            let path = entry.path();
+
+            if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
+                if filename.starts_with("segment-") && filename.ends_with(".log") {
+                    // Extract segment ID from filename
+                    if let Some(id_str) =
+                        filename.strip_prefix("segment-").and_then(|s| s.strip_suffix(".log"))
+                    {
+                        if let Ok(segment_id) = id_str.parse::<u64>() {
+                            // Try to load the segment metadata by reading the data file
+                            let data_path = format!("{}/segment-{}.log", segment_dir, segment_id);
+                            let index_path = format!("{}/segment-{}.idx", segment_dir, segment_id);
+
+                            if let Ok(_metadata) = fs::metadata(&data_path) {
+                                // Load index directory if index file exists
+                                let (index_directory, start_ts, end_ts, record_count) =
+                                    if fs::metadata(&index_path).is_ok() {
+                                        Self::load_index_directory_from_file(&index_path)
+                                            .unwrap_or_else(|_| (Vec::new(), 0, u64::MAX, 0))
+                                    } else {
+                                        (Vec::new(), 0, u64::MAX, 0)
+                                    };
+
+                                let segment = EnhancedSegmentMetadata {
+                                    start_timstamp: start_ts,
+                                    end_timestamp: end_ts,
+                                    data_path,
+                                    index_path,
+                                    record_count,
+                                    index_directory,
+                                };
+                                segments.push(segment);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        // Sort segments by start timestamp
+        segments.sort_by_key(|s| s.start_timstamp);
+
+        {
+            let mut self_segments = self.segments.write().unwrap();
+            *self_segments = segments;
+        }
+
+        Ok(())
+    }
+
+    fn load_index_directory_from_file(
+        index_path: &str,
+    ) -> std::io::Result<(Vec<IndexBlock>, u64, u64, u64)> {
+        use std::io::Read;
+
+        let mut file = std::fs::File::open(index_path)?;
+        let mut buffer = Vec::new();
+        file.read_to_end(&mut buffer)?;
+
+        // Index file format: each block is stored as consecutive (timestamp, offset) pairs (16 bytes each)
+        // We need to reconstruct the IndexBlock directory structure
+
+        if buffer.is_empty() {
+            return Ok((Vec::new(), 0, u64::MAX, 0));
+        }
+
+        let mut index_directory = Vec::new();
+        let mut file_offset = 0u64;
+        let mut global_min_ts = u64::MAX;
+        let mut global_max_ts = 0u64;
+        let mut total_records = 0u64;
+
+        // Read all entries to reconstruct blocks
+        // Note: This is a simplified reconstruction - in practice you'd want to store block boundaries
+        let entries_per_block = 1000; // From config.entries_per_index_block
+        let mut current_block_start = 0;
+
+        while current_block_start < buffer.len() {
+            let block_size =
+                std::cmp::min(entries_per_block * 16, buffer.len() - current_block_start);
+            let block_end = current_block_start + block_size;
+            let block_entries = block_end - current_block_start;
+            let entry_count = (block_entries / 16) as u32;
+
+            if entry_count == 0 {
+                break;
+            }
+
+            //Read first and last timestamp of this block
+            let first_ts = u64::from_le_bytes(
+                buffer[current_block_start..current_block_start + 8].try_into().unwrap(),
+            );
+            let last_entry_start = current_block_start + ((entry_count - 1) as usize * 16);
+            let last_ts = u64::from_le_bytes(
+                buffer[last_entry_start..last_entry_start + 8].try_into().unwrap(),
+            );
+
+            global_min_ts = global_min_ts.min(first_ts);
+            global_max_ts = global_max_ts.max(last_ts);
+            total_records += entry_count as u64;
+
+            index_directory.push(IndexBlock {
+                min_timestamp: first_ts,
+                max_timestamp: last_ts,
+                file_offset,
+                entry_count,
+            });
+
+            file_offset += block_size as u64;
+            current_block_start = block_end;
+        }
+
+        Ok((index_directory, global_min_ts, global_max_ts, total_records))
+    }
+
+    pub fn shutdown(&mut self) -> std::io::Result<()> {
+        *self.shutdown_signal.lock().unwrap() = true;
+
+        // Final Flush
+
+        self.flush_batch_buffer_to_segment()?;
+
+        if let Some(handle) = self.flush_handle.take() {
+            handle.join().unwrap();
+        }
+        Ok(())
+    }
+
+    fn serialize_event_to_fixed_size(&self, event: &Event) -> Vec<u8> {
+        Self::serialize_event_to_fixed_size_static(event)
+    }
+
+    fn serialize_event_to_fixed_size_static(event: &Event) -> Vec<u8> {
+        let mut record = [0u8; RECORD_SIZE];
+        encode_record(
+            &mut record,
+            event.timestamp,
+            event.subject,
+            event.predicate,
+            event.object,
+            event.graph,
+        );
+        record.to_vec()
+    }
+
+    fn flush_index_block_static(
+        index_file: &mut BufWriter<std::fs::File>,
+        entries: &[(u64, u64)],
+        min_ts: u64,
+        max_ts: u64,
+    ) -> std::io::Result<IndexBlock> {
+        let file_offset = index_file.stream_position()?;
+
+        for (timestamp, offset) in entries {
+            index_file.write_all(&timestamp.to_le_bytes())?;
+            index_file.write_all(&offset.to_be_bytes())?;
+        }
+
+        Ok(IndexBlock {
+            min_timestamp: min_ts,
+            max_timestamp: max_ts,
+            file_offset,
+            entry_count: entries.len() as u32,
+        })
+    }
+
+    fn generate_segment_id() -> u64 {
+        SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_millis() as u64
+    }
+}
diff --git a/src/storage/util.rs b/src/storage/util.rs
new file mode 100644
index 0000000..e6f7a2d
--- /dev/null
+++ b/src/storage/util.rs
@@ -0,0 +1,68 @@
+use std::collections::{HashMap, VecDeque};
+use std::sync::{Arc, Mutex, RwLock};
+use std::thread::JoinHandle;
+use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
+
+use crate::core::Event;
+
+#[derive(Debug)]
+/// Storage component memory usage breakdown
+pub struct StorageComponentSizes {
+    pub batch_buffer_bytes: usize,
+    pub segments_count: usize,
+    pub dictionary_bytes: usize,
+    pub estimated_total_bytes: usize,
+}
+
+#[derive(Debug)]
+/// In-memory buffer that batches events before persisting them to disk
+pub struct BatchBuffer {
+    pub events: VecDeque<Event>,
+    pub total_bytes: usize,
+    pub oldest_timestamp_bound: Option<u64>,
+    pub newest_timestamp_bound: Option<u64>,
+}
+
+#[derive(Debug, Clone)]
+pub struct IndexBlock {
+    pub min_timestamp: u64,
+    pub max_timestamp: u64,
+    pub file_offset: u64,
+    pub entry_count: u32,
+}
+
+#[derive(Debug, Clone)]
+pub struct EnhancedSegmentMetadata {
+    pub start_timstamp: u64,
+    pub end_timestamp: u64,
+    pub data_path: String,
+    pub index_path: String,
+    pub record_count: u64,
+    pub index_directory: Vec<IndexBlock>,
+}
+
+#[derive(Clone)]
+pub struct StreamingConfig {
+    /// Maximum number of events to buffer before flushing to disk
+    pub max_batch_events: u64,
+    /// Maximum age in seconds before flushing buffered events to disk
+    pub max_batch_age_seconds: u64,
+    /// Maximum bytes to buffer before flushing to disk
+    pub max_batch_bytes: usize,
+    pub sparse_interval: usize,
+    pub entries_per_index_block: usize,
+    pub segment_base_path: String,
+}
+
+impl Default for StreamingConfig {
+    fn default() -> Self {
+        Self {
+            max_batch_bytes: 10 * 1024 * 1024,
+            max_batch_age_seconds: 60,
+            max_batch_events: 100_000,
+            sparse_interval: 1000,
+            entries_per_index_block: 1024,
+            segment_base_path: "./data".to_string(),
+        }
+    }
+}
diff --git a/tests/dictionary_encoding_test.rs b/tests/dictionary_encoding_test.rs
new file mode 100644
index 0000000..269a9cf
--- /dev/null
+++ b/tests/dictionary_encoding_test.rs
@@ -0,0 +1,623 @@
+//! Dictionary Encoding Integration Tests
+//!
+//! These tests verify the dictionary-based encoding system for RDF terms.
+//!
+//! **Important**: The dictionary stores the actual URI/literal strings WITHOUT RDF syntax:
+//! - URIs: stored as "https://example.org/resource" (not "<https://example.org/resource>")
+//! - Literals: stored as the value string (e.g., "23.5" or "2025-11-05T10:30:00Z")
+//! - Datatypes: stored separately as URIs (e.g., "http://www.w3.org/2001/XMLSchema#double")
+//!
+//! The RDF syntax (angle brackets, quotes, ^^datatype) is handled by the RDF parser/serializer,
+//! not by the dictionary encoding layer. This keeps the dictionary implementation clean and
+//! format-agnostic.
+//!
+//! Example RDF triple in Turtle syntax:
+//! ```turtle
+//! <https://rsp.js/event1> <http://www.w3.org/ns/saref#hasValue> "23.5"^^<http://www.w3.org/2001/XMLSchema#double> .
+//! ```
+//!
+//! Is stored in the dictionary as 4 separate entries:
+//! - Subject ID → "https://rsp.js/event1"
+//! - Predicate ID → "http://www.w3.org/ns/saref#hasValue"
+//! - Object ID → "23.5" (the literal value)
+//! - Datatype ID → "http://www.w3.org/2001/XMLSchema#double" (if needed)
+
+use janus::core::encoding::{decode_record, encode_record, RECORD_SIZE};
+use janus::indexing::shared::LogWriter;
+use janus::storage::indexing::dictionary::Dictionary;
+use janus::storage::indexing::sparse::{build_sparse_index, SparseReader};
+use std::fs;
+use std::path::Path;
+
+#[test]
+fn test_rdf_syntax_to_dictionary_mapping() {
+    let mut dict = Dictionary::new();
+
+    // RDF Triple in Turtle syntax:
+    // <https://rsp.js/event1> <http://www.w3.org/ns/saref#hasValue> "23.5"^^<http://www.w3.org/2001/XMLSchema#double> <https://example.org/graph> .
+    //
+    // The parser would extract these components and store them WITHOUT RDF syntax:
+
+    // Subject: <https://rsp.js/event1> → stored as the URI string
+    let subject = "https://rsp.js/event1";
+    let subject_id = dict.encode(subject);
+
+    // Predicate: <http://www.w3.org/ns/saref#hasValue> → stored as the URI string
+    let predicate = "http://www.w3.org/ns/saref#hasValue";
+    let predicate_id = dict.encode(predicate);
+
+    // Object: "23.5"^^xsd:double → stored as the literal value "23.5"
+    let object = "23.5";
+    let object_id = dict.encode(object);
+
+    // Datatype: ^^<http://www.w3.org/2001/XMLSchema#double> → stored as URI string
+    let datatype = "http://www.w3.org/2001/XMLSchema#double";
+    let datatype_id = dict.encode(datatype);
+
+    // Graph: <https://example.org/graph> → stored as the URI string
+    let graph = "https://example.org/graph";
+    let graph_id = dict.encode(graph);
+
+    // Verify all components are stored correctly
+    assert_eq!(dict.decode(subject_id), Some(subject));
+    assert_eq!(dict.decode(predicate_id), Some(predicate));
+    assert_eq!(dict.decode(object_id), Some(object));
+    assert_eq!(dict.decode(datatype_id), Some(datatype));
+    assert_eq!(dict.decode(graph_id), Some(graph));
+
+    // In a real system, you'd also store metadata about which IDs are literals vs URIs
+    // and what datatype each literal has. This test just demonstrates the string storage.
+}
+
+#[test]
+fn test_rdf_literal_datatypes() {
+    let mut dict = Dictionary::new();
+
+    // Example RDF triples with different literal types:
+    //
+    // Triple 1: <event1> <hasTimestamp> "2025-11-05T10:30:00Z"^^xsd:dateTime
+    let timestamp_value = "2025-11-05T10:30:00Z";
+    let timestamp_datatype = "http://www.w3.org/2001/XMLSchema#dateTime";
+
+    // Triple 2: <event1> <hasTemperature> "23.5"^^xsd:double
+    let temp_value = "23.5";
+    let temp_datatype = "http://www.w3.org/2001/XMLSchema#double";
+
+    // Triple 3: <event1> <hasCount> "42"^^xsd:integer
+    let count_value = "42";
+    let count_datatype = "http://www.w3.org/2001/XMLSchema#integer";
+
+    // Triple 4: <event1> <hasLabel> "Sensor Reading"^^xsd:string
+    let label_value = "Sensor Reading";
+    let label_datatype = "http://www.w3.org/2001/XMLSchema#string";
+
+    // Store all values and datatypes in dictionary
+    let timestamp_val_id = dict.encode(timestamp_value);
+    let timestamp_dt_id = dict.encode(timestamp_datatype);
+
+    let temp_val_id = dict.encode(temp_value);
+    let temp_dt_id = dict.encode(temp_datatype);
+
+    let count_val_id = dict.encode(count_value);
+    let count_dt_id = dict.encode(count_datatype);
+
+    let label_val_id = dict.encode(label_value);
+    let label_dt_id = dict.encode(label_datatype);
+
+    // Verify all are stored correctly
+    assert_eq!(dict.decode(timestamp_val_id), Some(timestamp_value));
+    assert_eq!(dict.decode(timestamp_dt_id), Some(timestamp_datatype));
+
+    assert_eq!(dict.decode(temp_val_id), Some(temp_value));
+    assert_eq!(dict.decode(temp_dt_id), Some(temp_datatype));
+
+    assert_eq!(dict.decode(count_val_id), Some(count_value));
+    assert_eq!(dict.decode(count_dt_id), Some(count_datatype));
+
+    assert_eq!(dict.decode(label_val_id), Some(label_value));
+    assert_eq!(dict.decode(label_dt_id), Some(label_datatype));
+
+    // Note: Datatype URIs are reused across multiple literals
+    // E.g., many literals will have ^^xsd:double as their datatype
+    assert_eq!(temp_dt_id, dict.encode(temp_datatype)); // Same ID when requested again
+}
+
+#[test]
+fn test_dictionary_basic_operations() {
+    let mut dict = Dictionary::new();
+
+    // Test get_or_insert with real RDF URIs
+    let uri1 = "https://rsp.js/event1";
+    let uri2 = "http://www.w3.org/ns/saref#hasTimestamp";
+    let uri3 = "http://example.org/sensor/temperature";
+    let uri4 = "http://www.w3.org/ns/ssn#observedBy";
+
+    // First insertion should return ID 0
+    let id1 = dict.encode(uri1);
+    assert_eq!(id1, 0);
+
+    // Subsequent insertions should return sequential IDs
+    let id2 = dict.encode(uri2);
+    assert_eq!(id2, 1);
+
+    let id3 = dict.encode(uri3);
+    assert_eq!(id3, 2);
+
+    let id4 = dict.encode(uri4);
+    assert_eq!(id4, 3);
+
+    // Requesting same URI should return same ID
+    let id1_again = dict.encode(uri1);
+    assert_eq!(id1_again, id1);
+
+    // Test retrieval
+    assert_eq!(dict.decode(id1), Some(uri1));
+    assert_eq!(dict.decode(id2), Some(uri2));
+    assert_eq!(dict.decode(id3), Some(uri3));
+    assert_eq!(dict.decode(id4), Some(uri4));
+
+    // Test invalid ID
+    assert_eq!(dict.decode(999), None);
+
+    // Test length
+    assert_eq!(dict.id_to_uri.len(), 4);
+    assert!(!dict.id_to_uri.is_empty());
+}
+
+#[test]
+fn test_dictionary_persistence() -> std::io::Result<()> {
+    let test_dir = "target/test_data/dict_persistence";
+    let _ = fs::remove_dir_all(test_dir);
+    fs::create_dir_all(test_dir)?;
+
+    let dict_path = Path::new(test_dir).join("test_dict.bin");
+
+    // Create and populate dictionary
+    let mut dict = Dictionary::new();
+    let uris = [
+        "https://example.org/resource/event001",
+        "http://www.w3.org/ns/saref#hasValue",
+        "http://www.w3.org/2001/XMLSchema#dateTime",
+        "https://solid.ti.rw.fau.de/public/ns/stream#",
+    ];
+
+    let ids: Vec<u32> = uris.iter().map(|uri| dict.encode(uri)).collect();
+
+    // Save to file
+    dict.save_to_file(&dict_path)?;
+
+    // Load from file
+    let loaded_dict = Dictionary::load_from_file(&dict_path)?;
+
+    // Verify all URIs are preserved with correct IDs
+    for (i, uri) in uris.iter().enumerate() {
+        assert_eq!(loaded_dict.decode(ids[i]), Some(*uri));
+    }
+
+    assert_eq!(loaded_dict.id_to_uri.len(), uris.len());
+
+    Ok(())
+}
+
+#[test]
+fn test_rdf_event_encoding_with_dictionary() {
+    let mut dict = Dictionary::new();
+
+    // RDF Quad in N-Quads syntax would look like:
+    // <https://rsp.js/event/sensor-reading-001> <http://www.w3.org/ns/saref#hasTimestamp> "2025-11-05T10:30:00Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> <https://solid.ti.rw.fau.de/public/ns/stream#default> .
+    //
+    // But we store the actual string values WITHOUT syntax markers:
+
+    let subject_uri = "https://rsp.js/event/sensor-reading-001";
+    let predicate_uri = "http://www.w3.org/ns/saref#hasTimestamp";
+    let object_uri = "2025-11-05T10:30:00Z"; // The literal value itself
+    let graph_uri = "https://solid.ti.rw.fau.de/public/ns/stream#default";
+
+    // Map URIs to IDs
+    let timestamp: u64 = 1699181400;
+    let subject_id = dict.encode(subject_uri);
+    let predicate_id = dict.encode(predicate_uri);
+    let object_id = dict.encode(object_uri);
+    let graph_id = dict.encode(graph_uri);
+
+    // Encode record with IDs
+    let mut buffer = [0u8; RECORD_SIZE];
+    encode_record(&mut buffer, timestamp, subject_id, predicate_id, object_id, graph_id);
+
+    // Decode record
+    let (dec_timestamp, dec_subject, dec_predicate, dec_object, dec_graph) = decode_record(&buffer);
+
+    // Verify IDs are correctly encoded/decoded
+    assert_eq!(dec_timestamp, timestamp);
+    assert_eq!(dec_subject, subject_id);
+    assert_eq!(dec_predicate, predicate_id);
+    assert_eq!(dec_object, object_id);
+    assert_eq!(dec_graph, graph_id);
+
+    // Resolve IDs back to URIs
+    assert_eq!(dict.decode(dec_subject), Some(subject_uri));
+    assert_eq!(dict.decode(dec_predicate), Some(predicate_uri));
+    assert_eq!(dict.decode(dec_object), Some(object_uri));
+    assert_eq!(dict.decode(dec_graph), Some(graph_uri));
+}
+
+#[test]
+fn test_iot_sensor_events_with_dictionary() -> std::io::Result<()> {
+    let test_dir = "target/test_data/iot_sensor";
+    let _ = fs::remove_dir_all(test_dir);
+    fs::create_dir_all(test_dir)?;
+
+    let log_path = format!("{}/iot_sensor.log", test_dir);
+    let mut dict = Dictionary::new();
+
+    // Define common IoT RDF predicates and graph URIs
+    let predicates = [
+        "http://www.w3.org/ns/saref#hasTimestamp",
+        "http://www.w3.org/ns/saref#hasValue",
+        "http://www.w3.org/ns/ssn#observedBy",
+        "http://www.w3.org/ns/sosa#observedProperty",
+    ];
+
+    // Map predicates to IDs first (these will be reused)
+    let predicate_ids: Vec<u32> = predicates.iter().map(|p| dict.encode(p)).collect();
+
+    let graph_uri = "https://solid.ti.rw.fau.de/public/ns/stream#iot";
+    let graph_id = dict.encode(graph_uri);
+
+    // Create log writer
+    let mut writer = LogWriter::create(&log_path)?;
+
+    // Generate 100 IoT sensor events with unique event IDs but shared predicates
+    for i in 0..100 {
+        let timestamp = 1699181400 + i;
+
+        // Each event has unique subject (sensor reading ID)
+        let subject_uri = format!("https://rsp.js/event/sensor-reading-{:03}", i);
+        let subject_id = dict.encode(&subject_uri);
+
+        // Rotate through predicates (demonstrating reuse)
+        let predicate_id = predicate_ids[(i % predicate_ids.len() as u64) as usize];
+
+        // Unique object (sensor value)
+        let object_uri = format!("value-{}", i * 10);
+        let object_id = dict.encode(&object_uri);
+
+        writer.append_record(timestamp, subject_id, predicate_id, object_id, graph_id)?;
+    }
+
+    writer.flush()?;
+
+    // Verify dictionary statistics
+    // We should have:
+    // - 100 unique subjects
+    // - 4 predicates (reused)
+    // - 100 unique objects
+    // - 1 graph URI
+    // Total: 205 unique URIs
+    assert_eq!(dict.id_to_uri.len(), 205);
+
+    // Verify predicate reuse - predicates should have low IDs (0-3)
+    for (i, pred) in predicates.iter().enumerate() {
+        assert_eq!(dict.encode(pred), i as u32);
+    }
+
+    Ok(())
+}
+
+#[test]
+fn test_sparse_index_with_dictionary_integration() -> std::io::Result<()> {
+    let test_dir = "target/test_data/sparse_integration";
+    let _ = fs::remove_dir_all(test_dir);
+    fs::create_dir_all(test_dir)?;
+
+    let log_path = format!("{}/indexed_sensor.log", test_dir);
+    let index_path = format!("{}/indexed_sensor.idx", test_dir);
+    let dict_path = format!("{}/indexed_sensor_dict.bin", test_dir);
+
+    let mut dict = Dictionary::new();
+
+    // Define RDF components
+    let predicates =
+        ["http://www.w3.org/ns/saref#hasTimestamp", "http://www.w3.org/ns/saref#hasValue"];
+
+    let predicate_ids: Vec<u32> = predicates.iter().map(|p| dict.encode(p)).collect();
+
+    let graph_uri = "https://example.org/graph/sensors";
+    let graph_id = dict.encode(graph_uri);
+
+    // Create log with 1000 events
+    let mut writer = LogWriter::create(&log_path)?;
+
+    for i in 0..1000 {
+        let timestamp = i;
+        let subject_uri = format!("https://rsp.js/event/{:04}", i);
+        let subject_id = dict.encode(&subject_uri);
+        let predicate_id = predicate_ids[(i % 2) as usize];
+        let object_uri = format!("reading-{}", i);
+        let object_id = dict.encode(&object_uri);
+
+        writer.append_record(timestamp, subject_id, predicate_id, object_id, graph_id)?;
+    }
+
+    writer.flush()?;
+
+    // Save dictionary BEFORE building index
+    dict.save_to_file(Path::new(&dict_path))?;
+
+    // Build sparse index (without dictionary parameter since we saved it separately)
+    build_sparse_index(&log_path, &index_path, &100)?;
+
+    // Load dictionary and reader
+    let (reader, loaded_dict) = SparseReader::open_with_dictionary(&index_path, &dict_path, 100)?;
+
+    // Query a range and verify results
+    let results = reader.query_resolved(&log_path, &loaded_dict, 100, 199)?;
+
+    // Should get 100 events (timestamps 100-199)
+    assert_eq!(results.len(), 100);
+
+    // Verify first result has resolved URIs
+    assert!(results[0].subject.starts_with("https://rsp.js/event/"));
+    assert!(results[0].predicate.starts_with("http://www.w3.org/ns/saref#"));
+    assert!(results[0].object.starts_with("reading-"));
+    assert_eq!(results[0].graph, graph_uri);
+
+    // Verify timestamps are in order
+    for (i, event) in results.iter().enumerate() {
+        assert_eq!(event.timestamp, 100 + i as u64);
+    }
+
+    Ok(())
+}
+
+#[test]
+fn test_large_uri_handling() {
+    let mut dict = Dictionary::new();
+
+    // Test with very long URIs (realistic for RDF)
+    let long_uri = format!(
+        "https://solid.ti.rw.fau.de/public/2025/11/05/sensors/building-3/floor-2/room-205/temperature-sensor-{}/reading-{}",
+        "TMP-4532-XYZ-9871-ABC-DEF",
+        "measurement-with-very-long-identifier-12345678901234567890"
+    );
+
+    let id = dict.encode(&long_uri);
+    assert_eq!(id, 0);
+
+    // Verify retrieval works
+    assert_eq!(dict.decode(id), Some(long_uri.as_str()));
+
+    // Test that we can handle many long URIs
+    for i in 0..100 {
+        let uri = format!(
+            "https://example.org/very/long/path/to/resource/{}/subresource/{}/final-resource-{}",
+            i,
+            i * 2,
+            i * 3
+        );
+        dict.encode(&uri);
+    }
+
+    assert_eq!(dict.id_to_uri.len(), 101);
+}
+
+#[test]
+fn test_rdf_namespace_reuse() {
+    let mut dict = Dictionary::new();
+
+    // Common RDF namespace URIs that should be reused
+    let common_namespaces = [
+        "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+        "http://www.w3.org/2000/01/rdf-schema#",
+        "http://www.w3.org/2001/XMLSchema#",
+        "http://www.w3.org/ns/saref#",
+        "http://www.w3.org/ns/ssn#",
+        "http://www.w3.org/ns/sosa#",
+    ];
+
+    // Map each namespace
+    let namespace_ids: Vec<u32> = common_namespaces.iter().map(|ns| dict.encode(ns)).collect();
+
+    // Create 1000 events that all use these namespaces
+    for i in 0..1000 {
+        let event_uri = format!("https://rsp.js/event/{}", i);
+        dict.encode(&event_uri);
+
+        // Reference one of the common namespaces
+        let ns_id = namespace_ids[i % namespace_ids.len()];
+        assert!(dict.decode(ns_id).is_some());
+    }
+
+    // Dictionary should have: 6 namespaces + 1000 events = 1006 entries
+    assert_eq!(dict.id_to_uri.len(), 1006);
+
+    // Verify namespace IDs are unchanged (demonstrating reuse)
+    for (i, ns) in common_namespaces.iter().enumerate() {
+        assert_eq!(dict.encode(ns), namespace_ids[i]);
+    }
+}
+
+#[test]
+fn test_event_resolution_workflow() -> std::io::Result<()> {
+    let test_dir = "target/test_data/event_resolution";
+    let _ = fs::remove_dir_all(test_dir);
+    fs::create_dir_all(test_dir)?;
+
+    let log_path = format!("{}/resolution_test.log", test_dir);
+    let mut dict = Dictionary::new();
+
+    // Create realistic RDF event
+    let event_uris = vec![
+        (
+            1699181400u64,
+            "https://rsp.js/event/temp-reading-001",
+            "http://www.w3.org/ns/saref#hasValue",
+            "23.5",
+            "https://example.org/graph/sensors",
+        ),
+        (
+            1699181401u64,
+            "https://rsp.js/event/temp-reading-002",
+            "http://www.w3.org/ns/saref#hasValue",
+            "24.1",
+            "https://example.org/graph/sensors",
+        ),
+        (
+            1699181402u64,
+            "https://rsp.js/event/humidity-reading-001",
+            "http://www.w3.org/ns/saref#hasValue",
+            "65.0",
+            "https://example.org/graph/sensors",
+        ),
+    ];
+
+    // Write events with dictionary encoding
+    let mut writer = LogWriter::create(&log_path)?;
+
+    for (timestamp, subject, predicate, object, graph) in &event_uris {
+        let subject_id = dict.encode(subject);
+        let predicate_id = dict.encode(predicate);
+        let object_id = dict.encode(object);
+        let graph_id = dict.encode(graph);
+
+        writer.append_record(*timestamp, subject_id, predicate_id, object_id, graph_id)?;
+    }
+
+    writer.flush()?;
+
+    // Read back and resolve
+    let mut log_file = std::fs::File::open(&log_path)?;
+    use std::io::Read;
+
+    for (timestamp, subject, predicate, object, graph) in &event_uris {
+        let mut buffer = [0u8; RECORD_SIZE];
+        log_file.read_exact(&mut buffer)?;
+
+        let (dec_ts, dec_subj_id, dec_pred_id, dec_obj_id, dec_graph_id) = decode_record(&buffer);
+
+        // Verify timestamp
+        assert_eq!(dec_ts, *timestamp);
+
+        // Resolve IDs to URIs
+        assert_eq!(dict.decode(dec_subj_id), Some(*subject));
+        assert_eq!(dict.decode(dec_pred_id), Some(*predicate));
+        assert_eq!(dict.decode(dec_obj_id), Some(*object));
+        assert_eq!(dict.decode(dec_graph_id), Some(*graph));
+    }
+
+    Ok(())
+}
+
+#[test]
+fn test_dictionary_space_savings() {
+    let mut dict = Dictionary::new();
+
+    // Calculate space used by raw URIs
+    let uris = [
+        "https://solid.ti.rw.fau.de/public/ns/stream#event001",
+        "http://www.w3.org/ns/saref#hasTimestamp",
+        "2025-11-05T10:30:00Z",
+        "https://solid.ti.rw.fau.de/public/ns/stream#default",
+    ];
+
+    let raw_size: usize = uris.iter().map(|u| u.len()).sum();
+
+    // With dictionary, we store 8 bytes per ID
+    let ids: Vec<u32> = uris.iter().map(|u| dict.encode(u)).collect();
+    let encoded_size = ids.len() * 8; // 8 bytes per u64
+
+    println!("Raw URIs size: {} bytes", raw_size);
+    println!("Encoded IDs size: {} bytes", encoded_size);
+    println!("Space savings per record: {} bytes", raw_size - encoded_size);
+
+    // For 1000 records reusing same URIs:
+    let records = 1000;
+    let raw_total = raw_size * records;
+    let encoded_total = encoded_size * records + raw_size; // IDs + dictionary overhead
+
+    println!("\nFor {} records:", records);
+    println!("Raw storage: {} bytes", raw_total);
+    println!(
+        "Dictionary storage: {} bytes (IDs) + {} bytes (dictionary)",
+        encoded_size * records,
+        raw_size
+    );
+    println!("Total with dictionary: {} bytes", encoded_total);
+    println!(
+        "Space saved: {} bytes ({:.1}% reduction)",
+        raw_total - encoded_total,
+        (1.0 - encoded_total as f64 / raw_total as f64) * 100.0
+    );
+
+    // Verify space savings
+    assert!(encoded_total < raw_total);
+}
+
+#[test]
+fn test_complete_rdf_quad_with_datatype() {
+    let mut dict = Dictionary::new();
+
+    // Complete RDF quad in N-Quads syntax:
+    // <https://rsp.js/event/temp-sensor-001> <http://www.w3.org/ns/saref#hasValue> "23.5"^^<http://www.w3.org/2001/XMLSchema#double> <https://example.org/graph/sensors> .
+    //
+    // This quad has 5 components that get stored in the dictionary:
+
+    let components = vec![
+        ("subject", "https://rsp.js/event/temp-sensor-001"),
+        ("predicate", "http://www.w3.org/ns/saref#hasValue"),
+        ("object_value", "23.5"), // Just the literal value
+        ("object_datatype", "http://www.w3.org/2001/XMLSchema#double"), // Datatype as separate URI
+        ("graph", "https://example.org/graph/sensors"),
+    ];
+
+    // Store all components and get their IDs
+    let mut component_ids = std::collections::HashMap::new();
+    for (name, value) in &components {
+        let id = dict.encode(value);
+        component_ids.insert(*name, id);
+        println!("{}: '{}' → ID {}", name, value, id);
+    }
+
+    // In the actual record, we'd store:
+    // - timestamp (u64)
+    // - subject_id (u64)
+    // - predicate_id (u64)
+    // - object_value_id (u64)
+    // - graph_id (u64)
+    //
+    // The object_datatype_id would be stored in a separate metadata structure
+    // that tracks which object IDs are literals and what their datatypes are.
+
+    // Verify retrieval
+    assert_eq!(dict.decode(component_ids["subject"]), Some(components[0].1));
+    assert_eq!(dict.decode(component_ids["predicate"]), Some(components[1].1));
+    assert_eq!(dict.decode(component_ids["object_value"]), Some(components[2].1));
+    assert_eq!(dict.decode(component_ids["object_datatype"]), Some(components[3].1));
+    assert_eq!(dict.decode(component_ids["graph"]), Some(components[4].1));
+
+    // Another quad with the same datatype:
+    // <https://rsp.js/event/humidity-sensor-001> <http://www.w3.org/ns/saref#hasValue> "65.2"^^<http://www.w3.org/2001/XMLSchema#double> <https://example.org/graph/sensors> .
+
+    let subject2 = "https://rsp.js/event/humidity-sensor-001";
+    let value2 = "65.2";
+
+    let _subject2_id = dict.encode(subject2);
+    let _value2_id = dict.encode(value2);
+
+    // These components are REUSED (same ID returned):
+    let predicate2_id = dict.encode("http://www.w3.org/ns/saref#hasValue");
+    let datatype2_id = dict.encode("http://www.w3.org/2001/XMLSchema#double");
+    let graph2_id = dict.encode("https://example.org/graph/sensors");
+
+    // Verify reuse
+    assert_eq!(predicate2_id, component_ids["predicate"]);
+    assert_eq!(datatype2_id, component_ids["object_datatype"]);
+    assert_eq!(graph2_id, component_ids["graph"]);
+
+    // Dictionary has: 5 original components + 2 new (subject2, value2) = 7 total
+    assert_eq!(dict.id_to_uri.len(), 7);
+
+    println!("\n✓ Demonstrated RDF datatype handling with dictionary encoding");
+    println!("✓ Showed URI reuse across multiple quads (predicate, datatype, graph)");
+    println!("✓ Dictionary size: {} entries for 2 complete RDF quads", dict.id_to_uri.len());
+}
diff --git a/tests/integration_test.rs b/tests/integration_test.rs
index 44f3268..df2e932 100644
--- a/tests/integration_test.rs
+++ b/tests/integration_test.rs
@@ -8,7 +8,7 @@ use janus::{Error, Result};
 #[test]
 fn test_basic_functionality() {
     // TODO: Add integration tests
-    assert!(true);
+    // assert!(true); // Removed as it's always true
 }
 
 #[test]