@@ -2,7 +2,7 @@ use std::sync::Arc;
 use std::time::{Duration, Instant};
 
 use anyhow::Result;
-use futures::FutureExt;
+
 use parking_lot::RwLock;
 use prometheus::{
     register_gauge, register_histogram, register_int_counter, Gauge, Histogram, IntCounter,
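
Aside: the `register_*` items imported here are the `prometheus` crate's convenience macros, which create a metric and register it with the default registry in one call. Below is a minimal sketch of the metrics holder this file appears to rely on; the `gpu_utilization` field matches the usage removed further down, the counter is purely illustrative, and since the removed code clones `self.metrics` into a spawned task, the holder is presumably shared behind the `Arc` imported at the top:

```rust
use prometheus::{register_gauge, register_int_counter, Gauge, IntCounter};

// Hypothetical metrics holder; only `gpu_utilization` is attested in this diff.
struct OptimizerMetrics {
    gpu_utilization: Gauge,
    inferences_total: IntCounter,
}

impl OptimizerMetrics {
    fn new() -> prometheus::Result<Self> {
        Ok(Self {
            // Each register_* macro creates the metric and adds it to the
            // default registry, erroring on duplicate metric names.
            gpu_utilization: register_gauge!(
                "ai_gpu_utilization_percent",
                "Most recent GPU utilization sampled via NVML"
            )?,
            inferences_total: register_int_counter!(
                "ai_inferences_total",
                "Total inference requests served"
            )?,
        })
    }
}
```
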
@@ -351,21 +351,7 @@ impl ModelOptimizer {
 
     // Quantization APIs – backend-specific implementations behind feature flags.
     pub fn quantize_fp16(&self) -> Result<()> {
-        #[cfg(feature = "tch")]
-        {
-            // With tch, a full-graph conversion requires model-specific access.
-            // For now, expose as not implemented until integrated with a model holder.
-            return Err(AiOptimizeError::NotImplemented("tch fp16 quantization").into());
-        }
-        #[cfg(feature = "onnx")]
-        {
-            // ONNX Runtime quantization should be handled offline or through tooling.
-            return Err(AiOptimizeError::NotImplemented("onnx fp16 quantization").into());
-        }
-        #[cfg(feature = "candle")]
-        {
-            return Err(AiOptimizeError::NotImplemented("candle fp16 quantization").into());
-        }
+
         #[allow(unreachable_code)]
         {
             warn!("fp16 quantization requested but no backend enabled");
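
Note on the pattern being removed: each backend was gated behind a Cargo feature, and the first enabled feature's block returned early, leaving the trailing `#[allow(unreachable_code)]` block as the no-backend fallback. A condensed sketch of that shape, with the allow hoisted to the function for clarity; `AiOptimizeError` is reproduced from the diff with only the relevant variant, the `warn!` source crate is assumed, and the final `Ok(())` is an assumption since the hunk truncates the fallback body:

```rust
use anyhow::Result;
use tracing::warn; // assumed; the file may use the `log` crate instead

// Reproduced from context: the crate's error enum, reduced to one variant.
#[derive(Debug, thiserror::Error)]
pub enum AiOptimizeError {
    #[error("not implemented: {0}")]
    NotImplemented(&'static str),
}

// With a backend feature enabled, the early return makes the tail dead
// code; the unreachable_code allowance suppresses that warning.
#[allow(unreachable_code)]
pub fn quantize_fp16() -> Result<()> {
    #[cfg(feature = "tch")]
    return Err(AiOptimizeError::NotImplemented("tch fp16 quantization").into());

    warn!("fp16 quantization requested but no backend enabled");
    Ok(()) // assumption: the truncated fallback presumably returns here
}
```
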
@@ -374,18 +360,7 @@ impl ModelOptimizer {
     }
 
     pub fn quantize_int8(&self) -> Result<()> {
-        #[cfg(feature = "onnx")]
-        {
-            return Err(AiOptimizeError::NotImplemented("onnx int8 quantization").into());
-        }
-        #[cfg(feature = "tch")]
-        {
-            return Err(AiOptimizeError::NotImplemented("tch int8 quantization").into());
-        }
-        #[cfg(feature = "candle")]
-        {
-            return Err(AiOptimizeError::NotImplemented("candle int8 quantization").into());
-        }
+
         #[allow(unreachable_code)]
         {
             warn!("int8 quantization requested but no backend enabled");
@@ -434,26 +409,7 @@ impl ModelOptimizer {
 
     pub fn start_monitoring(&self) {
         // GPU utilization polling via NVML if available
-        #[cfg(feature = "nvml")]
-        {
-            use nvml_wrapper::Nvml;
-            let nvml = Nvml::init().ok();
-            let metrics = self.metrics.clone();
-            tokio::spawn(async move {
-                if let Some(nvml) = nvml {
-                    loop {
-                        if let Ok(device) = nvml.device_by_index(0) {
-                            if let Ok(util) = device.utilization_rates() {
-                                metrics.gpu_utilization.set(util.gpu as f64);
-                            }
-                        }
-                        sleep(Duration::from_millis(1000)).await;
-                    }
-                } else {
-                    warn!("NVML init failed; GPU utilization metrics disabled");
-                }
-            });
-        }
+
 
         // Alerting loop to check thresholds; emits logs (integrate with Alertmanager externally)
         let thresholds = self.thresholds.clone();
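
For reference, the NVML polling loop deleted above can be lifted into a standalone task. This sketch keeps the old code's choices (device 0 only, 1 s interval, silently skipping per-sample errors) and assumes the `nvml-wrapper`, `prometheus`, and `tokio` crates with a gauge registered elsewhere:

```rust
use std::time::Duration;

use nvml_wrapper::Nvml;
use prometheus::Gauge;
use tokio::time::sleep;

// Mirrors the removed #[cfg(feature = "nvml")] block as a free function;
// spawn it with `tokio::spawn(poll_gpu_utilization(gauge))`.
async fn poll_gpu_utilization(gauge: Gauge) {
    let Ok(nvml) = Nvml::init() else {
        // Same behavior as the old code: no NVML, no GPU metrics.
        eprintln!("NVML init failed; GPU utilization metrics disabled");
        return;
    };
    loop {
        // Device 0 only, as in the removed code; a multi-GPU host would
        // iterate up to nvml.device_count() instead.
        if let Ok(device) = nvml.device_by_index(0) {
            if let Ok(util) = device.utilization_rates() {
                gauge.set(util.gpu as f64);
            }
        }
        sleep(Duration::from_secs(1)).await;
    }
}
```
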