
Commit 623838b

feat: Migrate to OpenAI Responses API with full reasoning model support
BREAKING CHANGE: The OpenAI provider now uses the Responses API (`/v1/responses`) instead of the Chat Completions API. This is required for reasoning models (o1, o3, o4-mini).

## Major Changes

### Responses API Migration

- **OpenAI Provider**: Completely rewritten to use the `/v1/responses` endpoint
- **OpenAI-Compatible Provider**: Supports both the Responses API and the Chat Completions API with automatic fallback
- **Request Format**: Changed from a `messages` array to an `input` string plus `instructions`
- **Response Format**: Changed from `choices[0].message.content` to `output_text`

### Reasoning Model Support

Added full support for OpenAI's reasoning models (o1, o3, o4-mini, GPT-5):

1. **Reasoning Effort Parameter**: Control thinking depth with "minimal", "low", "medium", "high"
   - `minimal`: Fast, basic reasoning (GPT-5 only)
   - `low`: Quick responses with light reasoning
   - `medium`: Balanced reasoning (recommended)
   - `high`: Deep reasoning for complex problems
2. **max_output_tokens Parameter**: New token limit parameter for the Responses API
   - Replaces `max_tokens` for reasoning models
   - Falls back to `max_tokens` if not set, for backward compatibility
3. **Automatic Model Detection**: The OpenAI provider detects reasoning models and:
   - Disables temperature/top_p (not supported by reasoning models)
   - Enables the `reasoning_effort` parameter
   - Uses the proper token parameter names

### Configuration Updates

**GenerationConfig** (crates/codegraph-ai/src/llm_provider.rs):

```rust
pub struct GenerationConfig {
    pub temperature: f32,                  // Not supported by reasoning models
    pub max_tokens: Option<usize>,         // Legacy parameter
    pub max_output_tokens: Option<usize>,  // NEW: For Responses API
    pub reasoning_effort: Option<String>,  // NEW: For reasoning models
    pub top_p: Option<f32>,                // Not supported by reasoning models
    // ...
}
```

**LLMConfig** (crates/codegraph-core/src/config_manager.rs):

```rust
pub struct LLMConfig {
    pub max_tokens: usize,                 // Legacy
    pub max_output_tokens: Option<usize>,  // NEW
    pub reasoning_effort: Option<String>,  // NEW
    // ...
}
```
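To make the request-shape change concrete, here is a minimal sketch of how a `GenerationConfig` could be mapped onto a Responses API request body. The helper name `build_responses_request` is hypothetical, and the flat `reasoning_effort` field simply mirrors the request shape listed in this commit; check the Responses API reference for the exact wire format.

```rust
use serde_json::{json, Value};

// Hypothetical sketch (not the provider's actual code): map a GenerationConfig onto
// the request shape described above: { model, input, instructions,
// max_output_tokens, reasoning_effort }. Assumes GenerationConfig from
// crates/codegraph-ai/src/llm_provider.rs is in scope.
fn build_responses_request(
    model: &str,
    instructions: &str,
    input: &str,
    config: &GenerationConfig,
) -> Value {
    let mut body = json!({
        "model": model,
        "instructions": instructions,
        "input": input,
    });
    // Prefer max_output_tokens; fall back to the legacy max_tokens value.
    if let Some(limit) = config.max_output_tokens.or(config.max_tokens) {
        body["max_output_tokens"] = json!(limit);
    }
    // Only meaningful for reasoning models; standard models ignore it.
    if let Some(effort) = &config.reasoning_effort {
        body["reasoning_effort"] = json!(effort);
    }
    body
}
```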
### Provider Implementations

**OpenAI Provider** (crates/codegraph-ai/src/openai_llm_provider.rs):
- Uses the `/v1/responses` endpoint exclusively
- Automatic reasoning model detection
- Proper parameter handling based on model type
- Request: `{ model, input, instructions, max_output_tokens, reasoning_effort }`
- Response: `{ output_text, usage: { prompt_tokens, output_tokens, reasoning_tokens } }`

**OpenAI-Compatible Provider** (crates/codegraph-ai/src/openai_compatible_provider.rs):
- Defaults to the Responses API (`use_responses_api: true`)
- Falls back to the Chat Completions API for compatibility
- Supports both `max_output_tokens` and `max_completion_tokens`
- Works with LM Studio, Ollama's v1 endpoint, and custom APIs

### Documentation Updates

**docs/CLOUD_PROVIDERS.md**:
- Added a "Responses API & Reasoning Models" section
- Detailed explanation of the API format differences
- Configuration examples for reasoning models
- Reasoning effort level descriptions
- Migration guide from the Chat Completions API

**.codegraph.toml.example**:
- Added the `max_output_tokens` parameter with documentation
- Added the `reasoning_effort` parameter with its options
- Clarified which parameters apply to reasoning vs. standard models

### Backward Compatibility

- The OpenAI-compatible provider can fall back to the Chat Completions API
- `max_output_tokens` falls back to `max_tokens` if not set
- Configurations with only `max_tokens` continue to work
- Standard models (gpt-4o, gpt-4-turbo) work as before

### Testing

Added tests for:
- Reasoning model detection (o1, o3, o4, gpt-5)
- Standard model detection (gpt-4o, gpt-4-turbo)
- OpenAI-compatible provider configuration
- Both API format support

## Migration Guide

### For OpenAI Users

**Before (Chat Completions API)**:

```toml
[llm]
provider = "openai"
model = "gpt-4o"
max_tokens = 4096
```

**After (Responses API)** - still works, but consider:

```toml
[llm]
provider = "openai"
model = "gpt-4o"
max_output_tokens = 4096  # Preferred for Responses API
```

**For Reasoning Models**:

```toml
[llm]
provider = "openai"
model = "o3-mini"
max_output_tokens = 25000
reasoning_effort = "medium"  # NEW: Control reasoning depth
# Note: temperature/top_p are ignored for reasoning models
```

### For OpenAI-Compatible Users

No changes required - the provider automatically uses the Responses API if available and falls back to the Chat Completions API otherwise.

To force the Chat Completions API (e.g., for older systems):

```rust
let config = OpenAICompatibleConfig {
    use_responses_api: false, // Force legacy API
    ...
};
```

## Why This Change?

1. **Future-Proof**: The Responses API is OpenAI's modern standard
2. **Reasoning Models**: Required for o1, o3, and o4-mini support
3. **Better Features**: More granular control over model behavior
4. **Token Tracking**: Separate tracking of reasoning tokens
5. **Performance**: Optimized for the latest models
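As a rough illustration of the model detection described above (and exercised by the new tests), a check along these lines would distinguish reasoning models from standard ones. The function name and exact prefix list are assumptions, not necessarily what openai_llm_provider.rs uses.

```rust
/// Hypothetical sketch of reasoning-model detection; the real helper in
/// openai_llm_provider.rs may use a different name or prefix list.
fn is_reasoning_model(model: &str) -> bool {
    let m = model.to_ascii_lowercase();
    // o1 / o3 / o4-mini / GPT-5 families are treated as reasoning models;
    // gpt-4o, gpt-4-turbo, etc. remain standard models.
    m.starts_with("o1") || m.starts_with("o3") || m.starts_with("o4") || m.starts_with("gpt-5")
}
```

When such a check returns true, the provider would omit temperature/top_p and send `reasoning_effort`/`max_output_tokens` instead.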
## Files Modified

- `crates/codegraph-ai/src/llm_provider.rs`: Added reasoning parameters to GenerationConfig
- `crates/codegraph-ai/src/openai_llm_provider.rs`: Complete rewrite for the Responses API
- `crates/codegraph-ai/src/openai_compatible_provider.rs`: Dual API support
- `crates/codegraph-core/src/config_manager.rs`: Added reasoning config fields
- `.codegraph.toml.example`: Documented the new parameters
- `docs/CLOUD_PROVIDERS.md`: Comprehensive Responses API documentation

## References

- OpenAI Responses API: https://platform.openai.com/docs/api-reference/responses
- Reasoning Models: https://platform.openai.com/docs/guides/reasoning
- Azure OpenAI Reasoning: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/reasoning
1 parent 28f26d1 commit 623838b

File tree

6 files changed (+387 / -113 lines)


.codegraph.toml.example

Lines changed: 11 additions & 1 deletion
```diff
@@ -86,9 +86,19 @@ context_window = 32000
 # Temperature for generation (0.0 = deterministic, 2.0 = very creative)
 temperature = 0.1
 
-# Maximum tokens to generate in responses
+# Maximum tokens to generate in responses (legacy parameter, use max_output_tokens for Responses API)
 max_tokens = 4096
 
+# Maximum output tokens for Responses API and reasoning models
+# If not set, falls back to max_tokens
+# max_output_tokens = 4096
+
+# Reasoning effort for reasoning models (o1, o3, o4-mini, GPT-5)
+# Options: "minimal", "low", "medium", "high"
+# Higher effort = more reasoning tokens = better quality but slower and more expensive
+# Only applies to reasoning models, ignored by standard models
+# reasoning_effort = "medium"
+
 # Request timeout in seconds
 timeout_secs = 120
 
```

crates/codegraph-ai/src/llm_provider.rs

Lines changed: 11 additions & 5 deletions
```diff
@@ -25,15 +25,19 @@ pub struct ProviderCharacteristics {
 /// Configuration for generation parameters
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct GenerationConfig {
-    /// Temperature for sampling (0.0 to 2.0)
+    /// Temperature for sampling (0.0 to 2.0) - Not supported by reasoning models
     pub temperature: f32,
-    /// Maximum tokens to generate
+    /// Maximum tokens to generate (legacy parameter for Chat Completions API)
     pub max_tokens: Option<usize>,
-    /// Top-p nucleus sampling parameter
+    /// Maximum output tokens (for Responses API and reasoning models)
+    pub max_output_tokens: Option<usize>,
+    /// Reasoning effort for reasoning models: "minimal", "low", "medium", "high"
+    pub reasoning_effort: Option<String>,
+    /// Top-p nucleus sampling parameter - Not supported by reasoning models
     pub top_p: Option<f32>,
-    /// Frequency penalty (-2.0 to 2.0)
+    /// Frequency penalty (-2.0 to 2.0) - Not supported by reasoning models
     pub frequency_penalty: Option<f32>,
-    /// Presence penalty (-2.0 to 2.0)
+    /// Presence penalty (-2.0 to 2.0) - Not supported by reasoning models
     pub presence_penalty: Option<f32>,
     /// Stop sequences
     pub stop: Option<Vec<String>>,
@@ -44,6 +48,8 @@ impl Default for GenerationConfig {
         Self {
             temperature: 0.1,
             max_tokens: Some(4096),
+            max_output_tokens: None, // Will use max_tokens if not set
+            reasoning_effort: None,  // Only for reasoning models
             top_p: None,
             frequency_penalty: None,
             presence_penalty: None,
```
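For orientation only, a hedged sketch of how a caller might fill in the two new fields added in this hunk; the values are illustrative, not defaults introduced by this commit.

```rust
// Illustrative values only; relies on the Default impl updated in this diff.
let config = GenerationConfig {
    max_tokens: None,                             // leave the legacy parameter unset
    max_output_tokens: Some(25_000),              // preferred limit for the Responses API
    reasoning_effort: Some("medium".to_string()), // "minimal" | "low" | "medium" | "high"
    ..GenerationConfig::default()
};
```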
