Skip to content

Commit d64ed5d

Browse files
🤖 feat: add GPT-5.1-Codex-Max model with xhigh reasoning level (#933)
Add support for OpenAI's `gpt-5.1-codex-max` model with the new `xhigh` (Extra High) thinking level.

## Changes

- Allow `xhigh` thinking level end-to-end (types, schemas, UI policy, enforcement)
- Normalize model detection so `gpt-5.1-codex-max` works with or without the `openai:` prefix, and via the `codex-max` alias
- Update thinking slider visuals to clamp extra levels to the strongest glow
- Refresh LiteLLM model data (`models.json`) to include codex-max
- Document codex-max/XHIGH support in `docs/models.md`
- Add tests covering bare/alias codex-max and policy enforcement fallbacks

## Behavior

- `xhigh` only surfaces for codex-max; other models fall back to their allowed max (e.g., medium or high)
- Command palette may show all levels, but backend/UI enforce policy via `enforceThinkingPolicy`

## Validation

- `make typecheck`
- `bun test src/browser/utils/thinking/policy.test.ts`
- `bun run scripts/update_models.ts` (models.json refreshed)

---

_Generated with `mux`_

---------

Signed-off-by: Thomas Kosiewski <tk@coder.com>
Co-authored-by: Ammar <ammar+ai@ammar.io>
1 parent 44580b3 commit d64ed5d

File tree

14 files changed

+3883
-2654
lines changed

14 files changed

+3883
-2654
lines changed

docs/models.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ GPT-5 family of models:
4747
- `openai:gpt-5.1`
4848
- `openai:gpt-5-pro`
4949
- `openai:gpt-5.1-codex`
50+
- `openai:gpt-5.1-codex-max` — supports the XHIGH (extra high) thinking level; alias: `codex-max`
5051
- `openai:gpt-5.1-codex-mini`
5152

5253
#### Google (Cloud)

src/browser/components/ChatInput/index.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,7 @@ export const ChatInput: React.FC<ChatInputProps> = (props) => {
515515
low: "Low — adds light reasoning",
516516
medium: "Medium — balanced reasoning",
517517
high: "High — maximum reasoning depth",
518+
xhigh: "Extra High — extended deep thinking",
518519
};
519520

520521
setToast({

src/browser/components/ThinkingSlider.tsx

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -123,10 +123,14 @@ export const ThinkingSliderComponent: React.FC<ThinkingControlProps> = ({ modelS
123123
const sliderValue = currentIndex === -1 ? 0 : currentIndex;
124124
const maxSteps = allowed.length - 1;
125125

126-
// For styling, we still want to map to the "global" intensity 0-3
127-
// to keep colors consistent (e.g. "high" is always purple, even if it's step 1 of 2)
128-
const globalLevelIndex = ["off", "low", "medium", "high"].indexOf(thinkingLevel);
129-
const visualValue = globalLevelIndex === -1 ? 0 : globalLevelIndex;
126+
// Map levels to visual intensity indices (0-3) so colors/glow stay consistent
127+
// Levels outside the base 4 (e.g., xhigh) map to the strongest intensity
128+
const baseVisualOrder: ThinkingLevel[] = ["off", "low", "medium", "high"];
129+
const visualValue = (() => {
130+
const idx = baseVisualOrder.indexOf(thinkingLevel);
131+
if (idx >= 0) return idx;
132+
return baseVisualOrder.length - 1; // clamp extras (e.g., xhigh) to strongest glow
133+
})();
130134

131135
const sliderStyles = getSliderStyles(visualValue, isHovering);
132136
const textStyle = getTextStyle(visualValue);

src/browser/utils/commands/sources.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ export interface BuildSourcesParams {
5050
onOpenSettings?: (section?: string) => void;
5151
}
5252

53-
const THINKING_LEVELS: ThinkingLevel[] = ["off", "low", "medium", "high"];
53+
const THINKING_LEVELS: ThinkingLevel[] = ["off", "low", "medium", "high", "xhigh"];
5454

5555
/**
5656
* Command palette section names
@@ -431,6 +431,7 @@ export function buildCoreSources(p: BuildSourcesParams): Array<() => CommandActi
431431
low: "Low — add a bit of reasoning",
432432
medium: "Medium — balanced reasoning",
433433
high: "High — maximum reasoning depth",
434+
xhigh: "Extra High — extended deep thinking",
434435
};
435436
const currentLevel = p.getThinkingLevel(workspaceId);
436437

src/browser/utils/thinking/policy.test.ts

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,56 @@ import { describe, expect, test } from "bun:test";
22
import { getThinkingPolicyForModel, enforceThinkingPolicy } from "./policy";
33

44
describe("getThinkingPolicyForModel", () => {
5+
test("returns 5 levels including xhigh for gpt-5.1-codex-max", () => {
6+
expect(getThinkingPolicyForModel("openai:gpt-5.1-codex-max")).toEqual([
7+
"off",
8+
"low",
9+
"medium",
10+
"high",
11+
"xhigh",
12+
]);
13+
});
14+
15+
test("returns 5 levels for gpt-5.1-codex-max with version suffix", () => {
16+
expect(getThinkingPolicyForModel("openai:gpt-5.1-codex-max-2025-12-01")).toEqual([
17+
"off",
18+
"low",
19+
"medium",
20+
"high",
21+
"xhigh",
22+
]);
23+
});
24+
25+
test("returns 5 levels for bare gpt-5.1-codex-max without prefix", () => {
26+
expect(getThinkingPolicyForModel("gpt-5.1-codex-max")).toEqual([
27+
"off",
28+
"low",
29+
"medium",
30+
"high",
31+
"xhigh",
32+
]);
33+
});
34+
35+
test("returns 5 levels for codex-max alias", () => {
36+
expect(getThinkingPolicyForModel("codex-max")).toEqual([
37+
"off",
38+
"low",
39+
"medium",
40+
"high",
41+
"xhigh",
42+
]);
43+
});
44+
45+
test("returns 5 levels for gpt-5.1-codex-max with whitespace after colon", () => {
46+
expect(getThinkingPolicyForModel("openai: gpt-5.1-codex-max")).toEqual([
47+
"off",
48+
"low",
49+
"medium",
50+
"high",
51+
"xhigh",
52+
]);
53+
});
54+
555
test("returns single HIGH for gpt-5-pro base model", () => {
656
expect(getThinkingPolicyForModel("openai:gpt-5-pro")).toEqual(["high"]);
757
});
@@ -111,6 +161,32 @@ describe("enforceThinkingPolicy", () => {
111161
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("off");
112162
});
113163
});
164+
165+
describe("GPT-5.1-Codex-Max (5 levels including xhigh)", () => {
166+
test("allows all 5 levels including xhigh", () => {
167+
expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "off")).toBe("off");
168+
expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "low")).toBe("low");
169+
expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "medium")).toBe("medium");
170+
expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "high")).toBe("high");
171+
expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max", "xhigh")).toBe("xhigh");
172+
});
173+
174+
test("allows xhigh for versioned model", () => {
175+
expect(enforceThinkingPolicy("openai:gpt-5.1-codex-max-2025-12-01", "xhigh")).toBe("xhigh");
176+
});
177+
});
178+
179+
describe("xhigh fallback for non-codex-max models", () => {
180+
test("falls back to medium when xhigh requested on standard model", () => {
181+
// Standard models don't support xhigh, so fall back to medium (preferred fallback)
182+
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "xhigh")).toBe("medium");
183+
});
184+
185+
test("falls back to high when xhigh requested on gpt-5-pro", () => {
186+
// gpt-5-pro only supports high, so xhigh falls back to high
187+
expect(enforceThinkingPolicy("openai:gpt-5-pro", "xhigh")).toBe("high");
188+
});
189+
});
114190
});
115191

116192
// Note: Tests for invalid levels removed - TypeScript type system prevents invalid

src/browser/utils/thinking/policy.ts

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,26 +24,35 @@ export type ThinkingPolicy = readonly ThinkingLevel[];
2424
* Returns the thinking policy for a given model.
2525
*
2626
* Rules:
27+
* - openai:gpt-5.1-codex-max → ["off", "low", "medium", "high", "xhigh"] (5 levels including xhigh)
2728
* - openai:gpt-5-pro → ["high"] (only supported level)
2829
* - gemini-3 → ["low", "high"] (thinking level only)
29-
* - default → ["off", "low", "medium", "high"] (all levels selectable)
30+
* - default → ["off", "low", "medium", "high"] (standard 4 levels)
3031
*
3132
* Tolerates version suffixes (e.g., gpt-5-pro-2025-10-06).
3233
* Does NOT match gpt-5-pro-mini (uses negative lookahead).
3334
*/
3435
export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
35-
// Match "openai:" followed by optional whitespace and "gpt-5-pro"
36-
// Allow version suffixes like "-2025-10-06" but NOT "-mini" or other text suffixes
37-
if (/^openai:\s*gpt-5-pro(?!-[a-z])/.test(modelString)) {
36+
// Normalize to be robust to provider prefixes, whitespace, and version suffixes
37+
const normalized = modelString.trim().toLowerCase();
38+
const withoutPrefix = normalized.replace(/^[a-z0-9_-]+:\s*/, "");
39+
40+
// GPT-5.1-Codex-Max supports 5 reasoning levels including xhigh (Extra High)
41+
if (withoutPrefix.startsWith("gpt-5.1-codex-max") || withoutPrefix.startsWith("codex-max")) {
42+
return ["off", "low", "medium", "high", "xhigh"];
43+
}
44+
45+
// gpt-5-pro (not mini) with optional version suffix
46+
if (/^gpt-5-pro(?!-[a-z])/.test(withoutPrefix)) {
3847
return ["high"];
3948
}
4049

4150
// Gemini 3 Pro only supports "low" and "high" reasoning levels
42-
if (modelString.includes("gemini-3")) {
51+
if (withoutPrefix.includes("gemini-3")) {
4352
return ["low", "high"];
4453
}
4554

46-
// Default policy: all levels selectable
55+
// Default policy: standard 4 levels (xhigh only for codex-max)
4756
return ["off", "low", "medium", "high"];
4857
}
4958

src/common/constants/knownModels.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,13 @@ const MODEL_DEFINITIONS = {
7070
providerModelId: "gpt-5.1-codex-mini",
7171
aliases: ["codex-mini"],
7272
},
73+
GPT_CODEX_MAX: {
74+
provider: "openai",
75+
providerModelId: "gpt-5.1-codex-max",
76+
aliases: ["codex-max"],
77+
warm: true,
78+
tokenizerOverride: "openai/gpt-5",
79+
},
7380
GEMINI_3_PRO: {
7481
provider: "google",
7582
providerModelId: "gemini-3-pro-preview",

src/common/orpc/schemas/stream.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ export const ToolPolicySchema = z.array(ToolPolicyFilterSchema).meta({
313313
// SendMessage options
314314
export const SendMessageOptionsSchema = z.object({
315315
editMessageId: z.string().optional(),
316-
thinkingLevel: z.enum(["off", "low", "medium", "high"]).optional(),
316+
thinkingLevel: z.enum(["off", "low", "medium", "high", "xhigh"]).optional(),
317317
model: z.string("No model specified"),
318318
toolPolicy: ToolPolicySchema.optional(),
319319
additionalSystemInstructions: z.string().optional(),

src/common/orpc/schemas/telemetry.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ const FrontendPlatformInfoSchema = z.object({
2929
});
3030

3131
// Thinking level enum (matches payload.ts TelemetryThinkingLevel)
32-
const TelemetryThinkingLevelSchema = z.enum(["off", "low", "medium", "high"]);
32+
const TelemetryThinkingLevelSchema = z.enum(["off", "low", "medium", "high", "xhigh"]);
3333

3434
// Command type enum (matches payload.ts TelemetryCommandType)
3535
const TelemetryCommandTypeSchema = z.enum([

src/common/telemetry/payload.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ export interface WorkspaceSwitchedPayload {
8686
/**
8787
* Thinking level for extended thinking feature
8888
*/
89-
export type TelemetryThinkingLevel = "off" | "low" | "medium" | "high";
89+
export type TelemetryThinkingLevel = "off" | "low" | "medium" | "high" | "xhigh";
9090

9191
/**
9292
* Chat/AI interaction events

0 commit comments

Comments
 (0)