braintrustdata · knjiang · Dec 30, 2025 · Dec 29, 2025
diff --git a/payloads/cases/advanced.ts b/payloads/cases/advanced.ts
@@ -1,10 +1,15 @@
+import { Type } from "@google/genai";
 import { TestCaseCollection } from "./types";
 import {
   OPENAI_CHAT_COMPLETIONS_MODEL,
   OPENAI_RESPONSES_MODEL,
   ANTHROPIC_MODEL,
+  BEDROCK_MODEL,
 } from "./models";
 
+const IMAGE_BASE64 =
+  "/9j/4AAQSkZJRgABAQEASABIAAD/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/wAALCAABAAEBAREA/8QAFAABAAAAAAAAAAAAAAAAAAAACf/EABQQAQAAAAAAAAAAAAAAAAAAAAD/2gAIAQEAAD8AKp//2Q=="; // Base64 image fixture for multimodalRequest: google sends it as-is via inlineData (image/jpeg); bedrock decodes it to bytes (format "jpeg").
+
 // Advanced test cases - complex functionality testing
 export const advancedCases: TestCaseCollection = {
   multimodalRequest: {
@@ -73,6 +78,49 @@ export const advancedCases: TestCaseCollection = {
         },
       ],
     },
+
+    google: {
+      contents: [
+        {
+          role: "user",
+          parts: [
+            { text: "What do you see in this image?" },
+            {
+              inlineData: {
+                mimeType: "image/jpeg",
+                data: IMAGE_BASE64,
+              },
+            },
+          ],
+        },
+      ],
+      config: {
+        maxOutputTokens: 300,
+      },
+    },
+
+    bedrock: {
+      modelId: BEDROCK_MODEL,
+      messages: [
+        {
+          role: "user",
+          content: [
+            { text: "What do you see in this image?" },
+            {
+              image: {
+                format: "jpeg",
+                source: {
+                  bytes: Buffer.from(IMAGE_BASE64, "base64"),
+                },
+              },
+            },
+          ],
+        },
+      ],
+      inferenceConfig: {
+        maxTokens: 300,
+      },
+    },
   },
 
   complexReasoningRequest: {
@@ -112,6 +160,39 @@ export const advancedCases: TestCaseCollection = {
         },
       ],
     },
+
+    google: {
+      contents: [
+        {
+          role: "user",
+          parts: [
+            {
+              text: "There is a digital clock, with minutes and hours in the form of 00:00. The clock shows all times from 00:00 to 23:59 and repeating. Imagine you had a list of all these times. Which digit(s) is the most common and which is the rarest? Can you find their percentage?",
+            },
+          ],
+        },
+      ],
+      config: {
+        maxOutputTokens: 20_000,
+      },
+    },
+
+    bedrock: {
+      modelId: BEDROCK_MODEL,
+      messages: [
+        {
+          role: "user",
+          content: [
+            {
+              text: "There is a digital clock, with minutes and hours in the form of 00:00. The clock shows all times from 00:00 to 23:59 and repeating. Imagine you had a list of all these times. Which digit(s) is the most common and which is the rarest? Can you find their percentage?",
+            },
+          ],
+        },
+      ],
+      inferenceConfig: {
+        maxTokens: 20_000,
+      },
+    },
   },
 
   reasoningWithOutput: {
@@ -144,6 +225,25 @@ export const advancedCases: TestCaseCollection = {
         },
       ],
     },
+
+    google: {
+      contents: [
+        {
+          role: "user",
+          parts: [{ text: "What color is the sky?" }],
+        },
+      ],
+    },
+
+    bedrock: {
+      modelId: BEDROCK_MODEL,
+      messages: [
+        {
+          role: "user",
+          content: [{ text: "What color is the sky?" }],
+        },
+      ],
+    },
   },
 
   toolCallRequest: {
@@ -233,5 +333,66 @@ export const advancedCases: TestCaseCollection = {
       ],
       tool_choice: "auto",
     },
+
+    google: {
+      contents: [
+        {
+          role: "user",
+          parts: [{ text: "What's the weather like in San Francisco?" }],
+        },
+      ],
+      tools: [
+        {
+          functionDeclarations: [
+            {
+              name: "get_weather",
+              description: "Get the current weather for a location",
+              parameters: {
+                type: Type.OBJECT,
+                properties: {
+                  location: {
+                    type: Type.STRING,
+                    description: "The city and state, e.g. San Francisco, CA",
+                  },
+                },
+                required: ["location"],
+              },
+            },
+          ],
+        },
+      ],
+    },
+
+    bedrock: {
+      modelId: BEDROCK_MODEL,
+      messages: [
+        {
+          role: "user",
+          content: [{ text: "What's the weather like in San Francisco?" }],
+        },
+      ],
+      toolConfig: {
+        tools: [
+          {
+            toolSpec: {
+              name: "get_weather",
+              description: "Get the current weather for a location",
+              inputSchema: {
+                json: {
+                  type: "object",
+                  properties: {
+                    location: {
+                      type: "string",
+                      description: "The city and state, e.g. San Francisco, CA",
+                    },
+                  },
+                  required: ["location"],
+                },
+              },
+            },
+          },
+        ],
+      },
+    },
   },
 };
diff --git a/payloads/cases/index.ts b/payloads/cases/index.ts
@@ -1,6 +1,7 @@
 // Export types and utilities
 export * from "./types";
 export * from "./utils";
+export * from "./models";
 
 // Export all case collections
 export { simpleCases } from "./simple";

diff --git a/payloads/cases/models.ts b/payloads/cases/models.ts
@@ -2,3 +2,5 @@
 export const OPENAI_CHAT_COMPLETIONS_MODEL = "gpt-5-nano";
 export const OPENAI_RESPONSES_MODEL = "gpt-5-nano";
 export const ANTHROPIC_MODEL = "claude-sonnet-4-20250514";
+export const GOOGLE_MODEL = "gemini-2.5-flash"; // NOTE(review): not referenced by the google payloads visible in this change — confirm where it is consumed.
+export const BEDROCK_MODEL = "us.anthropic.claude-haiku-4-5-20251001-v1:0"; // Passed as `modelId` in the bedrock payloads added to the simple and advanced cases.
diff --git a/payloads/cases/simple.ts b/payloads/cases/simple.ts
@@ -1,8 +1,10 @@
+import { Type } from "@google/genai";
 import { TestCaseCollection } from "./types";
 import {
   OPENAI_CHAT_COMPLETIONS_MODEL,
   OPENAI_RESPONSES_MODEL,
   ANTHROPIC_MODEL,
+  BEDROCK_MODEL,
 } from "./models";
 
 // Simple test cases - basic functionality testing
@@ -41,6 +43,25 @@ export const simpleCases: TestCaseCollection = {
         },
       ],
     },
+
+    google: {
+      contents: [
+        {
+          role: "user",
+          parts: [{ text: "What is the capital of France?" }],
+        },
+      ],
+    },
+
+    bedrock: {
+      modelId: BEDROCK_MODEL,
+      messages: [
+        {
+          role: "user",
+          content: [{ text: "What is the capital of France?" }],
+        },
+      ],
+    },
   },
 
   reasoningRequest: {
@@ -77,6 +98,33 @@ export const simpleCases: TestCaseCollection = {
         },
       ],
     },
+
+    google: {
+      contents: [
+        {
+          role: "user",
+          parts: [
+            {
+              text: "Solve this step by step: If a train travels 60 mph for 2 hours, then 80 mph for 1 hour, what's the average speed?",
+            },
+          ],
+        },
+      ],
+    },
+
+    bedrock: {
+      modelId: BEDROCK_MODEL,
+      messages: [
+        {
+          role: "user",
+          content: [
+            {
+              text: "Solve this step by step: If a train travels 60 mph for 2 hours, then 80 mph for 1 hour, what's the average speed?",
+            },
+          ],
+        },
+      ],
+    },
   },
 
   reasoningRequestTruncated: {
@@ -115,6 +163,39 @@ export const simpleCases: TestCaseCollection = {
         },
       ],
     },
+
+    google: {
+      contents: [
+        {
+          role: "user",
+          parts: [
+            {
+              text: "Solve this step by step: If a train travels 60 mph for 2 hours, then 80 mph for 1 hour, what's the average speed?",
+            },
+          ],
+        },
+      ],
+      config: {
+        maxOutputTokens: 100,
+      },
+    },
+
+    bedrock: {
+      modelId: BEDROCK_MODEL,
+      messages: [
+        {
+          role: "user",
+          content: [
+            {
+              text: "Solve this step by step: If a train travels 60 mph for 2 hours, then 80 mph for 1 hour, what's the average speed?",
+            },
+          ],
+        },
+      ],
+      inferenceConfig: {
+        maxTokens: 100,
+      },
+    },
   },
 
   toolCallRequest: {
@@ -201,5 +282,66 @@ export const simpleCases: TestCaseCollection = {
         },
       ],
     },
+
+    google: {
+      contents: [
+        {
+          role: "user",
+          parts: [{ text: "What's the weather like in San Francisco?" }],
+        },
+      ],
+      tools: [
+        {
+          functionDeclarations: [
+            {
+              name: "get_weather",
+              description: "Get the current weather for a location",
+              parameters: {
+                type: Type.OBJECT,
+                properties: {
+                  location: {
+                    type: Type.STRING,
+                    description: "The city and state, e.g. San Francisco, CA",
+                  },
+                },
+                required: ["location"],
+              },
+            },
+          ],
+        },
+      ],
+    },
+
+    bedrock: {
+      modelId: BEDROCK_MODEL,
+      messages: [
+        {
+          role: "user",
+          content: [{ text: "What's the weather like in San Francisco?" }],
+        },
+      ],
+      toolConfig: {
+        tools: [
+          {
+            toolSpec: {
+              name: "get_weather",
+              description: "Get the current weather for a location",
+              inputSchema: {
+                json: {
+                  type: "object",
+                  properties: {
+                    location: {
+                      type: "string",
+                      description: "The city and state, e.g. San Francisco, CA",
+                    },
+                  },
+                  required: ["location"],
+                },
+              },
+            },
+          },
+        ],
+      },
+    },
   },
 };