Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 161 additions & 0 deletions payloads/cases/advanced.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
import { Type } from "@google/genai";
import { TestCaseCollection } from "./types";
import {
OPENAI_CHAT_COMPLETIONS_MODEL,
OPENAI_RESPONSES_MODEL,
ANTHROPIC_MODEL,
BEDROCK_MODEL,
} from "./models";

/**
 * Base64-encoded JPEG fixture used as the image input for the multimodal
 * test payloads in this file.
 *
 * The Google payload passes this string directly as `inlineData.data` with
 * `mimeType: "image/jpeg"`; the Bedrock payload decodes it to bytes with
 * `Buffer.from(IMAGE_BASE64, "base64")` and declares `format: "jpeg"`.
 */
const IMAGE_BASE64 =
"/9j/4AAQSkZJRgABAQEASABIAAD/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/wAALCAABAAEBAREA/8QAFAABAAAAAAAAAAAAAAAAAAAACf/EABQQAQAAAAAAAAAAAAAAAAAAAAD/2gAIAQEAAD8AKp//2Q==";

// Advanced test cases - complex functionality testing
export const advancedCases: TestCaseCollection = {
multimodalRequest: {
Expand Down Expand Up @@ -73,6 +78,49 @@ export const advancedCases: TestCaseCollection = {
},
],
},

google: {
contents: [
{
role: "user",
parts: [
{ text: "What do you see in this image?" },
{
inlineData: {
mimeType: "image/jpeg",
data: IMAGE_BASE64,
},
},
],
},
],
config: {
maxOutputTokens: 300,
},
},

// Bedrock request for the image prompt: one user message containing a text
// block followed by an image block, capped at 300 output tokens.
bedrock: {
modelId: BEDROCK_MODEL,
messages: [
{
role: "user",
content: [
{ text: "What do you see in this image?" },
{
image: {
format: "jpeg",
source: {
// The image is supplied as decoded bytes (a Node Buffer), not as the
// base64 string itself.
// NOTE(review): a Buffer does not round-trip through JSON.stringify as
// raw bytes — confirm how these payloads are serialized/snapshotted.
bytes: Buffer.from(IMAGE_BASE64, "base64"),
},
},
},
],
},
],
inferenceConfig: {
maxTokens: 300,
},
},
},

complexReasoningRequest: {
Expand Down Expand Up @@ -112,6 +160,39 @@ export const advancedCases: TestCaseCollection = {
},
],
},

google: {
contents: [
{
role: "user",
parts: [
{
text: "There is a digital clock, with minutes and hours in the form of 00:00. The clock shows all times from 00:00 to 23:59 and repeating. Imagine you had a list of all these times. Which digit(s) is the most common and which is the rarest? Can you find their percentage?",
},
],
},
],
config: {
maxOutputTokens: 20_000,
},
},

bedrock: {
modelId: BEDROCK_MODEL,
messages: [
{
role: "user",
content: [
{
text: "There is a digital clock, with minutes and hours in the form of 00:00. The clock shows all times from 00:00 to 23:59 and repeating. Imagine you had a list of all these times. Which digit(s) is the most common and which is the rarest? Can you find their percentage?",
},
],
},
],
inferenceConfig: {
maxTokens: 20_000,
},
},
},

reasoningWithOutput: {
Expand Down Expand Up @@ -144,6 +225,25 @@ export const advancedCases: TestCaseCollection = {
},
],
},

google: {
contents: [
{
role: "user",
parts: [{ text: "What color is the sky?" }],
},
],
},

bedrock: {
modelId: BEDROCK_MODEL,
messages: [
{
role: "user",
content: [{ text: "What color is the sky?" }],
},
],
},
},

toolCallRequest: {
Expand Down Expand Up @@ -233,5 +333,66 @@ export const advancedCases: TestCaseCollection = {
],
tool_choice: "auto",
},

google: {
contents: [
{
role: "user",
parts: [{ text: "What's the weather like in San Francisco?" }],
},
],
tools: [
{
functionDeclarations: [
{
name: "get_weather",
description: "Get the current weather for a location",
parameters: {
type: Type.OBJECT,
properties: {
location: {
type: Type.STRING,
description: "The city and state, e.g. San Francisco, CA",
},
},
required: ["location"],
},
},
],
},
],
},

bedrock: {
modelId: BEDROCK_MODEL,
messages: [
{
role: "user",
content: [{ text: "What's the weather like in San Francisco?" }],
},
],
toolConfig: {
tools: [
{
toolSpec: {
name: "get_weather",
description: "Get the current weather for a location",
inputSchema: {
json: {
type: "object",
properties: {
location: {
type: "string",
description: "The city and state, e.g. San Francisco, CA",
},
},
required: ["location"],
},
},
},
},
],
},
},
},
};
1 change: 1 addition & 0 deletions payloads/cases/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Export types and utilities
export * from "./types";
export * from "./utils";
export * from "./models";

// Export all case collections
export { simpleCases } from "./simple";
Expand Down
2 changes: 2 additions & 0 deletions payloads/cases/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
// Model identifiers shared by the test-case payload definitions.
// NOTE(review): the OpenAI and Anthropic usages are not visible here;
// their purpose is inferred from the constant names — confirm against the case files.
export const OPENAI_CHAT_COMPLETIONS_MODEL = "gpt-5-nano";
export const OPENAI_RESPONSES_MODEL = "gpt-5-nano";
export const ANTHROPIC_MODEL = "claude-sonnet-4-20250514";
// NOTE(review): the Google payloads in the case files carry no model field,
// so this is presumably supplied by whatever issues the request — confirm.
export const GOOGLE_MODEL = "gemini-2.5-flash";
// Used as `modelId` in every Bedrock payload in the case files.
export const BEDROCK_MODEL = "us.anthropic.claude-haiku-4-5-20251001-v1:0";
142 changes: 142 additions & 0 deletions payloads/cases/simple.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import { Type } from "@google/genai";
import { TestCaseCollection } from "./types";
import {
OPENAI_CHAT_COMPLETIONS_MODEL,
OPENAI_RESPONSES_MODEL,
ANTHROPIC_MODEL,
BEDROCK_MODEL,
} from "./models";

// Simple test cases - basic functionality testing
Expand Down Expand Up @@ -41,6 +43,25 @@ export const simpleCases: TestCaseCollection = {
},
],
},

google: {
contents: [
{
role: "user",
parts: [{ text: "What is the capital of France?" }],
},
],
},

bedrock: {
modelId: BEDROCK_MODEL,
messages: [
{
role: "user",
content: [{ text: "What is the capital of France?" }],
},
],
},
},

reasoningRequest: {
Expand Down Expand Up @@ -77,6 +98,33 @@ export const simpleCases: TestCaseCollection = {
},
],
},

google: {
contents: [
{
role: "user",
parts: [
{
text: "Solve this step by step: If a train travels 60 mph for 2 hours, then 80 mph for 1 hour, what's the average speed?",
},
],
},
],
},

bedrock: {
modelId: BEDROCK_MODEL,
messages: [
{
role: "user",
content: [
{
text: "Solve this step by step: If a train travels 60 mph for 2 hours, then 80 mph for 1 hour, what's the average speed?",
},
],
},
],
},
},

reasoningRequestTruncated: {
Expand Down Expand Up @@ -115,6 +163,39 @@ export const simpleCases: TestCaseCollection = {
},
],
},

google: {
contents: [
{
role: "user",
parts: [
{
text: "Solve this step by step: If a train travels 60 mph for 2 hours, then 80 mph for 1 hour, what's the average speed?",
},
],
},
],
config: {
maxOutputTokens: 100,
},
},

bedrock: {
modelId: BEDROCK_MODEL,
messages: [
{
role: "user",
content: [
{
text: "Solve this step by step: If a train travels 60 mph for 2 hours, then 80 mph for 1 hour, what's the average speed?",
},
],
},
],
inferenceConfig: {
maxTokens: 100,
},
},
},

toolCallRequest: {
Expand Down Expand Up @@ -201,5 +282,66 @@ export const simpleCases: TestCaseCollection = {
},
],
},

google: {
contents: [
{
role: "user",
parts: [{ text: "What's the weather like in San Francisco?" }],
},
],
tools: [
{
functionDeclarations: [
{
name: "get_weather",
description: "Get the current weather for a location",
parameters: {
type: Type.OBJECT,
properties: {
location: {
type: Type.STRING,
description: "The city and state, e.g. San Francisco, CA",
},
},
required: ["location"],
},
},
],
},
],
},

bedrock: {
modelId: BEDROCK_MODEL,
messages: [
{
role: "user",
content: [{ text: "What's the weather like in San Francisco?" }],
},
],
toolConfig: {
tools: [
{
toolSpec: {
name: "get_weather",
description: "Get the current weather for a location",
inputSchema: {
json: {
type: "object",
properties: {
location: {
type: "string",
description: "The city and state, e.g. San Francisco, CA",
},
},
required: ["location"],
},
},
},
},
],
},
},
},
};
Loading