Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
1184016
update to zod 4
cpinn Dec 23, 2025
2e0583b
Fix Zod v4 schema generation for OpenAI function calling
cpinn Dec 24, 2025
8dd9a8c
Fix remaining zodToJsonSchema call and prettier formatting
cpinn Dec 24, 2025
508d006
no transforms in ragas, default to zod 4 json parsing
cpinn Dec 26, 2025
619eadb
update autoevals
cpinn Dec 28, 2025
eb63be0
add copy of zod json schema
cpinn Dec 28, 2025
9b4b407
drop python 3.8 support
cpinn Dec 29, 2025
eb2ae2d
switch back to using native toJSONSchema in zod 4.2
cpinn Dec 29, 2025
947418a
use openai 6.7 which fixes zod support for native toJSONSchema
cpinn Dec 29, 2025
6fb2c09
Use Zod v3 compatibility mode for production API compatibility
cpinn Dec 29, 2025
7e4d5e1
Add Zod as peer dependency
cpinn Dec 29, 2025
2be0919
use zod 3 syntax on the template
cpinn Dec 29, 2025
93f5e28
make zod a peer dependency in autoevals
cpinn Dec 29, 2025
7d36c10
bump package version
cpinn Dec 29, 2025
b4ff806
Merge branch 'main' into caitlin/update-zod4
cpinn Dec 29, 2025
bd3c048
Revert "bump package version"
cpinn Dec 30, 2025
7813882
Revert "make zod a peer dependency in autoevals"
cpinn Dec 30, 2025
5ec6b50
Revert "use zod 3 syntax on the template"
cpinn Dec 30, 2025
84e2c48
Revert "Add Zod as peer dependency"
cpinn Dec 30, 2025
3dacda0
Revert "Use Zod v3 compatibility mode for production API compatibility"
cpinn Dec 30, 2025
16a8e50
Reapply "Use Zod v3 compatibility mode for production API compatibility"
cpinn Dec 30, 2025
a7ce47f
Reapply "Add Zod as peer dependency"
cpinn Dec 30, 2025
3c73f4c
Reapply "use zod 3 syntax on the template"
cpinn Dec 30, 2025
afe5fa7
Reapply "make zod a peer dependency in autoevals"
cpinn Dec 30, 2025
3b66a4e
Reapply "bump package version"
cpinn Dec 30, 2025
cf5e3a7
keep zod on v3 for dev
cpinn Dec 31, 2025
666ea6a
Merge branch 'main' into caitlin/update-zod4
cpinn Jan 1, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,340 changes: 114 additions & 1,226 deletions evals/datasets/coqa-closed-qa.json

Large diffs are not rendered by default.

2,010 changes: 171 additions & 1,839 deletions evals/datasets/coqa-context-relevancy.json

Large diffs are not rendered by default.

2,010 changes: 171 additions & 1,839 deletions evals/datasets/coqa-factuality.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion evals/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"dependencies": {
"autoevals": "workspace:*",
"braintrust": "^0.0.140",
"zod": "^3.22.4"
"zod": "^4.2.1"
},
"devDependencies": {
"@types/node": "^20.10.5",
Expand Down
2 changes: 1 addition & 1 deletion evals/src/autoevals.eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import {
coqaCaseSchema,
dataDir,
} from "./datasets";
import { z } from "zod";
import { z } from "zod/v3";
import {
AnswerCorrectness,
ClosedQA,
Expand Down
2 changes: 1 addition & 1 deletion evals/src/datasets.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { z } from "zod";
import { z } from "zod/v3";

import path from "path";

Expand Down
2 changes: 1 addition & 1 deletion evals/src/sync_datasets.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { duckq, getDuckDBConn } from "./duckdb";

import { z } from "zod";
import { z } from "zod/v3";
import {
coqaSchema,
dataDir,
Expand Down
111 changes: 43 additions & 68 deletions js/ragas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ import { Scorer, ScorerArgs } from "./score";
import { DEFAULT_MODEL, LLMArgs } from "./llm";
import { buildOpenAIClient, extractOpenAIArgs } from "./oai";
import OpenAI from "openai";
import { zodFunction } from "openai/helpers/zod";
import { ListContains } from "./list";
import { EmbeddingSimilarity } from "./string";
import { z } from "zod";
import zodToJsonSchema from "zod-to-json-schema";
import { z } from "zod/v3";
import { makePartial, ScorerWithPartial } from "./partial";

type RagasArgs = {
Expand Down Expand Up @@ -87,14 +87,11 @@ export const ContextEntityRecall: ScorerWithPartial<
},
],
tools: [
{
type: "function",
function: {
name: "extract_entities",
description: "Extract unique entities from a given text",
parameters: zodToJsonSchema(entitySchema),
},
},
zodFunction({
name: "extract_entities",
description: "Extract unique entities from a given text",
parameters: entitySchema,
}),
],
tool_choice: { type: "function", function: { name: "extract_entities" } },
});
Expand Down Expand Up @@ -167,14 +164,11 @@ export const ContextRelevancy: ScorerWithPartial<string, RagasArgs> =
},
],
tools: [
{
type: "function",
function: {
name: "extract_sentences",
description: "Extract relevant sentences from a given context",
parameters: zodToJsonSchema(relevantSentencesSchema),
},
},
zodFunction({
name: "extract_sentences",
description: "Extract relevant sentences from a given context",
parameters: relevantSentencesSchema,
}),
],
tool_choice: {
type: "function",
Expand Down Expand Up @@ -267,13 +261,10 @@ export const ContextRecall: ScorerWithPartial<string, RagasArgs> = makePartial(
},
],
tools: [
{
type: "function",
function: {
name: "extract_statements",
parameters: zodToJsonSchema(contextRecallSchema),
},
},
zodFunction({
name: "extract_statements",
parameters: contextRecallSchema,
}),
],
tool_choice: {
type: "function",
Expand Down Expand Up @@ -367,15 +358,11 @@ export const ContextPrecision: ScorerWithPartial<string, RagasArgs> =
},
],
tools: [
{
type: "function",
function: {
name: "verify",
description:
"Verify if context was useful in arriving at the answer",
parameters: zodToJsonSchema(contextPrecisionSchema),
},
},
zodFunction({
name: "verify",
description: "Verify if context was useful in arriving at the answer",
parameters: contextPrecisionSchema,
}),
],
tool_choice: { type: "function", function: { name: "verify" } },
});
Expand Down Expand Up @@ -494,14 +481,11 @@ export const Faithfulness: ScorerWithPartial<string, RagasArgs> = makePartial(
},
],
tools: [
{
type: "function",
function: {
name: "extract_statements",
description: "Extract statements from an answer given a question",
parameters: zodToJsonSchema(extractedStatementsSchema),
},
},
zodFunction({
name: "extract_statements",
description: "Extract statements from an answer given a question",
parameters: extractedStatementsSchema,
}),
],
tool_choice: {
type: "function",
Expand All @@ -525,15 +509,12 @@ export const Faithfulness: ScorerWithPartial<string, RagasArgs> = makePartial(
},
],
tools: [
{
type: "function",
function: {
name: "judge_statements",
description:
"Judge whether the statements are faithful to the context",
parameters: zodToJsonSchema(statementFaithfulnessSchema),
},
},
zodFunction({
name: "judge_statements",
description:
"Judge whether the statements are faithful to the context",
parameters: statementFaithfulnessSchema,
}),
],
tool_choice: { type: "function", function: { name: "judge_statements" } },
});
Expand Down Expand Up @@ -635,15 +616,12 @@ export const AnswerRelevancy: ScorerWithPartial<
},
],
tools: [
{
type: "function",
function: {
name: "generate_question",
description:
"Generate a question for the given answer and identify if the answer is noncommittal",
parameters: zodToJsonSchema(questionGenSchema),
},
},
zodFunction({
name: "generate_question",
description:
"Generate a question for the given answer and identify if the answer is noncommittal",
parameters: questionGenSchema,
}),
],
tool_choice: {
type: "function",
Expand Down Expand Up @@ -810,14 +788,11 @@ export const AnswerCorrectness: ScorerWithPartial<
},
],
tools: [
{
type: "function",
function: {
name: "classify_statements",
description: "Classify statements as TP, FP, or FN",
parameters: zodToJsonSchema(answerCorrectnessClassificationSchema),
},
},
zodFunction({
name: "classify_statements",
description: "Classify statements as TP, FP, or FN",
parameters: answerCorrectnessClassificationSchema,
}),
],
tool_choice: {
type: "function",
Expand Down
4 changes: 2 additions & 2 deletions js/templates.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { z } from "zod";
import { z } from "zod/v3";
import * as yaml from "js-yaml";

import battle from "../templates/battle.yaml";
Expand All @@ -13,7 +13,7 @@ import translation from "../templates/translation.yaml";

export const modelGradedSpecSchema = z.object({
prompt: z.string(),
choice_scores: z.record(z.number()),
choice_scores: z.record(z.string(), z.number()),
model: z.string().optional(),
use_cot: z.boolean().optional(),
temperature: z.number().optional(),
Expand Down
13 changes: 8 additions & 5 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "autoevals",
"version": "0.0.131",
"version": "0.1.0",
"description": "Universal library for evaluating AI models",
"repository": {
"type": "git",
Expand Down Expand Up @@ -44,7 +44,11 @@
"typedoc": "^0.25.13",
"typedoc-plugin-markdown": "^3.17.1",
"typescript": "^5.9.2",
"vitest": "^2.1.9"
"vitest": "^2.1.9",
"zod": "3.25.67"
},
"peerDependencies": {
"zod": "^3.0.0 || ^4.0.0"
},
"dependencies": {
"ajv": "^8.17.1",
Expand All @@ -53,9 +57,8 @@
"js-yaml": "^4.1.0",
"linear-sum-assignment": "^1.0.7",
"mustache": "^4.2.0",
"openai": "^6.3.0",
"zod": "^3.25.76",
"zod-to-json-schema": "^3.24.6"
"openai": "^6.7.0",
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

openai 6.7.0 is required to properly support zod 4, with a fallback to zod 3

"zod-to-json-schema": "3.25.0"
},
"packageManager": "pnpm@10.26.2"
}
Loading