Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions packages/proxy/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
"devDependencies": {
"@types/content-disposition": "^0.5.8",
"@types/jsonwebtoken": "^9.0.7",
"@types/node": "^20.10.5",
"@types/node": "^20.17.0",
"@types/uuid": "^9.0.7",
"@types/yargs": "^17.0.33",
"@typescript-eslint/eslint-plugin": "^8.21.0",
Expand All @@ -93,7 +93,7 @@
"openapi-zod-client": "^1.18.3",
"skott": "^0.35.4",
"tsup": "^8.4.0",
"typescript": "5.5.4",
"typescript": "5.9.3",
"vite-tsconfig-paths": "^4.3.2",
"vitest": "^2.1.9",
"yargs": "^17.7.2",
Expand Down
9 changes: 6 additions & 3 deletions packages/proxy/scripts/sync_models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ JSON array only:`;
const allGrokModels = [...grokModels, ...newModelNames];
if (
parsedOrder.length === allGrokModels.length &&
parsedOrder.every((name) => allGrokModels.includes(name)) &&
parsedOrder.every((name: string) => allGrokModels.includes(name)) &&
allGrokModels.every((name) => parsedOrder.includes(name))
) {
console.log("✅ Claude Code provided optimal Grok ordering");
Expand All @@ -408,7 +408,7 @@ JSON array only:`;
} catch (parseError) {
console.warn(
"Failed to parse Claude's JSON response:",
parseError.message,
parseError instanceof Error ? parseError.message : String(parseError),
);
}
} else {
Expand All @@ -420,7 +420,10 @@ JSON array only:`;
);
return getFallbackCompleteOrdering(existingModelNames, newModelNames);
} catch (error) {
console.warn("Failed to get ordering from Claude:", error.message);
console.warn(
"Failed to get ordering from Claude:",
error instanceof Error ? error.message : String(error),
);
return getFallbackCompleteOrdering(existingModelNames, newModelNames);
}
}
Expand Down
27 changes: 14 additions & 13 deletions packages/proxy/src/PrometheusSerializer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import {
Histogram,
} from "@opentelemetry/sdk-metrics";
import { hrTimeToMilliseconds } from "@opentelemetry/core";
import { IResource } from "@opentelemetry/resources";
import { Resource } from "@opentelemetry/resources";

type PrometheusDataTypeLiteral =
| "counter"
Expand Down Expand Up @@ -217,7 +217,16 @@ export class PrometheusSerializer {
}
const dataPointType = metricData.dataPointType;

name = enforcePrometheusNamingConvention(name, metricData.descriptor.type);
// Get instrument type from the metric data point type
const instrumentType =
metricData.dataPointType === DataPointType.SUM && metricData.isMonotonic
? InstrumentType.COUNTER
: metricData.dataPointType === DataPointType.SUM
? InstrumentType.UP_DOWN_COUNTER
: metricData.dataPointType === DataPointType.GAUGE
? InstrumentType.GAUGE
: InstrumentType.HISTOGRAM;
name = enforcePrometheusNamingConvention(name, instrumentType);

const help = `# HELP ${name} ${escapeString(
metricData.descriptor.description || "description missing",
Expand All @@ -233,23 +242,15 @@ export class PrometheusSerializer {
case DataPointType.GAUGE: {
results = metricData.dataPoints
.map((it) =>
this._serializeSingularDataPoint(
name,
metricData.descriptor.type,
it,
),
this._serializeSingularDataPoint(name, instrumentType, it),
)
.join("");
break;
}
case DataPointType.HISTOGRAM: {
results = metricData.dataPoints
.map((it) =>
this._serializeHistogramDataPoint(
name,
metricData.descriptor.type,
it,
),
this._serializeHistogramDataPoint(name, instrumentType, it),
)
.join("");
break;
Expand Down Expand Up @@ -343,7 +344,7 @@ export class PrometheusSerializer {
return results;
}

protected _serializeResource(resource: IResource): string {
protected _serializeResource(resource: Resource): string {
const name = "target_info";
const help = `# HELP ${name} Target metadata`;
const type = `# TYPE ${name} gauge`;
Expand Down
12 changes: 10 additions & 2 deletions packages/proxy/src/providers/google.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
});

const streamedEvents = events();
expect(streamedEvents.length).toBeGreaterThan(0);

Check failure on line 62 in packages/proxy/src/providers/google.test.ts

View workflow job for this annotation

GitHub Actions / build (20)

packages/proxy/src/providers/google.test.ts > gemini-2.5-flash-preview-05-20 > should accept and should not return reasoning/thinking params and detail streaming

AssertionError: expected 0 to be greater than 0 ❯ packages/proxy/src/providers/google.test.ts:62:37

const hasContent = streamedEvents.some(
(event) => event.data.choices[0]?.delta?.content !== undefined,
Expand Down Expand Up @@ -107,7 +107,7 @@
},
});

expect(json()).toEqual({

Check failure on line 110 in packages/proxy/src/providers/google.test.ts

View workflow job for this annotation

GitHub Actions / build (20)

packages/proxy/src/providers/google.test.ts > gemini-2.5-flash-preview-05-20 > should accept and return reasoning/thinking params and detail non-streaming

AssertionError: expected { error: { code: 404, …(2) } } to deeply equal { choices: [ { …(4) } ], …(5) } - Expected + Received Object { - "choices": Array [ - Object { - "finish_reason": "stop", - "index": 0, - "logprobs": null, - "message": Object { - "content": Any<String>, - "reasoning": Array [ - Object { - "content": Any<String>, - "id": Any<String>, - }, - ], - "refusal": null, - "role": "assistant", - }, - }, - ], - "created": Any<Number>, - "id": Any<String>, - "model": "gemini-2.5-flash-preview-05-20", - "object": "chat.completion", - "usage": Object { - "completion_tokens": Any<Number>, - "completion_tokens_details": Object { - "reasoning_tokens": Any<Number>, - }, - "prompt_tokens": Any<Number>, - "total_tokens": Any<Number>, + "error": Object { + "code": 404, + "message": "models/gemini-2.5-flash-preview-05-20 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.", + "status": "NOT_FOUND", }, } ❯ packages/proxy/src/providers/google.test.ts:110:22
choices: [
{
finish_reason: "stop",
Expand Down Expand Up @@ -176,7 +176,7 @@
},
});

expect(json()).toEqual({

Check failure on line 179 in packages/proxy/src/providers/google.test.ts

View workflow job for this annotation

GitHub Actions / build (20)

packages/proxy/src/providers/google.test.ts > gemini-2.5-flash-preview-05-20 > should disable reasoning/thinking non-streaming

AssertionError: expected { error: { code: 404, …(2) } } to deeply equal { choices: [ { …(4) } ], …(5) } - Expected + Received Object { - "choices": Array [ - Object { - "finish_reason": "stop", - "index": 0, - "logprobs": null, - "message": Object { - "content": Any<String>, - "refusal": null, - "role": "assistant", - }, - }, - ], - "created": Any<Number>, - "id": Any<String>, - "model": "gemini-2.5-flash-preview-05-20", - "object": "chat.completion", - "usage": Object { - "completion_tokens": Any<Number>, - "prompt_tokens": Any<Number>, - "total_tokens": Any<Number>, + "error": Object { + "code": 404, + "message": "models/gemini-2.5-flash-preview-05-20 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.", + "status": "NOT_FOUND", }, } ❯ packages/proxy/src/providers/google.test.ts:179:22
choices: [
{
finish_reason: "stop",
Expand Down Expand Up @@ -254,7 +254,7 @@
body,
});

expect(result.json()).toMatchObject({

Check failure on line 257 in packages/proxy/src/providers/google.test.ts

View workflow job for this annotation

GitHub Actions / build (20)

packages/proxy/src/providers/google.test.ts > gemini-2.5-flash-preview-05-20 > should work with zod-json-schemafied parameters (convert to valid gemini (openapi 3) objects)

AssertionError: expected { error: { code: 404, …(2) } } to match object { id: Any<String>, …(5) } (4 matching properties omitted from actual) - Expected + Received Object { - "choices": Array [ - Object { - "finish_reason": "tool_calls", - "index": 0, - "logprobs": null, - "message": Object { - "content": "", - "refusal": null, - "role": "assistant", - "tool_calls": Array [ - Object { - "function": Object { - "arguments": Any<String>, - "name": "unionTool", - }, - "id": Any<String>, - "type": "function", - }, - ], - }, - }, - ], - "created": Any<Number>, - "id": Any<String>, - "model": "gemini-2.5-flash-preview-05-20", - "object": "chat.completion", - "usage": Object { - "completion_tokens": Any<Number>, - "completion_tokens_details": Object { - "reasoning_tokens": Any<Number>, - }, - "prompt_tokens": Any<Number>, - "total_tokens": Any<Number>, + "error": Object { + "code": 404, + "message": "models/gemini-2.5-flash-preview-05-20 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.", + "status": "NOT_FOUND", }, } ❯ packages/proxy/src/providers/google.test.ts:257:29
id: expect.any(String),
choices: [
{
Expand Down Expand Up @@ -353,7 +353,7 @@
body,
});

expect(result.json()).toMatchObject({

Check failure on line 356 in packages/proxy/src/providers/google.test.ts

View workflow job for this annotation

GitHub Actions / build (20)

packages/proxy/src/providers/google.test.ts > gemini-2.5-flash-preview-05-20 > should work with openapi 3 parameters

AssertionError: expected { error: { code: 404, …(2) } } to match object { id: Any<String>, …(5) } (4 matching properties omitted from actual) - Expected + Received Object { - "choices": Array [ - Object { - "finish_reason": "tool_calls", - "index": 0, - "logprobs": null, - "message": Object { - "content": "", - "refusal": null, - "role": "assistant", - "tool_calls": Array [ - Object { - "function": Object { - "arguments": Any<String>, - "name": "unionTool", - }, - "id": Any<String>, - "type": "function", - }, - ], - }, - }, - ], - "created": Any<Number>, - "id": Any<String>, - "model": "gemini-2.5-flash-preview-05-20", - "object": "chat.completion", - "usage": Object { - "completion_tokens": Any<Number>, - "completion_tokens_details": Object { - "reasoning_tokens": Any<Number>, - }, - "prompt_tokens": Any<Number>, - "total_tokens": Any<Number>, + "error": Object { + "code": 404, + "message": "models/gemini-2.5-flash-preview-05-20 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.", + "status": "NOT_FOUND", }, } ❯ packages/proxy/src/providers/google.test.ts:356:29
id: expect.any(String),
choices: [
{
Expand Down Expand Up @@ -442,7 +442,7 @@
});

const response = result.json();
expect(response).toMatchObject({

Check failure on line 445 in packages/proxy/src/providers/google.test.ts

View workflow job for this annotation

GitHub Actions / build (20)

packages/proxy/src/providers/google.test.ts > gemini-2.5-flash-preview-05-20 > should work with schemas containing $ref and $defs references

AssertionError: expected { error: { code: 404, …(2) } } to match object { id: Any<String>, …(2) } (4 matching properties omitted from actual) - Expected + Received Object { - "choices": ArrayContaining [ - ObjectContaining { - "message": ObjectContaining { - "content": Any<String>, - "role": "assistant", - }, + "error": Object { + "code": 404, + "message": "models/gemini-2.5-flash-preview-05-20 is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.", + "status": "NOT_FOUND", }, - ], - "id": Any<String>, - "model": StringContaining "gemini", } ❯ packages/proxy/src/providers/google.test.ts:445:24
id: expect.any(String),
choices: expect.arrayContaining([
expect.objectContaining({
Expand All @@ -456,8 +456,16 @@
});

// Parse and validate the response content
const messageContent = response.choices[0]?.message?.content;
if (messageContent) {
const choice = response.choices[0];
const messageContent =
choice &&
"message" in choice &&
typeof choice.message === "object" &&
choice.message !== null &&
"content" in choice.message
? (choice.message as { content?: unknown }).content
: undefined;
if (messageContent && typeof messageContent === "string") {
const parsed = JSON.parse(messageContent);
expect(parsed).toHaveProperty("name");
expect(parsed).toHaveProperty("addresses");
Expand Down
72 changes: 44 additions & 28 deletions packages/proxy/src/proxy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,7 @@ export async function proxyV1({
let spanType: SpanType | undefined = undefined;
const isStreaming = !!bodyData?.stream;

// TypeScript 5.9.3: Use ReadableStream<Uint8Array> - we'll cast pipeThrough results
let stream: ReadableStream<Uint8Array> | null = null;
if (readFromCache) {
const cached = await cacheGet(encryptionKey, cacheKey);
Expand Down Expand Up @@ -1412,7 +1413,7 @@ async function fetchModelLoop(
}
}

let stream = proxyResponse.stream;
let stream: ReadableStream<Uint8Array> | null = proxyResponse.stream;
if (!proxyResponse.response.ok) {
logHistogram?.({
name: "endpoint_failures",
Expand Down Expand Up @@ -1444,7 +1445,8 @@ async function fetchModelLoop(
controller.terminate();
},
});
stream = stream.pipeThrough(timingStream);
stream =
(stream.pipeThrough(timingStream) as ReadableStream<Uint8Array>) || null;
}
return {
modelResponse: {
Expand Down Expand Up @@ -1790,7 +1792,7 @@ async function fetchOpenAIResponsesTranslate({
body: JSON.stringify(responsesRequestFromChatCompletionsRequest(body)),
signal,
});
let stream = response.body;
let stream: ReadableStream<Uint8Array> | null = response.body;
if (response.ok && stream) {
const oaiResponse: OpenAIResponse = await collectStream(stream);
if (oaiResponse.error) {
Expand All @@ -1809,7 +1811,11 @@ async function fetchOpenAIResponsesTranslate({
if (body.stream) {
// Fake stream for now, since it looks like the entire text output is sent in one chunk,
// so we don't see any UX improvement.
stream = stream.pipeThrough(makeFakeOpenAIStreamTransformer());
// TypeScript 5.9.3: pipeThrough returns ReadableStream<Uint8Array<ArrayBufferLike>>
// but we need ReadableStream<Uint8Array<ArrayBuffer>>, so we cast it
stream = stream.pipeThrough(
makeFakeOpenAIStreamTransformer(),
) as ReadableStream<Uint8Array>;
}
}
return {
Expand Down Expand Up @@ -2136,10 +2142,11 @@ async function fetchOpenAI(
},
);

let stream = proxyResponse.body;
let stream: ReadableStream<Uint8Array> | null =
proxyResponse.body as ReadableStream<Uint8Array> | null;
if (isManagedStructuredOutput && stream) {
if (bodyData?.stream) {
stream = stream.pipeThrough(
const transformedStream = stream.pipeThrough(
createEventStreamTransformer((data) => {
const chunk: ChatCompletionChunk = JSON.parse(data);
const choice = chunk.choices[0];
Expand All @@ -2163,34 +2170,43 @@ async function fetchOpenAI(
};
}),
);
// TypeScript 5.9.3: pipeThrough returns ReadableStream<Uint8Array<ArrayBufferLike>>
// but we need ReadableStream<Uint8Array<ArrayBuffer>>, so we cast it
stream = (transformedStream as ReadableStream<Uint8Array>) || null;
} else {
const chunks: Uint8Array[] = [];
stream = stream.pipeThrough(
new TransformStream({
transform(chunk, _controller) {
chunks.push(chunk);
},
flush(controller) {
const data: ChatCompletion = JSON.parse(flattenChunks(chunks));
const choice = data.choices[0];
const toolCall = choice.message.tool_calls![0];
if (toolCall.type === "function") {
choice.message.content = toolCall.function.arguments;
}
choice.finish_reason = "stop";
delete choice.message.tool_calls;
controller.enqueue(new TextEncoder().encode(JSON.stringify(data)));
controller.terminate();
},
}),
);
stream =
(stream.pipeThrough(
new TransformStream({
transform(chunk, _controller) {
chunks.push(chunk);
},
flush(controller) {
const data: ChatCompletion = JSON.parse(flattenChunks(chunks));
const choice = data.choices[0];
const toolCall = choice.message.tool_calls![0];
if (toolCall.type === "function") {
choice.message.content = toolCall.function.arguments;
}
choice.finish_reason = "stop";
delete choice.message.tool_calls;
controller.enqueue(
new TextEncoder().encode(JSON.stringify(data)),
);
controller.terminate();
},
}),
) as ReadableStream<Uint8Array>) || null;
}
}

if (secret.type === "mistral" && stream && bodyData?.stream) {
stream = stream.pipeThrough(
createEventStreamTransformer(transformMistralThinkingChunks()),
);
// TypeScript 5.9.3: pipeThrough returns ReadableStream<Uint8Array<ArrayBufferLike>>
// but we need ReadableStream<Uint8Array<ArrayBuffer>>, so we cast it
stream =
(stream.pipeThrough(
createEventStreamTransformer(transformMistralThinkingChunks()),
) as ReadableStream<Uint8Array>) || null;
}

return {
Expand Down
1 change: 1 addition & 0 deletions packages/proxy/src/util.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import contentDisposition from "content-disposition";
export interface ModelResponse {
// TypeScript 5.9.3: Use ReadableStream<Uint8Array> - pipeThrough results are cast
stream: ReadableStream<Uint8Array> | null;
response: Response;
}
Expand Down
7 changes: 5 additions & 2 deletions packages/proxy/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
"declaration": true,
"lib": ["es2015", "dom"],
"target": "ES2018",
"module": "esnext",
"strict": true,
"moduleResolution": "node",
"moduleResolution": "bundler",
"jsx": "react",
"baseUrl": ".",
"paths": {
"@lib/*": ["src/*"],
Expand All @@ -13,7 +15,8 @@
"@types": ["types/index"]
},
"resolveJsonModule": true,
"esModuleInterop": true
"esModuleInterop": true,
"skipLibCheck": true
},
"include": ["."],
"exclude": ["node_modules/**", "**/dist/**"]
Expand Down
14 changes: 9 additions & 5 deletions packages/proxy/utils/audioEncoder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@ export function makeWavFile(
byte_order: "little",
bits_per_sample: 16,
};
// TypeScript 5.9.3: typed arrays such as Int16Array<ArrayBuffer> are no longer assignable to ArrayBufferLike, hence the explicit double cast on the mapped result
buffers = buffers.map((buffer) =>
floatTo16BitPCM(new Float32Array(buffer)),
);
) as unknown as ArrayBufferLike[];
}

const dataLength = buffers.reduce((sum, b) => sum + b.byteLength, 0);
Expand Down Expand Up @@ -57,7 +58,8 @@ export function makeWavFile(
...buffers,
];

return new Blob(blobParts, { type: "audio/wav" });
// TypeScript 5.9.3: the stricter typings do not accept entries typed as ArrayBufferLike as BlobPart[] directly, so the array is cast explicitly
return new Blob(blobParts as BlobPart[], { type: "audio/wav" });
}

function wavFormatCode(format: PcmAudioFormat) {
Expand Down Expand Up @@ -134,7 +136,7 @@ export function makeMp3File(
bitrate,
);

const blobParts: ArrayBuffer[] = [];
const blobParts: BlobPart[] = [];

for (const buffer of buffers) {
const int16Buffer =
Expand All @@ -143,11 +145,13 @@ export function makeMp3File(
: floatTo16BitPCM(new Float32Array(buffer));
const encoded = encoder.encodeBuffer(int16Buffer);
if (encoded.length) {
blobParts.push(encoded);
// TypeScript 5.9.3: the encoder output (typed Uint8Array<ArrayBufferLike>) is not accepted as a BlobPart by the stricter typings without an explicit cast
blobParts.push(encoded as BlobPart);
}
}

blobParts.push(encoder.flush());
// TypeScript 5.9.3: encoder.flush() is typed as returning Uint8Array<ArrayBufferLike>, which the stricter typings do not accept as a BlobPart without an explicit cast
blobParts.push(encoder.flush() as BlobPart);

return new Blob(blobParts, { type: "audio/mpeg" });
}
3 changes: 2 additions & 1 deletion packages/proxy/utils/encrypt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ export async function encryptMessage(
);

return {
iv: arrayBufferToBase64(new Uint8Array(iv)),
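// TypeScript 5.9.3: Uint8Array.buffer is typed as ArrayBufferLike, so it is asserted (not converted) to ArrayBuffer here.
// NOTE(review): .buffer exposes the entire backing buffer and ignores byteOffset/length — confirm iv always spans its whole buffer.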
iv: arrayBufferToBase64(iv.buffer as ArrayBuffer),
data: arrayBufferToBase64(decoded),
};
}
5 changes: 2 additions & 3 deletions packages/proxy/utils/tempCredentials.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,8 @@ export function makeTempCredentialsJwt({

// Generate 256-bit key since our cache uses AES-256.
const keyLengthBytes = 256 / 8;
const cacheEncryptionKey = arrayBufferToBase64(
crypto.getRandomValues(new Uint8Array(keyLengthBytes)),
);
const randomValues = crypto.getRandomValues(new Uint8Array(keyLengthBytes));
const cacheEncryptionKey = arrayBufferToBase64(randomValues.buffer);

// The partial payload is missing timestamps (`iat`, `exp`). They will be
// populated at signing time with the `mutatePayload` option.
Expand Down
6 changes: 5 additions & 1 deletion packages/proxy/utils/tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { TextDecoder } from "util";
import { Buffer } from "node:buffer";
import { proxyV1 } from "../src/proxy";
import { getModelEndpointTypes } from "@schema";
import type { APISecret } from "@schema";
import { createParser, ParsedEvent, ParseEvent } from "eventsource-parser";

export function createResponseStream(): [
Expand Down Expand Up @@ -54,6 +55,7 @@ export const getKnownApiSecrets: Parameters<
useCache: boolean,
authToken: string,
model: string | null,
_org_name?: string,
) => {
const endpointTypes = model && getModelEndpointTypes(model);
if (!endpointTypes?.length) throw new Error(`Unknown model: ${model}`);
Expand Down Expand Up @@ -123,7 +125,9 @@ export const getKnownApiSecrets: Parameters<
excludeDefaultModels: true,
},
},
].filter((secret) => !!secret.secret && endpointTypes.includes(secret.type));
].filter(
(secret) => !!secret.secret && endpointTypes.includes(secret.type),
) as APISecret[];
};

export async function callProxyV1<Input extends object, Output extends object>({
Expand Down
Loading
Loading