diff --git a/openapi.yaml b/openapi.yaml index de97a6c..3fd8889 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -5298,6 +5298,616 @@ paths: } } ``` + /deployments: + get: + description: Get a list of all deployments in your project + responses: + "200": + description: List of deployments + content: + application/json: + schema: + $ref: "#/components/schemas/DeploymentListResponse" + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Get the list of deployments + tags: + - Deployments + post: + description: Create a new deployment with specified configuration + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/CreateDeploymentRequest" + description: Deployment configuration + required: true + responses: + "200": + description: Deployment created successfully + content: + application/json: + schema: + $ref: "#/components/schemas/DeploymentResponseItem" + "400": + description: Invalid request + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Create a new deployment + tags: + - Deployments + /deployments/{id}: + delete: + description: Delete an existing deployment + parameters: + - description: Deployment ID or name + in: path + name: id + required: true + schema: + type: string + responses: + "200": + description: Deployment deleted successfully + content: + application/json: + schema: + type: object + "404": + description: Deployment not found + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Delete a deployment + tags: + - Deployments + get: + description: Retrieve details of a specific deployment by its ID or name + parameters: + - description: Deployment ID or name + in: path + name: id + required: true + schema: + type: string + 
responses: + "200": + description: Deployment details + content: + application/json: + schema: + $ref: "#/components/schemas/DeploymentResponseItem" + "404": + description: Deployment not found + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Get a deployment by ID or name + tags: + - Deployments + patch: + description: Update an existing deployment configuration + parameters: + - description: Deployment ID or name + in: path + name: id + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/UpdateDeploymentRequest" + description: Updated deployment configuration + required: true + responses: + "200": + description: Deployment updated successfully + content: + application/json: + schema: + $ref: "#/components/schemas/DeploymentResponseItem" + "400": + description: Invalid request + content: + application/json: + schema: + type: object + "404": + description: Deployment not found + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Update a deployment + tags: + - Deployments + /deployments/{id}/logs: + get: + description: Retrieve logs from a deployment, optionally filtered by replica ID. + Use follow=true to stream logs in real-time. 
+ parameters: + - description: Deployment ID or name + in: path + name: id + required: true + schema: + type: string + - description: Replica ID to filter logs + in: query + name: replica_id + schema: + type: string + - description: Stream logs in real-time (ndjson format) + in: query + name: follow + schema: + type: boolean + responses: + "200": + description: Deployment logs + content: + application/json: + schema: + $ref: "#/components/schemas/DeploymentLogs" + "404": + description: Deployment not found + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Get logs for a deployment + tags: + - Deployments + /image-repositories: + get: + description: Retrieve all container image repositories available in your project + responses: + "200": + description: List of repositories + content: + application/json: + schema: + $ref: "#/components/schemas/RepositoryListResponse" + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Get the list of image repositories in your project + tags: + - Images + /image-repositories/{id}/images: + get: + description: Retrieve all container images (tags) available in a specific repository + parameters: + - description: Repository ID + in: path + name: id + required: true + schema: + type: string + responses: + "200": + description: List of images + content: + application/json: + schema: + $ref: "#/components/schemas/ImageListResponse" + "404": + description: Repository not found + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Get the list of images available under a repository + tags: + - Images + /secrets: + get: + description: Retrieve all secrets in your project + responses: + "200": + description: List of secrets + content: + application/json: 
+ schema: + $ref: "#/components/schemas/ListSecretsResponse" + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Get the list of project secrets + tags: + - Secrets + post: + description: Create a new secret to store sensitive configuration values + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/CreateSecretRequest" + description: Secret configuration + required: true + responses: + "200": + description: Secret created successfully + content: + application/json: + schema: + $ref: "#/components/schemas/SecretResponseItem" + "400": + description: Invalid request + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Create a new secret + tags: + - Secrets + /secrets/{id}: + delete: + description: Delete an existing secret + parameters: + - description: Secret ID or name + in: path + name: id + required: true + schema: + type: string + responses: + "200": + description: Secret deleted successfully + content: + application/json: + schema: + type: object + "404": + description: Secret not found + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Delete a secret + tags: + - Secrets + get: + description: Retrieve details of a specific secret by its ID or name + parameters: + - description: Secret ID or name + in: path + name: id + required: true + schema: + type: string + responses: + "200": + description: Secret details + content: + application/json: + schema: + $ref: "#/components/schemas/SecretResponseItem" + "404": + description: Secret not found + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Get a secret by ID or name + 
tags: + - Secrets + patch: + description: Update an existing secret's value or metadata + parameters: + - description: Secret ID or name + in: path + name: id + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/UpdateSecretRequest" + description: Updated secret configuration + required: true + responses: + "200": + description: Secret updated successfully + content: + application/json: + schema: + $ref: "#/components/schemas/SecretResponseItem" + "400": + description: Invalid request + content: + application/json: + schema: + type: object + "404": + description: Secret not found + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Update a secret + tags: + - Secrets + /storage/{filename}: + get: + description: Download a file by redirecting to a signed URL + parameters: + - description: Filename + in: path + name: filename + required: true + schema: + type: string + responses: + "307": + description: Redirect to signed download URL + content: + application/json: + schema: + type: string + "400": + description: Invalid request + content: + application/json: + schema: + additionalProperties: + type: string + type: object + "404": + description: File not found + content: + application/json: + schema: + additionalProperties: + type: string + type: object + "500": + description: Internal error + content: + application/json: + schema: + additionalProperties: + type: string + type: object + summary: Download a file + tags: + - files + /storage/volumes: + get: + description: Retrieve all volumes in your project + responses: + "200": + description: List of volumes + content: + application/json: + schema: + $ref: "#/components/schemas/ListVolumesResponse" + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Get the list of 
project volumes + tags: + - Volumes + post: + description: Create a new volume to preload files in deployments + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/CreateVolumeRequest" + description: Volume configuration + required: true + responses: + "200": + description: Volume created successfully + content: + application/json: + schema: + $ref: "#/components/schemas/VolumeResponseItem" + "400": + description: Invalid request + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Create a new volume + tags: + - Volumes + /storage/volumes/{id}: + delete: + description: Delete an existing volume + parameters: + - description: Volume ID or name + in: path + name: id + required: true + schema: + type: string + responses: + "200": + description: Volume deleted successfully + content: + application/json: + schema: + type: object + "404": + description: Volume not found + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Delete a volume + tags: + - Volumes + get: + description: Retrieve details of a specific volume by its ID or name + parameters: + - description: Volume ID or name + in: path + name: id + required: true + schema: + type: string + responses: + "200": + description: Volume details + content: + application/json: + schema: + $ref: "#/components/schemas/VolumeResponseItem" + "404": + description: Volume not found + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Get a volume by ID or name + tags: + - Volumes + patch: + description: Update an existing volume's configuration or contents + parameters: + - description: Volume ID or name + in: path + name: id + required: true + schema: 
+ type: string + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/UpdateVolumeRequest" + description: Updated volume configuration + required: true + responses: + "200": + description: Volume updated successfully + content: + application/json: + schema: + $ref: "#/components/schemas/VolumeResponseItem" + "400": + description: Invalid request + content: + application/json: + schema: + type: object + "404": + description: Volume not found + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Update a volume + tags: + - Volumes + components: securitySchemes: @@ -6459,41 +7069,283 @@ components: - zh response_encoding: type: string - description: Audio encoding of response - default: pcm_f32le - enum: - - pcm_f32le - - pcm_s16le - - pcm_mulaw - - pcm_alaw - sample_rate: + description: Audio encoding of response + default: pcm_f32le + enum: + - pcm_f32le + - pcm_s16le + - pcm_mulaw + - pcm_alaw + sample_rate: + type: integer + default: 44100 + description: Sampling rate to use for the output audio. The default sampling rate for canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for cartesia/sonic is 44100. + stream: + type: boolean + default: false + description: 'If true, output is streamed for several characters at a time instead of waiting for the full response. The stream terminates with `data: [DONE]`. If false, return the encoded audio as octet stream' + + AudioTranscriptionRequest: + type: object + required: + - file + properties: + file: + oneOf: + - $ref: '#/components/schemas/AudioFileBinary' + - $ref: '#/components/schemas/AudioFileUrl' + description: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, .webm, .flac. 
+ model: + type: string + description: Model to use for transcription + default: openai/whisper-large-v3 + enum: + - openai/whisper-large-v3 + language: + type: string + description: Optional ISO 639-1 language code. If `auto` is provided, language is auto-detected. + default: en + example: en + prompt: + type: string + description: Optional text to bias decoding. + response_format: + type: string + description: The format of the response + default: json + enum: + - json + - verbose_json + temperature: + type: number + format: float + description: Sampling temperature between 0.0 and 1.0 + default: 0.0 + minimum: 0.0 + maximum: 1.0 + timestamp_granularities: + oneOf: + - type: string + enum: + - segment + - word + - type: array + items: + type: string + enum: + - segment + - word + uniqueItems: true + minItems: 1 + maxItems: 2 + description: Controls level of timestamp detail in verbose_json. Only used when response_format is verbose_json. Can be a single granularity or an array to get multiple levels. + default: segment + example: ['word', 'segment'] + diarize: + type: boolean + description: > + Whether to enable speaker diarization. When enabled, you will get the speaker id for each word in the transcription. + In the response, in the words array, you will get the speaker id for each word. + In addition, we also return the speaker_segments array which contains the speaker id for each speaker segment along with the start and end time of the segment along with all the words in the segment. +
+
+ For eg - + ... + "speaker_segments": [ + "speaker_id": "SPEAKER_00", + "start": 0, + "end": 30.02, + "words": [ + { + "id": 0, + "word": "Tijana", + "start": 0, + "end": 11.475, + "speaker_id": "SPEAKER_00" + }, + ... + default: false + min_speakers: + type: integer + description: Minimum number of speakers expected in the audio. Used to improve diarization accuracy when the approximate number of speakers is known. + max_speakers: + type: integer + description: Maximum number of speakers expected in the audio. Used to improve diarization accuracy when the approximate number of speakers is known. + + AudioTranscriptionResponse: + oneOf: + - $ref: '#/components/schemas/AudioTranscriptionJsonResponse' + - $ref: '#/components/schemas/AudioTranscriptionVerboseJsonResponse' + + AudioTranscriptionJsonResponse: + type: object + required: + - text + properties: + text: + type: string + description: The transcribed text + example: Hello, world! + + AudioTranscriptionVerboseJsonResponse: + type: object + required: + - task + - language + - duration + - text + - segments + properties: + task: + type: string + description: The task performed + enum: + - transcribe + - translate + example: transcribe + language: + type: string + description: The language of the audio + example: english + duration: + type: number + format: float + description: The duration of the audio in seconds + example: 3.5 + text: + type: string + description: The transcribed text + example: Hello, world! 
+ segments: + type: array + items: + $ref: '#/components/schemas/AudioTranscriptionSegment' + description: Array of transcription segments + words: + type: array + items: + $ref: '#/components/schemas/AudioTranscriptionWord' + description: Array of transcription words (only when timestamp_granularities includes 'word') + speaker_segments: + type: array + items: + $ref: '#/components/schemas/AudioTranscriptionSpeakerSegment' + description: Array of transcription speaker segments (only when diarize is enabled) + + AudioTranscriptionSegment: + type: object + required: + - id + - start + - end + - text + properties: + id: + type: integer + description: Unique identifier for the segment + example: 0 + start: + type: number + format: float + description: Start time of the segment in seconds + example: 0.0 + end: + type: number + format: float + description: End time of the segment in seconds + example: 3.5 + text: + type: string + description: The text content of the segment + example: Hello, world! 
+ + AudioTranscriptionWord: + type: object + required: + - word + - start + - end + properties: + word: + type: string + description: The word + example: Hello + start: + type: number + format: float + description: Start time of the word in seconds + example: 0.0 + end: + type: number + format: float + description: End time of the word in seconds + example: 0.5 + speaker_id: + type: string + description: The speaker id for the word (only when diarize is enabled) + example: SPEAKER_00 + + AudioTranscriptionSpeakerSegment: + type: object + required: + - speaker_id + - start + - end + - words + - text + - id + properties: + speaker_id: + type: string + description: The speaker identifier + example: SPEAKER_00 + start: + type: number + format: float + description: Start time of the speaker segment in seconds + example: 0.0 + end: + type: number + format: float + description: End time of the speaker segment in seconds + example: 30.02 + words: + type: array + items: + $ref: '#/components/schemas/AudioTranscriptionWord' + description: Array of words spoken by this speaker in this segment + text: + type: string + description: The full text spoken by this speaker in this segment + example: "Hello, how are you doing today?" + id: type: integer - default: 44100 - description: Sampling rate to use for the output audio. The default sampling rate for canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for cartesia/sonic is 44100. - stream: - type: boolean - default: false - description: 'If true, output is streamed for several characters at a time instead of waiting for the full response. The stream terminates with `data: [DONE]`. 
If false, return the encoded audio as octet stream' + description: Unique identifier for the speaker segment + example: 1 - AudioTranscriptionRequest: + AudioTranslationRequest: type: object required: - file properties: file: oneOf: - - $ref: '#/components/schemas/AudioFileBinary' - - $ref: '#/components/schemas/AudioFileUrl' + - type: string + format: binary + description: Audio file to translate + - type: string + format: uri + description: Public HTTP/HTTPS URL to audio file description: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, .webm, .flac. model: type: string - description: Model to use for transcription + description: Model to use for translation default: openai/whisper-large-v3 enum: - openai/whisper-large-v3 language: type: string - description: Optional ISO 639-1 language code. If `auto` is provided, language is auto-detected. + description: Target output language. Optional ISO 639-1 language code. If omitted, language is set to English. default: en example: en prompt: @@ -6531,53 +7383,23 @@ components: description: Controls level of timestamp detail in verbose_json. Only used when response_format is verbose_json. Can be a single granularity or an array to get multiple levels. default: segment example: ['word', 'segment'] - diarize: - type: boolean - description: > - Whether to enable speaker diarization. When enabled, you will get the speaker id for each word in the transcription. - In the response, in the words array, you will get the speaker id for each word. - In addition, we also return the speaker_segments array which contains the speaker id for each speaker segment along with the start and end time of the segment along with all the words in the segment. -
-
- For eg - - ... - "speaker_segments": [ - "speaker_id": "SPEAKER_00", - "start": 0, - "end": 30.02, - "words": [ - { - "id": 0, - "word": "Tijana", - "start": 0, - "end": 11.475, - "speaker_id": "SPEAKER_00" - }, - ... - default: false - min_speakers: - type: integer - description: Minimum number of speakers expected in the audio. Used to improve diarization accuracy when the approximate number of speakers is known. - max_speakers: - type: integer - description: Maximum number of speakers expected in the audio. Used to improve diarization accuracy when the approximate number of speakers is known. - AudioTranscriptionResponse: + AudioTranslationResponse: oneOf: - - $ref: '#/components/schemas/AudioTranscriptionJsonResponse' - - $ref: '#/components/schemas/AudioTranscriptionVerboseJsonResponse' + - $ref: '#/components/schemas/AudioTranslationJsonResponse' + - $ref: '#/components/schemas/AudioTranslationVerboseJsonResponse' - AudioTranscriptionJsonResponse: + AudioTranslationJsonResponse: type: object required: - text properties: text: type: string - description: The transcribed text + description: The translated text example: Hello, world! - AudioTranscriptionVerboseJsonResponse: + AudioTranslationVerboseJsonResponse: type: object required: - task @@ -6592,623 +7414,865 @@ components: enum: - transcribe - translate - example: transcribe + example: translate language: type: string - description: The language of the audio - example: english - duration: - type: number - format: float - description: The duration of the audio in seconds - example: 3.5 - text: + description: The target language of the translation + example: english + duration: + type: number + format: float + description: The duration of the audio in seconds + example: 3.5 + text: + type: string + description: The translated text + example: Hello, world! 
+ segments: + type: array + items: + $ref: '#/components/schemas/AudioTranscriptionSegment' + description: Array of translation segments + words: + type: array + items: + $ref: '#/components/schemas/AudioTranscriptionWord' + description: Array of translation words (only when timestamp_granularities includes 'word') + + AudioSpeechStreamResponse: + oneOf: + - $ref: '#/components/schemas/AudioSpeechStreamEvent' + - $ref: '#/components/schemas/StreamSentinel' + + AudioSpeechStreamEvent: + type: object + required: [data] + properties: + data: + $ref: '#/components/schemas/AudioSpeechStreamChunk' + + AudioSpeechStreamChunk: + type: object + required: [object, model, b64] + properties: + object: + type: string + enum: + - audio.tts.chunk + model: + type: string + example: cartesia/sonic + b64: + type: string + description: base64 encoded audio stream + + StreamSentinel: + type: object + required: [data] + properties: + data: + title: stream_signal + type: string + enum: + - '[DONE]' + + ChatCompletionToken: + type: object + required: [id, text, logprob, special] + properties: + id: + type: integer + text: + type: string + logprob: + type: number + special: + type: boolean + + ChatCompletionChoice: + type: object + required: [index, delta, finish_reason] + properties: + index: + type: integer + finish_reason: + $ref: '#/components/schemas/FinishReason' + logprobs: + $ref: '#/components/schemas/LogprobsPart' + delta: + title: ChatCompletionChoiceDelta + type: object + required: [role] + properties: + token_id: + type: integer + role: + type: string + enum: ['system', 'user', 'assistant', 'function', 'tool'] + content: + type: string + nullable: true + tool_calls: + type: array + items: + $ref: '#/components/schemas/ToolChoice' + function_call: + type: object + deprecated: true + nullable: true + properties: + arguments: + type: string + name: + type: string + required: + - arguments + - name + reasoning: + type: string + nullable: true + + EmbeddingsRequest: + type: object 
+ required: + - model + - input + properties: + model: + type: string + description: > + The name of the embedding model to use.
+
+ [See all of Together AI's embedding models](https://docs.together.ai/docs/serverless-models#embedding-models) + example: togethercomputer/m2-bert-80M-8k-retrieval + anyOf: + - type: string + enum: + - WhereIsAI/UAE-Large-V1 + - BAAI/bge-large-en-v1.5 + - BAAI/bge-base-en-v1.5 + - togethercomputer/m2-bert-80M-8k-retrieval + - type: string + input: + oneOf: + - type: string + description: A string providing the text for the model to embed. + example: Our solar system orbits the Milky Way galaxy at about 515,000 mph + - type: array + items: + type: string + description: A string providing the text for the model to embed. + example: Our solar system orbits the Milky Way galaxy at about 515,000 mph + example: Our solar system orbits the Milky Way galaxy at about 515,000 mph + + EmbeddingsResponse: + type: object + required: + - object + - model + - data + properties: + object: + type: string + enum: + - list + model: type: string - description: The transcribed text - example: Hello, world! 
- segments: - type: array - items: - $ref: '#/components/schemas/AudioTranscriptionSegment' - description: Array of transcription segments - words: - type: array - items: - $ref: '#/components/schemas/AudioTranscriptionWord' - description: Array of transcription words (only when timestamp_granularities includes 'word') - speaker_segments: + data: type: array items: - $ref: '#/components/schemas/AudioTranscriptionSpeakerSegment' - description: Array of transcription speaker segments (only when diarize is enabled) + type: object + required: [index, object, embedding] + properties: + object: + type: string + enum: + - embedding + embedding: + type: array + items: + type: number + index: + type: integer - AudioTranscriptionSegment: + ModelInfoList: + type: array + items: + $ref: '#/components/schemas/ModelInfo' + ModelInfo: type: object - required: - - id - - start - - end - - text + required: [id, object, created, type] properties: id: + type: string + example: 'Austism/chronos-hermes-13b' + object: + type: string + example: 'model' + created: type: integer - description: Unique identifier for the segment - example: 0 - start: - type: number - format: float - description: Start time of the segment in seconds - example: 0.0 - end: - type: number - format: float - description: End time of the segment in seconds - example: 3.5 - text: + example: 1692896905 + type: + enum: + - chat + - language + - code + - image + - embedding + - moderation + - rerank + example: 'chat' + display_name: type: string - description: The text content of the segment - example: Hello, world! 
+ example: 'Chronos Hermes (13B)' + organization: + type: string + example: 'Austism' + link: + type: string + license: + type: string + example: 'other' + context_length: + type: integer + example: 2048 + pricing: + $ref: '#/components/schemas/Pricing' - AudioTranscriptionWord: + ModelUploadRequest: type: object required: - - word - - start - - end + - model_name + - model_source properties: - word: + model_name: type: string - description: The word - example: Hello - start: - type: number - format: float - description: Start time of the word in seconds - example: 0.0 - end: - type: number - format: float - description: End time of the word in seconds - example: 0.5 - speaker_id: + description: The name to give to your uploaded model + example: 'Qwen2.5-72B-Instruct' + model_source: type: string - description: The speaker id for the word (only when diarize is enabled) - example: SPEAKER_00 + description: The source location of the model (Hugging Face repo or S3 path) + example: 'unsloth/Qwen2.5-72B-Instruct' + model_type: + type: string + description: Whether the model is a full model or an adapter + default: 'model' + enum: + - model + - adapter + example: 'model' + hf_token: + type: string + description: Hugging Face token (if uploading from Hugging Face) + example: 'hf_examplehuggingfacetoken' + description: + type: string + description: A description of your model + example: 'Finetuned Qwen2.5-72B-Instruct by Unsloth' + base_model: + type: string + description: The base model to use for an adapter if setting it to run against a serverless pool. Only used for model_type `adapter`. + example: 'Qwen/Qwen2.5-72B-Instruct' + lora_model: + type: string + description: The lora pool to use for an adapter if setting it to run against, say, a dedicated pool. Only used for model_type `adapter`. 
+ example: 'my_username/Qwen2.5-72B-Instruct-lora' - AudioTranscriptionSpeakerSegment: + ModelUploadSuccessResponse: type: object required: - - speaker_id - - start - - end - - words - - text - - id + - data + - message properties: - speaker_id: - type: string - description: The speaker identifier - example: SPEAKER_00 - start: - type: number - format: float - description: Start time of the speaker segment in seconds - example: 0.0 - end: - type: number - format: float - description: End time of the speaker segment in seconds - example: 30.02 - words: - type: array - items: - $ref: '#/components/schemas/AudioTranscriptionWord' - description: Array of words spoken by this speaker in this segment - text: + data: + type: object + required: + - job_id + - model_name + - model_id + - model_source + properties: + job_id: + type: string + example: 'job-a15dad11-8d8e-4007-97c5-a211304de284' + model_name: + type: string + example: 'necolinehubner/Qwen2.5-72B-Instruct' + model_id: + type: string + example: 'model-c0e32dfc-637e-47b2-bf4e-e9b2e58c9da7' + model_source: + type: string + example: 'huggingface' + message: type: string - description: The full text spoken by this speaker in this segment - example: "Hello, how are you doing today?" - id: - type: integer - description: Unique identifier for the speaker segment - example: 1 + example: 'Processing model weights. Job created.' - AudioTranslationRequest: + ImageResponse: type: object - required: - - file properties: - file: - oneOf: - - type: string - format: binary - description: Audio file to translate - - type: string - format: uri - description: Public HTTP/HTTPS URL to audio file - description: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, .webm, .flac. + id: + type: string model: type: string - description: Model to use for translation - default: openai/whisper-large-v3 + object: enum: - - openai/whisper-large-v3 - language: - type: string - description: Target output language. 
Optional ISO 639-1 language code. If omitted, language is set to English. - default: en - example: en - prompt: + - list + example: 'list' + data: + type: array + items: + oneOf: + - $ref: '#/components/schemas/ImageResponseDataB64' + - $ref: '#/components/schemas/ImageResponseDataUrl' + discriminator: + propertyName: type + required: + - id + - model + - object + - data + + ImageResponseDataB64: + type: object + required: [index, b64_json, type] + properties: + index: + type: integer + b64_json: type: string - description: Optional text to bias decoding. - response_format: + type: type: string - description: The format of the response - default: json - enum: - - json - - verbose_json - temperature: - type: number - format: float - description: Sampling temperature between 0.0 and 1.0 - default: 0.0 - minimum: 0.0 - maximum: 1.0 - timestamp_granularities: - oneOf: - - type: string - enum: - - segment - - word - - type: array - items: - type: string - enum: - - segment - - word - uniqueItems: true - minItems: 1 - maxItems: 2 - description: Controls level of timestamp detail in verbose_json. Only used when response_format is verbose_json. Can be a single granularity or an array to get multiple levels. - default: segment - example: ['word', 'segment'] - - AudioTranslationResponse: - oneOf: - - $ref: '#/components/schemas/AudioTranslationJsonResponse' - - $ref: '#/components/schemas/AudioTranslationVerboseJsonResponse' + enum: [b64_json] - AudioTranslationJsonResponse: + ImageResponseDataUrl: type: object - required: - - text + required: [index, url, type] properties: - text: + index: + type: integer + url: type: string - description: The translated text - example: Hello, world! 
+ type: + type: string + enum: [url] - AudioTranslationVerboseJsonResponse: + JobInfoSuccessResponse: type: object required: - - task - - language - - duration - - text - - segments + - type + - job_id + - status + - status_updates + - args + - created_at + - updated_at properties: - task: + type: type: string - description: The task performed - enum: - - transcribe - - translate - example: translate - language: + example: 'model_upload' + job_id: type: string - description: The target language of the translation - example: english - duration: - type: number - format: float - description: The duration of the audio in seconds - example: 3.5 - text: + example: 'job-a15dad11-8d8e-4007-97c5-a211304de284' + status: type: string - description: The translated text - example: Hello, world! - segments: + enum: ['Queued', 'Running', 'Complete', 'Failed'] + example: 'Complete' + status_updates: type: array items: - $ref: '#/components/schemas/AudioTranscriptionSegment' - description: Array of translation segments - words: + type: object + required: + - status + - message + - timestamp + properties: + status: + type: string + example: 'Complete' + message: + type: string + example: 'Job is Complete' + timestamp: + type: string + format: date-time + example: '2025-03-11T22:36:12Z' + args: + type: object + properties: + description: + type: string + example: 'Finetuned Qwen2.5-72B-Instruct by Unsloth' + modelName: + type: string + example: 'necolinehubner/Qwen2.5-72B-Instruct' + modelSource: + type: string + example: 'unsloth/Qwen2.5-72B-Instruct' + created_at: + type: string + format: date-time + example: '2025-03-11T22:05:43Z' + updated_at: + type: string + format: date-time + example: '2025-03-11T22:36:12Z' + + JobsInfoSuccessResponse: + type: object + required: + - data + properties: + data: type: array items: - $ref: '#/components/schemas/AudioTranscriptionWord' - description: Array of translation words (only when timestamp_granularities includes 'word') - - 
AudioSpeechStreamResponse: - oneOf: - - $ref: '#/components/schemas/AudioSpeechStreamEvent' - - $ref: '#/components/schemas/StreamSentinel' + $ref: '#/components/schemas/JobInfoSuccessResponse' - AudioSpeechStreamEvent: + Pricing: type: object - required: [data] + required: [hourly, input, output, base, finetune] properties: - data: - $ref: '#/components/schemas/AudioSpeechStreamChunk' + hourly: + type: number + example: 0 + input: + type: number + example: 0.3 + output: + type: number + example: 0.3 + base: + type: number + example: 0 + finetune: + type: number + example: 0 - AudioSpeechStreamChunk: + ToolsPart: type: object - required: [object, model, b64] properties: - object: + type: type: string - enum: - - audio.tts.chunk - model: + example: 'tool_type' + function: + type: object + properties: + description: + type: string + example: 'A description of the function.' + name: + type: string + example: 'function_name' + parameters: + type: object + additionalProperties: true + description: 'A map of parameter names to their values.' + ToolChoice: + type: object + required: [id, type, function, index] + properties: + # TODO: is this the right place for index? 
+ index: + type: number + id: type: string - example: cartesia/sonic - b64: + type: type: string - description: base64 encoded audio stream + enum: ['function'] + function: + type: object + required: [name, arguments] + properties: + name: + type: string + example: 'function_name' + arguments: + type: string - StreamSentinel: + FileResponse: + type: object + required: + - id + - object + - created_at + - filename + - bytes + - purpose + - FileType + - Processed + - LineCount + properties: + id: + type: string + object: + type: string + example: 'file' + created_at: + type: integer + example: 1715021438 + filename: + type: string + example: 'my_file.jsonl' + bytes: + type: integer + example: 2664 + purpose: + $ref: '#/components/schemas/FilePurpose' + Processed: + type: boolean + FileType: + $ref: '#/components/schemas/FileType' + LineCount: + type: integer + FileList: + required: + - data type: object - required: [data] properties: data: - title: stream_signal - type: string - enum: - - '[DONE]' - - ChatCompletionToken: + type: array + items: + $ref: '#/components/schemas/FileResponse' + FileObject: type: object - required: [id, text, logprob, special] properties: + object: + type: string id: - type: integer - text: type: string - logprob: - type: number - special: - type: boolean - - ChatCompletionChoice: + filename: + type: string + size: + type: integer + FilePurpose: + type: string + description: The purpose of the file + example: 'fine-tune' + enum: + - fine-tune + - eval + - eval-sample + - eval-output + - eval-summary + - batch-generated + - batch-api + FileType: + type: string + description: The type of the file + default: 'jsonl' + example: 'jsonl' + enum: + - 'csv' + - 'jsonl' + - 'parquet' + FileDeleteResponse: type: object - required: [index, delta, finish_reason] properties: - index: - type: integer - finish_reason: - $ref: '#/components/schemas/FinishReason' - logprobs: - $ref: '#/components/schemas/LogprobsPart' - delta: - title: 
ChatCompletionChoiceDelta - type: object - required: [role] - properties: - token_id: - type: integer - role: - type: string - enum: ['system', 'user', 'assistant', 'function', 'tool'] - content: - type: string - nullable: true - tool_calls: - type: array - items: - $ref: '#/components/schemas/ToolChoice' - function_call: - type: object - deprecated: true - nullable: true - properties: - arguments: - type: string - name: - type: string - required: - - arguments - - name - reasoning: - type: string - nullable: true - - EmbeddingsRequest: + id: + type: string + deleted: + type: boolean + FinetuneResponse: type: object required: - - model - - input + - id + - status properties: + id: + type: string + format: uuid + training_file: + type: string + validation_file: + type: string model: type: string - description: > - The name of the embedding model to use.
-
- [See all of Together AI's embedding models](https://docs.together.ai/docs/serverless-models#embedding-models) - example: togethercomputer/m2-bert-80M-8k-retrieval - anyOf: + model_output_name: + type: string + model_output_path: + type: string + trainingfile_numlines: + type: integer + trainingfile_size: + type: integer + created_at: + type: string + format: date-time + updated_at: + type: string + format: date-time + n_epochs: + type: integer + n_checkpoints: + type: integer + n_evals: + type: integer + batch_size: + oneOf: + - type: integer - type: string enum: - - WhereIsAI/UAE-Large-V1 - - BAAI/bge-large-en-v1.5 - - BAAI/bge-base-en-v1.5 - - togethercomputer/m2-bert-80M-8k-retrieval - - type: string - input: + - max + default: 'max' + learning_rate: + type: number + lr_scheduler: + type: object + $ref: '#/components/schemas/LRScheduler' + warmup_ratio: + type: number + max_grad_norm: + type: number + format: float + weight_decay: + type: number + format: float + eval_steps: + type: integer + train_on_inputs: oneOf: + - type: boolean - type: string - description: A string providing the text for the model to embed. - example: Our solar system orbits the Milky Way galaxy at about 515,000 mph - - type: array - items: - type: string - description: A string providing the text for the model to embed. 
- example: Our solar system orbits the Milky Way galaxy at about 515,000 mph - example: Our solar system orbits the Milky Way galaxy at about 515,000 mph - - EmbeddingsResponse: - type: object - required: - - object - - model - - data - properties: - object: - type: string - enum: - - list - model: + enum: + - auto + default: auto + training_method: + type: object + oneOf: + - $ref: '#/components/schemas/TrainingMethodSFT' + - $ref: '#/components/schemas/TrainingMethodDPO' + training_type: + type: object + oneOf: + - $ref: '#/components/schemas/FullTrainingType' + - $ref: '#/components/schemas/LoRATrainingType' + status: + $ref: '#/components/schemas/FinetuneJobStatus' + job_id: type: string - data: + events: type: array items: - type: object - required: [index, object, embedding] - properties: - object: - type: string - enum: - - embedding - embedding: - type: array - items: - type: number - index: - type: integer - - ModelInfoList: - type: array - items: - $ref: '#/components/schemas/ModelInfo' - ModelInfo: - type: object - required: [id, object, created, type] - properties: - id: - type: string - example: 'Austism/chronos-hermes-13b' - object: - type: string - example: 'model' - created: + $ref: '#/components/schemas/FineTuneEvent' + token_count: type: integer - example: 1692896905 - type: - enum: - - chat - - language - - code - - image - - embedding - - moderation - - rerank - example: 'chat' - display_name: + param_count: + type: integer + total_price: + type: integer + epochs_completed: + type: integer + queue_depth: + type: integer + wandb_project_name: type: string - example: 'Chronos Hermes (13B)' - organization: + wandb_url: type: string - example: 'Austism' - link: + from_checkpoint: type: string - license: + from_hf_model: type: string - example: 'other' - context_length: - type: integer - example: 2048 - pricing: - $ref: '#/components/schemas/Pricing' + hf_model_revision: + type: string + progress: + $ref: '#/components/schemas/FineTuneProgress' - 
ModelUploadRequest: + FinetuneResponseTruncated: type: object + description: A truncated version of the fine-tune response, used for POST /fine-tunes, GET /fine-tunes and POST /fine-tunes/{id}/cancel endpoints required: - - model_name - - model_source + - id + - status + - created_at + - updated_at + example: + id: ft-01234567890123456789 + status: completed + created_at: '2023-05-17T17:35:45.123Z' + updated_at: '2023-05-17T18:46:23.456Z' + user_id: 'user_01234567890123456789' + owner_address: 'user@example.com' + total_price: 1500 + token_count: 850000 + events: [] # FineTuneTruncated object has no events + model: 'meta-llama/Llama-2-7b-hf' + model_output_name: 'mynamespace/meta-llama/Llama-2-7b-hf-32162631' + n_epochs: 3 + training_file: 'file-01234567890123456789' + wandb_project_name: 'my-finetune-project' properties: - model_name: + id: type: string - description: The name to give to your uploaded model - example: 'Qwen2.5-72B-Instruct' - model_source: + description: Unique identifier for the fine-tune job + status: + $ref: '#/components/schemas/FinetuneJobStatus' + created_at: type: string - description: The source location of the model (Hugging Face repo or S3 path) - example: 'unsloth/Qwen2.5-72B-Instruct' - model_type: + format: date-time + description: Creation timestamp of the fine-tune job + updated_at: type: string - description: Whether the model is a full model or an adapter - default: 'model' - enum: - - model - - adapter - example: 'model' - hf_token: + format: date-time + description: Last update timestamp of the fine-tune job + user_id: type: string - description: Hugging Face token (if uploading from Hugging Face) - example: 'hf_examplehuggingfacetoken' - description: + description: Identifier for the user who created the job + owner_address: type: string - description: A description of your model - example: 'Finetuned Qwen2.5-72B-Instruct by Unsloth' - base_model: + description: Owner address information + total_price: + type: integer + 
description: Total price for the fine-tuning job + token_count: + type: integer + description: Count of tokens processed + events: + type: array + items: + $ref: '#/components/schemas/FineTuneEvent' + description: Events related to this fine-tune job + # FineTuneUserParams fields + training_file: type: string - description: The base model to use for an adapter if setting it to run against a serverless pool. Only used for model_type `adapter`. - example: 'Qwen/Qwen2.5-72B-Instruct' - lora_model: + description: File-ID of the training file + validation_file: type: string - description: The lora pool to use for an adapter if setting it to run against, say, a dedicated pool. Only used for model_type `adapter`. - example: 'my_username/Qwen2.5-72B-Instruct-lora' - - ModelUploadSuccessResponse: - type: object - required: - - data - - message - properties: - data: - type: object - required: - - job_id - - model_name - - model_id - - model_source - properties: - job_id: - type: string - example: 'job-a15dad11-8d8e-4007-97c5-a211304de284' - model_name: - type: string - example: 'necolinehubner/Qwen2.5-72B-Instruct' - model_id: - type: string - example: 'model-c0e32dfc-637e-47b2-bf4e-e9b2e58c9da7' - model_source: - type: string - example: 'huggingface' - message: + description: File-ID of the validation file + model: type: string - example: 'Processing model weights. Job created.' 
- - ImageResponse: - type: object - properties: - id: + description: Base model used for fine-tuning + model_output_name: type: string - model: + suffix: type: string - object: - enum: - - list - example: 'list' - data: - type: array - items: - oneOf: - - $ref: '#/components/schemas/ImageResponseDataB64' - - $ref: '#/components/schemas/ImageResponseDataUrl' - discriminator: - propertyName: type - required: - - id - - model - - object - - data - - ImageResponseDataB64: - type: object - required: [index, b64_json, type] - properties: - index: + description: Suffix added to the fine-tuned model name + n_epochs: type: integer - b64_json: + description: Number of training epochs + n_evals: + type: integer + description: Number of evaluations during training + n_checkpoints: + type: integer + description: Number of checkpoints saved during training + batch_size: + type: integer + description: Batch size used for training + training_type: + oneOf: + - $ref: '#/components/schemas/FullTrainingType' + - $ref: '#/components/schemas/LoRATrainingType' + description: Type of training used (full or LoRA) + training_method: + oneOf: + - $ref: '#/components/schemas/TrainingMethodSFT' + - $ref: '#/components/schemas/TrainingMethodDPO' + description: Method of training used + learning_rate: + type: number + format: float + description: Learning rate used for training + lr_scheduler: + $ref: '#/components/schemas/LRScheduler' + description: Learning rate scheduler configuration + warmup_ratio: + type: number + format: float + description: Ratio of warmup steps + max_grad_norm: + type: number + format: float + description: Maximum gradient norm for clipping + weight_decay: + type: number + format: float + description: Weight decay value used + wandb_project_name: type: string - type: + description: Weights & Biases project name + wandb_name: type: string - enum: [b64_json] - - ImageResponseDataUrl: + description: Weights & Biases run name + from_checkpoint: + type: string + 
description: Checkpoint used to continue training + from_hf_model: + type: string + description: Hugging Face Hub repo to start training from + hf_model_revision: + type: string + description: The revision of the Hugging Face Hub model to continue training from + progress: + $ref: '#/components/schemas/FineTuneProgress' + description: Progress information for the fine-tuning job + FinetuneDeleteResponse: type: object - required: [index, url, type] properties: - index: - type: integer - url: - type: string - type: + message: type: string - enum: [url] + description: Message indicating the result of the deletion + FinetuneJobStatus: + type: string + enum: + - pending + - queued + - running + - compressing + - uploading + - cancel_requested + - cancelled + - error + - completed + + FinetuneEventLevels: + type: string + enum: + - null + - info + - warning + - error + - legacy_info + - legacy_iwarning + - legacy_ierror + FinetuneEventType: + type: string + enum: + - job_pending + - job_start + - job_stopped + - model_downloading + - model_download_complete + - training_data_downloading + - training_data_download_complete + - validation_data_downloading + - validation_data_download_complete + - wandb_init + - training_start + - checkpoint_save + - billing_limit + - epoch_complete + - training_complete + - model_compressing + - model_compression_complete + - model_uploading + - model_upload_complete + - job_complete + - job_error + - cancel_requested + - job_restarted + - refund + - warning - JobInfoSuccessResponse: + FinetuneTruncatedList: type: object required: - - type - - job_id - - status - - status_updates - - args - - created_at - - updated_at + - data properties: - type: - type: string - example: 'model_upload' - job_id: - type: string - example: 'job-a15dad11-8d8e-4007-97c5-a211304de284' - status: - type: string - enum: ['Queued', 'Running', 'Complete', 'Failed'] - example: 'Complete' - status_updates: + data: type: array items: - type: object - required: - - 
status - - message - - timestamp - properties: - status: - type: string - example: 'Complete' - message: - type: string - example: 'Job is Complete' - timestamp: - type: string - format: date-time - example: '2025-03-11T22:36:12Z' - args: - type: object - properties: - description: - type: string - example: 'Finetuned Qwen2.5-72B-Instruct by Unsloth' - modelName: - type: string - example: 'necolinehubner/Qwen2.5-72B-Instruct' - modelSource: - type: string - example: 'unsloth/Qwen2.5-72B-Instruct' - created_at: - type: string - format: date-time - example: '2025-03-11T22:05:43Z' - updated_at: - type: string - format: date-time - example: '2025-03-11T22:36:12Z' - - JobsInfoSuccessResponse: + $ref: '#/components/schemas/FinetuneResponseTruncated' + FinetuneListEvents: type: object required: - data @@ -7216,1967 +8280,2207 @@ components: data: type: array items: - $ref: '#/components/schemas/JobInfoSuccessResponse' - - Pricing: - type: object - required: [hourly, input, output, base, finetune] - properties: - hourly: - type: number - example: 0 - input: - type: number - example: 0.3 - output: - type: number - example: 0.3 - base: - type: number - example: 0 - finetune: - type: number - example: 0 - - ToolsPart: - type: object - properties: - type: - type: string - example: 'tool_type' - function: - type: object - properties: - description: - type: string - example: 'A description of the function.' - name: - type: string - example: 'function_name' - parameters: - type: object - additionalProperties: true - description: 'A map of parameter names to their values.' - ToolChoice: - type: object - required: [id, type, function, index] - properties: - # TODO: is this the right place for index? 
- index: - type: number - id: - type: string - type: - type: string - enum: ['function'] - function: - type: object - required: [name, arguments] - properties: - name: - type: string - example: 'function_name' - arguments: - type: string - - FileResponse: + $ref: '#/components/schemas/FineTuneEvent' + FineTuneEvent: type: object required: - - id - object - created_at - - filename - - bytes - - purpose - - FileType - - Processed - - LineCount + - message + - type + - param_count + - token_count + - total_steps + - wandb_url + - step + - checkpoint_path + - model_path + - training_offset + - hash properties: - id: - type: string object: type: string - example: 'file' + enum: [fine-tune-event] created_at: - type: integer - example: 1715021438 - filename: type: string - example: 'my_file.jsonl' - bytes: + level: + anyOf: + - $ref: '#/components/schemas/FinetuneEventLevels' + message: + type: string + type: + $ref: '#/components/schemas/FinetuneEventType' + param_count: type: integer - example: 2664 - purpose: - $ref: '#/components/schemas/FilePurpose' - Processed: - type: boolean - FileType: - $ref: '#/components/schemas/FileType' - LineCount: + token_count: type: integer - FileList: - required: - - data - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/FileResponse' - FileObject: - type: object - properties: - object: + total_steps: + type: integer + wandb_url: type: string - id: + step: + type: integer + checkpoint_path: type: string - filename: + model_path: type: string - size: + training_offset: type: integer - FilePurpose: - type: string - description: The purpose of the file - example: 'fine-tune' - enum: - - fine-tune - - eval - - eval-sample - - eval-output - - eval-summary - - batch-generated - - batch-api - FileType: - type: string - description: The type of the file - default: 'jsonl' - example: 'jsonl' - enum: - - 'csv' - - 'jsonl' - - 'parquet' - FileDeleteResponse: + hash: + type: string + FineTuneProgress: type: 
object + description: Progress information for a fine-tuning job + required: + - estimate_available + - seconds_remaining properties: - id: - type: string - deleted: + estimate_available: type: boolean - FinetuneResponse: + description: Whether time estimate is available + seconds_remaining: + type: integer + description: Estimated time remaining in seconds for the fine-tuning job to next state + FinetuneListCheckpoints: + type: object + required: + - data + properties: + data: + type: array + items: + $ref: '#/components/schemas/FineTuneCheckpoint' + FineTuneCheckpoint: type: object required: - - id - - status + - step + - path + - created_at + - checkpoint_type properties: - id: - type: string - format: uuid - training_file: + step: + type: integer + created_at: type: string - validation_file: + path: type: string - model: + checkpoint_type: type: string - model_output_name: + + FullTrainingType: + type: object + properties: + type: type: string - model_output_path: + enum: ['Full'] + required: + - type + LoRATrainingType: + type: object + properties: + type: type: string - trainingfile_numlines: + enum: ['Lora'] + lora_r: type: integer - trainingfile_size: + lora_alpha: type: integer - created_at: + lora_dropout: + type: number + format: float + default: 0.0 + lora_trainable_modules: type: string - format: date-time - updated_at: + default: 'all-linear' + required: + - type + - lora_r + - lora_alpha + + TrainingMethodSFT: + type: object + properties: + method: type: string - format: date-time - n_epochs: - type: integer - n_checkpoints: - type: integer - n_evals: - type: integer - batch_size: + enum: ['sft'] + train_on_inputs: oneOf: - - type: integer + - type: boolean - type: string enum: - - max - default: 'max' - learning_rate: + - auto + type: boolean + default: auto + description: Whether to mask the user messages in conversational data or prompts in instruction data. 
+ required: + - method + - train_on_inputs + TrainingMethodDPO: + type: object + properties: + method: + type: string + enum: ['dpo'] + dpo_beta: type: number - lr_scheduler: - type: object - $ref: '#/components/schemas/LRScheduler' - warmup_ratio: + format: float + default: 0.1 + rpo_alpha: type: number - max_grad_norm: + format: float + default: 0.0 + dpo_normalize_logratios_by_length: + type: boolean + default: false + dpo_reference_free: + type: boolean + default: false + simpo_gamma: type: number format: float - weight_decay: + default: 0.0 + required: + - method + + LRScheduler: + type: object + properties: + lr_scheduler_type: + type: string + enum: + - linear + - cosine + lr_scheduler_args: + oneOf: + - $ref: '#/components/schemas/LinearLRSchedulerArgs' + - $ref: '#/components/schemas/CosineLRSchedulerArgs' + required: + - lr_scheduler_type + CosineLRSchedulerArgs: + type: object + properties: + min_lr_ratio: type: number format: float - eval_steps: + default: 0.0 + description: The ratio of the final learning rate to the peak learning rate + num_cycles: + type: number + format: float + default: 0.5 + description: Number or fraction of cycles for the cosine learning rate scheduler + required: + - min_lr_ratio + - num_cycles + LinearLRSchedulerArgs: + type: object + properties: + min_lr_ratio: + type: number + format: float + default: 0.0 + description: The ratio of the final learning rate to the peak learning rate + + Autoscaling: + type: object + description: Configuration for automatic scaling of replicas based on demand. 
+ required: + - min_replicas + - max_replicas + properties: + min_replicas: type: integer - train_on_inputs: - oneOf: - - type: boolean - - type: string - enum: - - auto - default: auto - training_method: - type: object - oneOf: - - $ref: '#/components/schemas/TrainingMethodSFT' - - $ref: '#/components/schemas/TrainingMethodDPO' - training_type: - type: object - oneOf: - - $ref: '#/components/schemas/FullTrainingType' - - $ref: '#/components/schemas/LoRATrainingType' + format: int32 + description: The minimum number of replicas to maintain, even when there is no load + examples: + - 2 + max_replicas: + type: integer + format: int32 + description: The maximum number of replicas to scale up to under load + examples: + - 5 + + HardwareSpec: + type: object + description: Detailed specifications of a hardware configuration + required: + - gpu_type + - gpu_link + - gpu_memory + - gpu_count + properties: + gpu_type: + type: string + description: The type/model of GPU + examples: + - a100-80gb + gpu_link: + type: string + description: The GPU interconnect technology + examples: + - sxm + gpu_memory: + type: number + format: float + description: Amount of GPU memory in GB + examples: + - 80 + gpu_count: + type: integer + format: int32 + description: Number of GPUs in this configuration + examples: + - 2 + + EndpointPricing: + type: object + description: Pricing details for using an endpoint + required: + - cents_per_minute + properties: + cents_per_minute: + type: number + format: float + description: Cost per minute of endpoint uptime in cents + examples: + - 5.42 + + HardwareAvailability: + type: object + description: Indicates the current availability status of a hardware configuration + required: + - status + properties: status: - $ref: '#/components/schemas/FinetuneJobStatus' - job_id: type: string - events: - type: array - items: - $ref: '#/components/schemas/FineTuneEvent' - token_count: - type: integer - param_count: - type: integer - total_price: - type: integer - 
epochs_completed: - type: integer - queue_depth: - type: integer - wandb_project_name: + description: The availability status of the hardware configuration + enum: + - available + - unavailable + - insufficient + + HardwareWithStatus: + type: object + description: Hardware configuration details with optional availability status + required: + - object + - id + - pricing + - specs + - updated_at + properties: + object: + type: string + enum: + - hardware + id: type: string - wandb_url: + description: Unique identifier for the hardware configuration + examples: + - 2x_nvidia_a100_80gb_sxm + pricing: + $ref: '#/components/schemas/EndpointPricing' + specs: + $ref: '#/components/schemas/HardwareSpec' + availability: + $ref: '#/components/schemas/HardwareAvailability' + updated_at: type: string - from_checkpoint: + format: date-time + description: Timestamp of when the hardware status was last updated + + CreateEndpointRequest: + type: object + required: + - model + - hardware + - autoscaling + properties: + display_name: type: string - from_hf_model: + description: A human-readable name for the endpoint + examples: + - My Llama3 70b endpoint + model: type: string - hf_model_revision: + description: The model to deploy on this endpoint + examples: + - meta-llama/Llama-3-8b-chat-hf + hardware: type: string - progress: - $ref: '#/components/schemas/FineTuneProgress' + description: The hardware configuration to use for this endpoint + examples: + - 1x_nvidia_a100_80gb_sxm + autoscaling: + $ref: '#/components/schemas/Autoscaling' + description: Configuration for automatic scaling of the endpoint + disable_prompt_cache: + type: boolean + description: Whether to disable the prompt cache for this endpoint + default: false + disable_speculative_decoding: + type: boolean + description: Whether to disable speculative decoding for this endpoint + default: false + state: + type: string + description: The desired state of the endpoint + enum: + - STARTED + - STOPPED + default: STARTED 
+ example: STARTED + inactive_timeout: + type: integer + description: The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to null, omit or set to 0 to disable automatic timeout. + nullable: true + example: 60 + availability_zone: + type: string + description: Create the endpoint in a specified availability zone (e.g., us-central-4b) - FinetuneResponseTruncated: + DedicatedEndpoint: type: object - description: A truncated version of the fine-tune response, used for POST /fine-tunes, GET /fine-tunes and POST /fine-tunes/{id}/cancel endpoints + description: Details about a dedicated endpoint deployment required: + - object - id - - status + - name + - display_name + - model + - hardware + - type + - owner + - state + - autoscaling - created_at - - updated_at - example: - id: ft-01234567890123456789 - status: completed - created_at: '2023-05-17T17:35:45.123Z' - updated_at: '2023-05-17T18:46:23.456Z' - user_id: 'user_01234567890123456789' - owner_address: 'user@example.com' - total_price: 1500 - token_count: 850000 - events: [] # FineTuneTruncated object has no events - model: 'meta-llama/Llama-2-7b-hf' - model_output_name: 'mynamespace/meta-llama/Llama-2-7b-hf-32162631' - n_epochs: 3 - training_file: 'file-01234567890123456789' - wandb_project_name: 'my-finetune-project' properties: + object: + type: string + enum: + - endpoint + description: The type of object + example: endpoint id: type: string - description: Unique identifier for the fine-tune job - status: - $ref: '#/components/schemas/FinetuneJobStatus' - created_at: + description: Unique identifier for the endpoint + example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 + name: type: string - format: date-time - description: Creation timestamp of the fine-tune job - updated_at: + description: System name for the endpoint + example: devuser/meta-llama/Llama-3-8b-chat-hf-a32b82a1 + display_name: type: string - format: date-time - description: Last update timestamp of 
the fine-tune job - user_id: + description: Human-readable name for the endpoint + example: My Llama3 70b endpoint + model: type: string - description: Identifier for the user who created the job - owner_address: + description: The model deployed on this endpoint + example: meta-llama/Llama-3-8b-chat-hf + hardware: type: string - description: Owner address information - total_price: - type: integer - description: Total price for the fine-tuning job - token_count: - type: integer - description: Count of tokens processed - events: - type: array - items: - $ref: '#/components/schemas/FineTuneEvent' - description: Events related to this fine-tune job - # FineTuneUserParams fields - training_file: + description: The hardware configuration used for this endpoint + example: 1x_nvidia_a100_80gb_sxm + type: type: string - description: File-ID of the training file - validation_file: + enum: + - dedicated + description: The type of endpoint + example: dedicated + owner: type: string - description: File-ID of the validation file - model: + description: The owner of this endpoint + example: devuser + state: type: string - description: Base model used for fine-tuning - model_output_name: + enum: + - PENDING + - STARTING + - STARTED + - STOPPING + - STOPPED + - ERROR + description: Current state of the endpoint + example: STARTED + autoscaling: + $ref: '#/components/schemas/Autoscaling' + description: Configuration for automatic scaling of the endpoint + created_at: type: string - suffix: + format: date-time + description: Timestamp when the endpoint was created + example: 2025-02-04T10:43:55.405Z + + ListEndpoint: + type: object + description: Details about an endpoint when listed via the list endpoint + required: + - id + - object + - name + - model + - type + - owner + - state + - created_at + properties: + object: type: string - description: Suffix added to the fine-tuned model name - n_epochs: - type: integer - description: Number of training epochs - n_evals: - type: 
integer - description: Number of evaluations during training - n_checkpoints: - type: integer - description: Number of checkpoints saved during training - batch_size: - type: integer - description: Batch size used for training - training_type: - oneOf: - - $ref: '#/components/schemas/FullTrainingType' - - $ref: '#/components/schemas/LoRATrainingType' - description: Type of training used (full or LoRA) - training_method: - oneOf: - - $ref: '#/components/schemas/TrainingMethodSFT' - - $ref: '#/components/schemas/TrainingMethodDPO' - description: Method of training used - learning_rate: - type: number - format: float - description: Learning rate used for training - lr_scheduler: - $ref: '#/components/schemas/LRScheduler' - description: Learning rate scheduler configuration - warmup_ratio: - type: number - format: float - description: Ratio of warmup steps - max_grad_norm: - type: number - format: float - description: Maximum gradient norm for clipping - weight_decay: - type: number - format: float - description: Weight decay value used - wandb_project_name: + enum: + - endpoint + description: The type of object + example: endpoint + id: type: string - description: Weights & Biases project name - wandb_name: + description: Unique identifier for the endpoint + example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 + name: type: string - description: Weights & Biases run name - from_checkpoint: + description: System name for the endpoint + example: allenai/OLMo-7B + model: type: string - description: Checkpoint used to continue training - from_hf_model: + description: The model deployed on this endpoint + example: allenai/OLMo-7B + type: type: string - description: Hugging Face Hub repo to start training from - hf_model_revision: + enum: + - serverless + - dedicated + description: The type of endpoint + example: serverless + owner: type: string - description: The revision of the Hugging Face Hub model to continue training from - progress: - $ref: 
'#/components/schemas/FineTuneProgress' - description: Progress information for the fine-tuning job - FinetuneDeleteResponse: - type: object + description: The owner of this endpoint + example: together + state: + type: string + enum: + - PENDING + - STARTING + - STARTED + - STOPPING + - STOPPED + - ERROR + description: Current state of the endpoint + example: STARTED + created_at: + type: string + format: date-time + description: Timestamp when the endpoint was created + example: 2024-02-28T21:34:35.444Z + + DisplayorExecuteOutput: properties: - message: + data: + properties: + application/geo+json: + type: object + application/javascript: + type: string + application/json: + type: object + application/pdf: + format: byte + type: string + application/vnd.vega.v5+json: + type: object + application/vnd.vegalite.v4+json: + type: object + image/gif: + format: byte + type: string + image/jpeg: + format: byte + type: string + image/png: + format: byte + type: string + image/svg+xml: + type: string + text/html: + type: string + text/latex: + type: string + text/markdown: + type: string + text/plain: + type: string + type: object + type: + enum: + - display_data + - execute_result type: string - description: Message indicating the result of the deletion - FinetuneJobStatus: - type: string - enum: - - pending - - queued - - running - - compressing - - uploading - - cancel_requested - - cancelled - - error - - completed + required: + - type + - data + title: DisplayorExecuteOutput + + Error: + oneOf: + - type: string + - additionalProperties: true + type: object + title: Error + + ErrorOutput: + title: ErrorOutput + description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully. 
+ properties: + data: + type: string + type: + enum: + - error + type: string + required: + - type + - data + + ExecuteRequest: + title: ExecuteRequest + required: + - language + - code + properties: + code: + description: 'Code snippet to execute.' + example: "print('Hello, world!')" + type: string + files: + description: Files to upload to the session. If present, files will be uploaded before executing the given code. + items: + properties: + content: + type: string + encoding: + description: Encoding of the file content. Use `string` for text files such as code, and `base64` for binary files, such as images. + enum: + - string + - base64 + type: string + name: + type: string + required: + - name + - encoding + - content + type: object + type: array + language: + default: python + description: Programming language for the code to execute. Currently only supports Python, but more will be added. + enum: + - python + session_id: + description: Identifier of the current session. Used to make follow-up calls. Requests will return an error if the session does not belong to the caller or has expired. + example: ses_abcDEF123 + nullable: false + type: string + + ExecuteResponse: + title: ExecuteResponse + type: object + description: 'The result of the execution. If successful, `data` contains the result and `errors` will be null. If unsuccessful, `data` will be null and `errors` will contain the errors.' + oneOf: + - title: SuccessfulExecution + type: object + required: [data, errors] + properties: + errors: + type: 'null' + data: + type: object + nullable: false + required: [session_id, outputs] + properties: + outputs: + type: array + items: + discriminator: + propertyName: type + oneOf: + - title: StreamOutput + description: Outputs that were printed to stdout or stderr + type: object + required: [type, data] + properties: + type: + enum: + - stdout + - stderr + type: string + data: + type: string + - description: Errors and exceptions that occurred. 
If this output type is present, your code did not execute successfully. + properties: + data: + type: string + type: + enum: + - error + type: string + required: + - type + - data + title: ErrorOutput + - properties: + data: + properties: + application/geo+json: + type: object + additionalProperties: true + application/javascript: + type: string + application/json: + type: object + additionalProperties: true + application/pdf: + format: byte + type: string + application/vnd.vega.v5+json: + type: object + additionalProperties: true + application/vnd.vegalite.v4+json: + type: object + additionalProperties: true + image/gif: + format: byte + type: string + image/jpeg: + format: byte + type: string + image/png: + format: byte + type: string + image/svg+xml: + type: string + text/html: + type: string + text/latex: + type: string + text/markdown: + type: string + text/plain: + type: string + type: object + type: + enum: + - display_data + - execute_result + type: string + required: + - type + - data + title: DisplayorExecuteOutput + title: InterpreterOutput + session_id: + type: string + description: Identifier of the current session. Used to make follow-up calls. + example: ses_abcDEF123 + nullable: false + status: + type: string + enum: + - success + description: Status of the execution. Currently only supports success. 
+ - title: FailedExecution + type: object + required: [data, errors] + properties: + data: + type: 'null' + errors: + type: array + items: + title: Error + oneOf: + - type: string + - type: object + additionalProperties: true - FinetuneEventLevels: - type: string - enum: - - null - - info - - warning - - error - - legacy_info - - legacy_iwarning - - legacy_ierror - FinetuneEventType: - type: string - enum: - - job_pending - - job_start - - job_stopped - - model_downloading - - model_download_complete - - training_data_downloading - - training_data_download_complete - - validation_data_downloading - - validation_data_download_complete - - wandb_init - - training_start - - checkpoint_save - - billing_limit - - epoch_complete - - training_complete - - model_compressing - - model_compression_complete - - model_uploading - - model_upload_complete - - job_complete - - job_error - - cancel_requested - - job_restarted - - refund - - warning + InterpreterOutput: + discriminator: + propertyName: type + oneOf: + - description: Outputs that were printed to stdout or stderr + properties: + data: + type: string + type: + enum: + - stdout + - stderr + type: string + required: + - type + - data + title: StreamOutput + - description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully. 
+ properties: + data: + type: string + type: + enum: + - error + type: string + required: + - type + - data + title: ErrorOutput + - properties: + data: + properties: + application/geo+json: + type: object + application/javascript: + type: string + application/json: + type: object + application/pdf: + format: byte + type: string + application/vnd.vega.v5+json: + type: object + application/vnd.vegalite.v4+json: + type: object + image/gif: + format: byte + type: string + image/jpeg: + format: byte + type: string + image/png: + format: byte + type: string + image/svg+xml: + type: string + text/html: + type: string + text/latex: + type: string + text/markdown: + type: string + text/plain: + type: string + type: object + type: + enum: + - display_data + - execute_result + type: string + required: + - type + - data + title: DisplayorExecuteOutput + title: InterpreterOutput - FinetuneTruncatedList: - type: object - required: - - data + Response: properties: - data: - type: array + errors: items: - $ref: '#/components/schemas/FinetuneResponseTruncated' - FinetuneListEvents: - type: object - required: - - data - properties: - data: + oneOf: + - type: string + - additionalProperties: true + type: object + title: Error type: array - items: - $ref: '#/components/schemas/FineTuneEvent' - FineTuneEvent: - type: object - required: - - object - - created_at - - message - - type - - param_count - - token_count - - total_steps - - wandb_url - - step - - checkpoint_path - - model_path - - training_offset - - hash - properties: - object: - type: string - enum: [fine-tune-event] - created_at: - type: string - level: - anyOf: - - $ref: '#/components/schemas/FinetuneEventLevels' - message: - type: string - type: - $ref: '#/components/schemas/FinetuneEventType' - param_count: - type: integer - token_count: - type: integer - total_steps: - type: integer - wandb_url: - type: string - step: - type: integer - checkpoint_path: - type: string - model_path: - type: string - training_offset: - 
type: integer - hash: - type: string - FineTuneProgress: + title: Response type: object - description: Progress information for a fine-tuning job - required: - - estimate_available - - seconds_remaining - properties: - estimate_available: - type: boolean - description: Whether time estimate is available - seconds_remaining: - type: integer - description: Estimated time remaining in seconds for the fine-tuning job to next state - FinetuneListCheckpoints: + + SessionListResponse: + allOf: + - properties: + errors: + items: + oneOf: + - type: string + - additionalProperties: true + type: object + title: Error + type: array + title: Response + type: object + - properties: + data: + properties: + sessions: + items: + properties: + execute_count: + type: integer + expires_at: + format: date-time + type: string + id: + description: Session Identifier. Used to make follow-up calls. + example: ses_abcDEF123 + type: string + last_execute_at: + format: date-time + type: string + started_at: + format: date-time + type: string + required: + - execute_count + - expires_at + - id + - last_execute_at + - started_at + type: object + type: array + required: + - sessions + type: object + title: SessionListResponse type: object - required: - - data + + StreamOutput: + description: Outputs that were printed to stdout or stderr properties: data: - type: array - items: - $ref: '#/components/schemas/FineTuneCheckpoint' - FineTuneCheckpoint: - type: object - required: - - step - - path - - created_at - - checkpoint_type - properties: - step: - type: integer - created_at: - type: string - path: - type: string - checkpoint_type: type: string - - FullTrainingType: - type: object - properties: type: + enum: + - stdout + - stderr type: string - enum: ['Full'] required: - type - LoRATrainingType: + - data + title: StreamOutput + + CreateBatchRequest: type: object + required: [endpoint, input_file_id] properties: - type: + endpoint: type: string - enum: ['Lora'] - lora_r: - type: integer - 
lora_alpha: + description: The endpoint to use for batch processing + example: '/v1/chat/completions' + input_file_id: + type: string + description: ID of the uploaded input file containing batch requests + example: 'file-abc123def456ghi789' + completion_window: + type: string + description: Time window for batch completion (optional) + example: '24h' + priority: type: integer - lora_dropout: - type: number - format: float - default: 0.0 - lora_trainable_modules: + description: Priority for batch processing (optional) + example: 1 + model_id: type: string - default: 'all-linear' - required: - - type - - lora_r - - lora_alpha - - TrainingMethodSFT: + description: 'Model to use for processing batch requests' + example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo' + BatchErrorResponse: type: object properties: - method: + error: type: string - enum: ['sft'] - train_on_inputs: - oneOf: - - type: boolean - - type: string - enum: - - auto - type: boolean - default: auto - description: Whether to mask the user messages in conversational data or prompts in instruction data. 
- required: - - method - - train_on_inputs - TrainingMethodDPO: + BatchJobWithWarning: type: object properties: - method: + job: + $ref: '#/components/schemas/BatchJob' + warning: type: string - enum: ['dpo'] - dpo_beta: - type: number - format: float - default: 0.1 - rpo_alpha: - type: number - format: float - default: 0.0 - dpo_normalize_logratios_by_length: - type: boolean - default: false - dpo_reference_free: - type: boolean - default: false - simpo_gamma: - type: number - format: float - default: 0.0 - required: - - method - - LRScheduler: + BatchJob: type: object properties: - lr_scheduler_type: + id: type: string - enum: - - linear - - cosine - lr_scheduler_args: - oneOf: - - $ref: '#/components/schemas/LinearLRSchedulerArgs' - - $ref: '#/components/schemas/CosineLRSchedulerArgs' - required: - - lr_scheduler_type - CosineLRSchedulerArgs: - type: object - properties: - min_lr_ratio: - type: number - format: float - default: 0.0 - description: The ratio of the final learning rate to the peak learning rate - num_cycles: - type: number - format: float - default: 0.5 - description: Number or fraction of cycles for the cosine learning rate scheduler - required: - - min_lr_ratio - - num_cycles - LinearLRSchedulerArgs: - type: object - properties: - min_lr_ratio: - type: number - format: float - default: 0.0 - description: The ratio of the final learning rate to the peak learning rate - - Autoscaling: - type: object - description: Configuration for automatic scaling of replicas based on demand. 
- required: - - min_replicas - - max_replicas - properties: - min_replicas: - type: integer - format: int32 - description: The minimum number of replicas to maintain, even when there is no load - examples: - - 2 - max_replicas: + format: uuid + example: '01234567-8901-2345-6789-012345678901' + user_id: + type: string + example: 'user_789xyz012' + input_file_id: + type: string + example: 'file-input123abc456def' + file_size_bytes: type: integer - format: int32 - description: The maximum number of replicas to scale up to under load - examples: - - 5 - - HardwareSpec: - type: object - description: Detailed specifications of a hardware configuration - required: - - gpu_type - - gpu_link - - gpu_memory - - gpu_count - properties: - gpu_type: + format: int64 + example: 1048576 + description: 'Size of input file in bytes' + status: + $ref: '#/components/schemas/BatchJobStatus' + job_deadline: type: string - description: The type/model of GPU - examples: - - a100-80gb - gpu_link: + format: date-time + example: '2024-01-15T15:30:00Z' + created_at: type: string - description: The GPU interconnect technology - examples: - - sxm - gpu_memory: + format: date-time + example: '2024-01-15T14:30:00Z' + endpoint: + type: string + example: '/v1/chat/completions' + progress: type: number - format: float - description: Amount of GPU memory in GB - examples: - - 80 - gpu_count: - type: integer - format: int32 - description: Number of GPUs in this configuration - examples: - - 2 + format: float64 + example: 75.0 + description: 'Completion progress (0.0 to 100)' + model_id: + type: string + example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo' + description: 'Model used for processing requests' + output_file_id: + type: string + example: 'file-output789xyz012ghi' + error_file_id: + type: string + example: 'file-errors456def789jkl' + error: + type: string + completed_at: + type: string + format: date-time + example: '2024-01-15T15:45:30Z' + BatchJobStatus: + type: string + enum: + - 
VALIDATING + - IN_PROGRESS + - COMPLETED + - FAILED + - EXPIRED + - CANCELLED + example: 'IN_PROGRESS' + description: 'Current status of the batch job' - EndpointPricing: + EvaluationTypedRequest: type: object - description: Pricing details for using an endpoint required: - - cents_per_minute + - type + - parameters properties: - cents_per_minute: - type: number - format: float - description: Cost per minute of endpoint uptime in cents - examples: - - 5.42 + type: + type: string + enum: [classify, score, compare] + description: The type of evaluation to perform + example: 'classify' + parameters: + oneOf: + - $ref: '#/components/schemas/EvaluationClassifyParameters' + - $ref: '#/components/schemas/EvaluationScoreParameters' + - $ref: '#/components/schemas/EvaluationCompareParameters' + description: Type-specific parameters for the evaluation - HardwareAvailability: + EvaluationClassifyParameters: type: object - description: Indicates the current availability status of a hardware configuration required: - - status + - judge + - labels + - pass_labels + - input_data_file_path properties: - status: + judge: + $ref: '#/components/schemas/EvaluationJudgeModelConfig' + labels: + type: array + items: + type: string + minItems: 2 + description: List of possible classification labels + example: ['yes', 'no'] + pass_labels: + type: array + items: + type: string + minItems: 1 + description: List of labels that are considered passing + example: ['yes'] + model_to_evaluate: + $ref: '#/components/schemas/EvaluationModelOrString' + input_data_file_path: type: string - description: The availability status of the hardware configuration - enum: - - available - - unavailable - - insufficient + description: Data file ID + example: 'file-1234-aefd' - HardwareWithStatus: + EvaluationScoreParameters: type: object - description: Hardware configuration details with optional availability status required: - - object - - id - - pricing - - specs - - updated_at + - judge + - min_score + - 
max_score + - pass_threshold + - input_data_file_path properties: - object: - type: string - enum: - - hardware - id: - type: string - description: Unique identifier for the hardware configuration - examples: - - 2x_nvidia_a100_80gb_sxm - pricing: - $ref: '#/components/schemas/EndpointPricing' - specs: - $ref: '#/components/schemas/HardwareSpec' - availability: - $ref: '#/components/schemas/HardwareAvailability' - updated_at: + judge: + $ref: '#/components/schemas/EvaluationJudgeModelConfig' + min_score: + type: number + format: float + example: 0.0 + description: Minimum possible score + max_score: + type: number + format: float + example: 10.0 + description: Maximum possible score + pass_threshold: + type: number + format: float + example: 7.0 + description: Score threshold for passing + model_to_evaluate: + $ref: '#/components/schemas/EvaluationModelOrString' + input_data_file_path: type: string - format: date-time - description: Timestamp of when the hardware status was last updated + example: 'file-01234567890123456789' + description: Data file ID - CreateEndpointRequest: + EvaluationCompareParameters: type: object required: - - model - - hardware - - autoscaling + - judge + - input_data_file_path properties: - display_name: - type: string - description: A human-readable name for the endpoint - examples: - - My Llama3 70b endpoint - model: - type: string - description: The model to deploy on this endpoint - examples: - - meta-llama/Llama-3-8b-chat-hf - hardware: - type: string - description: The hardware configuration to use for this endpoint - examples: - - 1x_nvidia_a100_80gb_sxm - autoscaling: - $ref: '#/components/schemas/Autoscaling' - description: Configuration for automatic scaling of the endpoint - disable_prompt_cache: - type: boolean - description: Whether to disable the prompt cache for this endpoint - default: false - disable_speculative_decoding: - type: boolean - description: Whether to disable speculative decoding for this endpoint - default: 
false - state: - type: string - description: The desired state of the endpoint - enum: - - STARTED - - STOPPED - default: STARTED - example: STARTED - inactive_timeout: - type: integer - description: The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to null, omit or set to 0 to disable automatic timeout. - nullable: true - example: 60 - availability_zone: + judge: + $ref: '#/components/schemas/EvaluationJudgeModelConfig' + model_a: + $ref: '#/components/schemas/EvaluationModelOrString' + model_b: + $ref: '#/components/schemas/EvaluationModelOrString' + input_data_file_path: type: string - description: Create the endpoint in a specified availability zone (e.g., us-central-4b) + description: Data file name - DedicatedEndpoint: + EvaluationJudgeModelConfig: type: object - description: Details about a dedicated endpoint deployment required: - - object - - id - - name - - display_name - model - - hardware - - type - - owner - - state - - autoscaling - - created_at + - system_template + - model_source properties: - object: - type: string - enum: - - endpoint - description: The type of object - example: endpoint - id: - type: string - description: Unique identifier for the endpoint - example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 - name: - type: string - description: System name for the endpoint - example: devuser/meta-llama/Llama-3-8b-chat-hf-a32b82a1 - display_name: - type: string - description: Human-readable name for the endpoint - example: My Llama3 70b endpoint model: type: string - description: The model deployed on this endpoint - example: meta-llama/Llama-3-8b-chat-hf - hardware: - type: string - description: The hardware configuration used for this endpoint - example: 1x_nvidia_a100_80gb_sxm - type: - type: string - enum: - - dedicated - description: The type of endpoint - example: dedicated - owner: + description: Name of the judge model + example: 'meta-llama/Llama-3-70B-Instruct-Turbo' + system_template: 
type: string - description: The owner of this endpoint - example: devuser - state: + description: System prompt template for the judge + example: 'Imagine you are a helpful assistant' + model_source: type: string - enum: - - PENDING - - STARTING - - STARTED - - STOPPING - - STOPPED - - ERROR - description: Current state of the endpoint - example: STARTED - autoscaling: - $ref: '#/components/schemas/Autoscaling' - description: Configuration for automatic scaling of the endpoint - created_at: + description: "Source of the judge model." + enum: [serverless, dedicated, external] + external_api_token: type: string - format: date-time - description: Timestamp when the endpoint was created - example: 2025-02-04T10:43:55.405Z + description: "Bearer/API token for external judge models." + external_base_url: + type: string + description: "Base URL for external judge models. Must be OpenAI-compatible base URL." - ListEndpoint: + EvaluationModelOrString: + oneOf: + - type: string + description: Field name in the input data + - $ref: '#/components/schemas/EvaluationModelRequest' + + EvaluationModelRequest: type: object - description: Details about an endpoint when listed via the list endpoint required: - - id - - object - - name - model - - type - - owner - - state - - created_at + - max_tokens + - temperature + - system_template + - input_template + - model_source properties: - object: + model: type: string - enum: - - endpoint - description: The type of object - example: endpoint - id: + description: Name of the model to evaluate + example: 'meta-llama/Llama-3-70B-Instruct-Turbo' + max_tokens: + type: integer + minimum: 1 + description: Maximum number of tokens to generate + example: 512 + temperature: + type: number + format: float + minimum: 0 + maximum: 2 + description: Sampling temperature + example: 0.7 + system_template: type: string - description: Unique identifier for the endpoint - example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 - name: + description: System 
prompt template + example: 'Imagine you are a helpful assistant' + input_template: type: string - description: System name for the endpoint - example: allenai/OLMo-7B - model: + description: Input prompt template + example: 'Please classify {{prompt}} based on the labels below' + model_source: type: string - description: The model deployed on this endpoint - example: allenai/OLMo-7B - type: + description: "Source of the model." + enum: [serverless, dedicated, external] + external_api_token: type: string - enum: - - serverless - - dedicated - description: The type of endpoint - example: serverless - owner: + description: "Bearer/API token for external models." + external_base_url: type: string - description: The owner of this endpoint - example: together - state: + description: "Base URL for external models. Must be OpenAI-compatible base URL" + + EvaluationResponse: + type: object + properties: + workflow_id: type: string - enum: - - PENDING - - STARTING - - STARTED - - STOPPING - - STOPPED - - ERROR - description: Current state of the endpoint - example: STARTED - created_at: + description: The ID of the created evaluation job + example: 'eval-1234-1244513' + status: type: string - format: date-time - description: Timestamp when the endpoint was created - example: 2024-02-28T21:34:35.444Z + enum: [pending] + description: Initial status of the job - DisplayorExecuteOutput: + EvaluationJob: + type: object properties: - data: - properties: - application/geo+json: - type: object - application/javascript: - type: string - application/json: - type: object - application/pdf: - format: byte - type: string - application/vnd.vega.v5+json: - type: object - application/vnd.vegalite.v4+json: - type: object - image/gif: - format: byte - type: string - image/jpeg: - format: byte - type: string - image/png: - format: byte - type: string - image/svg+xml: - type: string - text/html: - type: string - text/latex: - type: string - text/markdown: - type: string - text/plain: - type:
string - type: object + workflow_id: + type: string + description: The evaluation job ID + example: 'eval-1234aedf' type: - enum: - - display_data - - execute_result type: string - required: - - type - - data - title: DisplayorExecuteOutput - - Error: - oneOf: - - type: string - - additionalProperties: true + enum: [classify, score, compare] + description: The type of evaluation + example: classify + owner_id: + type: string + description: ID of the job owner (admin only) + status: + type: string + enum: [pending, queued, running, completed, error, user_error] + description: Current status of the job + example: completed + status_updates: + type: array + items: + $ref: '#/components/schemas/EvaluationJobStatusUpdate' + description: History of status updates (admin only) + parameters: type: object - title: Error - - ErrorOutput: - title: ErrorOutput - description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully. - properties: - data: + description: The parameters used for this evaluation + additionalProperties: true + created_at: type: string - type: - enum: - - error + format: date-time + description: When the job was created + example: '2025-07-23T17:10:04.837888Z' + updated_at: type: string - required: - - type - - data + format: date-time + description: When the job was last updated + example: '2025-07-23T17:10:04.837888Z' + results: + oneOf: + - $ref: '#/components/schemas/EvaluationClassifyResults' + - $ref: '#/components/schemas/EvaluationScoreResults' + - $ref: '#/components/schemas/EvaluationCompareResults' + - type: object + properties: + error: + type: string + nullable: true + description: Results of the evaluation (when completed) - ExecuteRequest: - title: ExecuteRequest - required: - - language - - code + EvaluationJobStatusUpdate: + type: object properties: - code: - description: 'Code snippet to execute.' 
- example: "print('Hello, world!')" + status: type: string - files: - description: Files to upload to the session. If present, files will be uploaded before executing the given code. - items: - properties: - content: - type: string - encoding: - description: Encoding of the file content. Use `string` for text files such as code, and `base64` for binary files, such as images. - enum: - - string - - base64 - type: string - name: - type: string - required: - - name - - encoding - - content - type: object - type: array - language: - default: python - description: Programming language for the code to execute. Currently only supports Python, but more will be added. - enum: - - python - session_id: - description: Identifier of the current session. Used to make follow-up calls. Requests will return an error if the session does not belong to the caller or has expired. - example: ses_abcDEF123 - nullable: false + description: The status at this update + example: pending + message: + type: string + description: Additional message for this update + example: Job is pending evaluation + timestamp: + type: string + format: date-time + description: When this update occurred + example: '2025-07-23T17:10:04.837888Z' + + EvaluationClassifyResults: + type: object + properties: + generation_fail_count: + type: number + format: integer + nullable: true + description: Number of failed generations. + example: 0 + judge_fail_count: + type: number + format: integer + nullable: true + description: Number of failed judge generations + example: 0 + invalid_label_count: + type: number + format: float + nullable: true + description: Number of invalid labels + example: 0 + result_file_id: + type: string + description: Data File ID + example: file-1234-aefd + pass_percentage: + type: number + format: integer + nullable: true + description: Percentage of pass labels.
+ example: 10 + label_counts: type: string + description: JSON string representing label counts + example: '{"yes": 10, "no": 0}' - ExecuteResponse: - title: ExecuteResponse + EvaluationScoreResults: type: object - description: 'The result of the execution. If successful, `data` contains the result and `errors` will be null. If unsuccessful, `data` will be null and `errors` will contain the errors.' - oneOf: - - title: SuccessfulExecution - type: object - required: [data, errors] - properties: - errors: - type: 'null' - data: - type: object - nullable: false - required: [session_id, outputs] - properties: - outputs: - type: array - items: - discriminator: - propertyName: type - oneOf: - - title: StreamOutput - description: Outputs that were printed to stdout or stderr - type: object - required: [type, data] - properties: - type: - enum: - - stdout - - stderr - type: string - data: - type: string - - description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully. 
- properties: - data: - type: string - type: - enum: - - error - type: string - required: - - type - - data - title: ErrorOutput - - properties: - data: - properties: - application/geo+json: - type: object - additionalProperties: true - application/javascript: - type: string - application/json: - type: object - additionalProperties: true - application/pdf: - format: byte - type: string - application/vnd.vega.v5+json: - type: object - additionalProperties: true - application/vnd.vegalite.v4+json: - type: object - additionalProperties: true - image/gif: - format: byte - type: string - image/jpeg: - format: byte - type: string - image/png: - format: byte - type: string - image/svg+xml: - type: string - text/html: - type: string - text/latex: - type: string - text/markdown: - type: string - text/plain: - type: string - type: object - type: - enum: - - display_data - - execute_result - type: string - required: - - type - - data - title: DisplayorExecuteOutput - title: InterpreterOutput - session_id: - type: string - description: Identifier of the current session. Used to make follow-up calls. - example: ses_abcDEF123 - nullable: false - status: - type: string - enum: - - success - description: Status of the execution. Currently only supports success. - - title: FailedExecution + properties: + aggregated_scores: type: object - required: [data, errors] - properties: - data: - type: 'null' - errors: - type: array - items: - title: Error - oneOf: - - type: string - - type: object - additionalProperties: true - - InterpreterOutput: - discriminator: - propertyName: type - oneOf: - - description: Outputs that were printed to stdout or stderr - properties: - data: - type: string - type: - enum: - - stdout - - stderr - type: string - required: - - type - - data - title: StreamOutput - - description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully. 
properties: - data: - type: string - type: - enum: - - error - type: string - required: - - type - - data - title: ErrorOutput - - properties: - data: - properties: - application/geo+json: - type: object - application/javascript: - type: string - application/json: - type: object - application/pdf: - format: byte - type: string - application/vnd.vega.v5+json: - type: object - application/vnd.vegalite.v4+json: - type: object - image/gif: - format: byte - type: string - image/jpeg: - format: byte - type: string - image/png: - format: byte - type: string - image/svg+xml: - type: string - text/html: - type: string - text/latex: - type: string - text/markdown: - type: string - text/plain: - type: string - type: object - type: - enum: - - display_data - - execute_result - type: string - required: - - type - - data - title: DisplayorExecuteOutput - title: InterpreterOutput + mean_score: + type: number + format: float + std_score: + type: number + format: float + pass_percentage: + type: number + format: float + generation_fail_count: + type: number + format: integer + nullable: true + description: Number of failed generations. 
+ example: 0 + judge_fail_count: + type: number + format: integer + nullable: true + description: Number of failed judge generations + example: 0 + invalid_score_count: + type: number + format: integer + description: number of invalid scores generated from model + failed_samples: + type: number + format: integer + description: number of failed samples generated from model + result_file_id: + type: string + description: Data File ID + example: file-1234-aefd - Response: + EvaluationCompareResults: + type: object properties: - errors: + num_samples: + type: integer + description: Total number of samples compared + A_wins: + type: integer + description: Number of times model A won + B_wins: + type: integer + description: Number of times model B won + Ties: + type: integer + description: Number of ties + generation_fail_count: + type: number + format: integer + nullable: true + description: Number of failed generations. + example: 0 + judge_fail_count: + type: number + format: integer + nullable: true + description: Number of failed judge generations + example: 0 + result_file_id: + type: string + description: Data File ID + + AudioFileBinary: + type: string + format: binary + description: Audio file to transcribe + + AudioFileUrl: + type: string + format: uri + description: Public HTTPS URL to audio file + + CreateVideoBody: + title: Create video request + description: Parameters for creating a new video generation job. + type: object + required: + - model + properties: + model: + type: string + description: The model to be used for the video creation request. + prompt: + type: string + maxLength: 32000 + minLength: 1 + description: Text prompt that describes the video to generate. + height: + type: integer + width: + type: integer + seconds: + type: string + description: Clip duration in seconds. + fps: + type: integer + description: Frames per second. Defaults to 24. 
+ steps: + type: integer + minimum: 10 + maximum: 50 + description: The number of denoising steps the model performs during video generation. More steps typically result in higher quality output but require longer processing time. + seed: + type: integer + description: Seed to use in initializing the video generation. Using the same seed allows deterministic video generation. If not provided, a random seed is generated for each request. + guidance_scale: + type: integer + description: Controls how closely the video generation follows your prompt. Higher values make the model adhere more strictly to your text description, while lower values allow more creative freedom. guidance_scale affects both visual content and temporal consistency. Recommended range is 6.0-10.0 for most video models. Values above 12 may cause over-guidance artifacts or unnatural motion patterns. + output_format: + $ref: '#/components/schemas/VideoOutputFormat' + description: Specifies the format of the output video. Defaults to MP4. + output_quality: + type: integer + description: Compression quality. Defaults to 20. + negative_prompt: + type: string + description: Similar to prompt, but specifies what to avoid instead of what to include + frame_images: + description: Array of images to guide video generation, similar to keyframes. + example: + - [ + { + "input_image": "aac49721-1964-481a-ae78-8a4e29b91402", + "frame": 0 + }, + { + "input_image": "c00abf5f-6cdb-4642-a01d-1bfff7bc3cf7", + "frame": 48 + }, + { + "input_image": "3ad204c3-a9de-4963-8a1a-c3911e3afafe", + "frame": "last" + } + ] + type: array items: - oneOf: - - type: string - - additionalProperties: true - type: object - title: Error + $ref: '#/components/schemas/VideoFrameImageInput' + reference_images: + description: Unlike frame_images which constrain specific timeline positions, reference images guide the general appearance that should appear consistently across the video.
type: array - title: Response - type: object + items: + type: string + VideoStatus: + description: Current lifecycle status of the video job. + type: string + enum: + - in_progress + - completed + - failed - SessionListResponse: - allOf: - - properties: - errors: - items: - oneOf: - - type: string - - additionalProperties: true - type: object - title: Error - type: array - title: Response - type: object - - properties: - data: - properties: - sessions: - items: - properties: - execute_count: - type: integer - expires_at: - format: date-time - type: string - id: - description: Session Identifier. Used to make follow-up calls. - example: ses_abcDEF123 - type: string - last_execute_at: - format: date-time - type: string - started_at: - format: date-time - type: string - required: - - execute_count - - expires_at - - id - - last_execute_at - - started_at - type: object - type: array - required: - - sessions - type: object - title: SessionListResponse + VideoFrameImageInput: type: object + required: ['input_image'] + properties: + input_image: + type: string + description: URL path to hosted image that is used for a frame + frame: + description: | + Optional param to specify where to insert the frame. If this is omitted, the following heuristics are applied: + - frame_images size is one, frame is first. + - If size is two, frames are first and last. + - If size is larger, frames are first, last and evenly spaced between. + anyOf: + - type: number + - type: string + enum: + - first + - last - StreamOutput: - description: Outputs that were printed to stdout or stderr + VideoOutputFormat: + type: string + enum: + - MP4 + - WEBM + + VideoJob: properties: - data: + id: + type: string + description: Unique identifier for the video job. + object: + description: The object type, which is always video. 
type: string - type: enum: - stdout - stderr + - video + model: type: string - required: - - type - - data - title: StreamOutput - - CreateBatchRequest: + description: The video generation model that produced the job. + status: + $ref: '#/components/schemas/VideoStatus' + description: Current lifecycle status of the video job. + created_at: + type: number + description: Unix timestamp (seconds) for when the job was created. + completed_at: + type: number + description: Unix timestamp (seconds) for when the job completed, if finished. + size: + type: string + description: The resolution of the generated video. + seconds: + type: string + description: Duration of the generated clip in seconds. + error: + description: Error payload that explains why generation failed, if applicable. + type: object + properties: + code: + type: string + message: + type: string + required: + - message + outputs: + description: Available upon completion, the outputs object provides the cost charged and the hosted URL to access the video + type: object + properties: + cost: + type: integer + description: The cost of the generated video charged to the owner's account. + video_url: + type: string + description: URL hosting the generated video + required: + - cost + - video_url type: object - required: [endpoint, input_file_id] + required: + - id + - model + - status + - size + - seconds + - created_at + title: Video job + description: Structured information describing a generated video job.
+ ContainerStatus: properties: - endpoint: + finishedAt: + description: FinishedAt is the timestamp when the container finished execution + (if terminated) type: string - description: The endpoint to use for batch processing - example: '/v1/chat/completions' - input_file_id: + message: + description: Message provides a human-readable message with details about the + container's status type: string - description: ID of the uploaded input file containing batch requests - example: 'file-abc123def456ghi789' - completion_window: + name: + description: Name is the name of the container type: string - description: Time window for batch completion (optional) - example: '24h' - priority: - type: integer - description: Priority for batch processing (optional) - example: 1 - model_id: + reason: + description: Reason provides a brief machine-readable reason for the container's + current status type: string - description: 'Model to use for processing batch requests' - example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo' - BatchErrorResponse: - type: object - properties: - error: + startedAt: + description: StartedAt is the timestamp when the container started execution type: string - BatchJobWithWarning: - type: object - properties: - job: - $ref: '#/components/schemas/BatchJob' - warning: + status: + description: Status is the current state of the container (e.g., "Running", + "Terminated", "Waiting") type: string - BatchJob: type: object + CreateDeploymentRequest: properties: - id: - type: string - format: uuid - example: '01234567-8901-2345-6789-012345678901' - user_id: - type: string - example: 'user_789xyz012' - input_file_id: + args: + description: Args overrides the container's CMD. Provide as an array of + arguments (e.g., ["python", "app.py"]) + items: + type: string + type: array + autoscaling: + additionalProperties: + type: string + description: 'Autoscaling configuration as key-value pairs. 
Example: {"metric": + "QueueBacklogPerWorker", "target": "10"} to scale based on queue + backlog' + type: object + command: + description: Command overrides the container's ENTRYPOINT. Provide as an array + (e.g., ["/bin/sh", "-c"]) + items: + type: string + type: array + cpu: + description: CPU is the number of CPU cores to allocate per container instance + (e.g., 0.1 = 100 milli cores) + minimum: 0.1 + type: number + description: + description: Description is an optional human-readable description of your + deployment type: string - example: 'file-input123abc456def' - file_size_bytes: + environment_variables: + description: EnvironmentVariables is a list of environment variables to set in + the container. Each must have a name and either a value or + value_from_secret + items: + $ref: "#/components/schemas/EnvironmentVariable" + type: array + gpu_count: + description: GPUCount is the number of GPUs to allocate per container instance. + Defaults to 0 if not specified type: integer - format: int64 - example: 1048576 - description: 'Size of input file in bytes' - status: - $ref: '#/components/schemas/BatchJobStatus' - job_deadline: + gpu_type: + description: GPUType specifies the GPU hardware to use (e.g., "h100-80gb"). + enum: + - h100-80gb + - a100-80gb type: string - format: date-time - example: '2024-01-15T15:30:00Z' - created_at: + health_check_path: + description: HealthCheckPath is the HTTP path for health checks (e.g., + "/health"). If set, the platform will check this endpoint to + determine container health type: string - format: date-time - example: '2024-01-15T14:30:00Z' - endpoint: + image: + description: Image is the container image to deploy from registry.together.ai. type: string - example: '/v1/chat/completions' - progress: + max_replicas: + description: MaxReplicas is the maximum number of container instances that can + be scaled up to.
If not set, will be set to MinReplicas + type: integer + memory: + description: Memory is the amount of RAM to allocate per container instance in + GiB (e.g., 0.5 = 512MiB) + minimum: 0.1 type: number - format: float64 - example: 75.0 - description: 'Completion progress (0.0 to 100)' - model_id: + min_replicas: + description: MinReplicas is the minimum number of container instances to run. + Defaults to 1 if not specified + type: integer + name: + description: Name is the unique identifier for your deployment. Must contain + only alphanumeric characters, underscores, or hyphens (1-100 + characters) + maxLength: 100 + minLength: 1 type: string - example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo' - description: 'Model used for processing requests' - output_file_id: + port: + description: Port is the container port your application listens on (e.g., 8080 + for web servers). Required if your application serves traffic + type: integer + storage: + description: Storage is the amount of ephemeral disk storage to allocate per + container instance (e.g., 10 = 10GiB) + type: integer + termination_grace_period_seconds: + description: TerminationGracePeriodSeconds is the time in seconds to wait for + graceful shutdown before forcefully terminating the replica + type: integer + volumes: + description: Volumes is a list of volume mounts to attach to the container. Each + mount must reference an existing volume by name + items: + $ref: "#/components/schemas/VolumeMount" + type: array + required: + - gpu_type + - image + - name + type: object + CreateSecretRequest: + properties: + description: + description: Description is an optional human-readable description of the + secret's purpose (max 500 characters) + maxLength: 500 type: string - example: 'file-output789xyz012ghi' - error_file_id: + name: + description: Name is the unique identifier for the secret. 
Can contain + alphanumeric characters, underscores, hyphens, forward slashes, and + periods (1-100 characters) + maxLength: 100 + minLength: 1 type: string - example: 'file-errors456def789jkl' - error: + project_id: + description: ProjectID is ignored - the project is automatically determined from + your authentication type: string - completed_at: + value: + description: Value is the sensitive data to store securely (e.g., API keys, + passwords, tokens). This value will be encrypted at rest + minLength: 1 type: string - format: date-time - example: '2024-01-15T15:45:30Z' - BatchJobStatus: - type: string - enum: - - VALIDATING - - IN_PROGRESS - - COMPLETED - - FAILED - - EXPIRED - - CANCELLED - example: 'IN_PROGRESS' - description: 'Current status of the batch job' - - EvaluationTypedRequest: + required: + - name + - value type: object + CreateVolumeRequest: + properties: + content: + allOf: + - $ref: "#/components/schemas/VolumeContent" + description: Content specifies the content configuration for this volume + name: + description: Name is the unique identifier for the volume within the project + type: string + type: + allOf: + - $ref: "#/components/schemas/VolumeType" + description: Type is the volume type (currently only "readOnly" is supported) required: + - content + - name - type - - parameters + type: object + DeploymentListResponse: properties: - type: + data: + description: Data is the array of deployment items + items: + $ref: "#/components/schemas/DeploymentResponseItem" + type: array + object: + description: Object is the type identifier for this response (always "list") type: string - enum: [classify, score, compare] - description: The type of evaluation to perform - example: 'classify' - parameters: - oneOf: - - $ref: '#/components/schemas/EvaluationClassifyParameters' - - $ref: '#/components/schemas/EvaluationScoreParameters' - - $ref: '#/components/schemas/EvaluationCompareParameters' - description: Type-specific parameters for the evaluation - - 
EvaluationClassifyParameters: type: object - required: - - judge - - labels - - pass_labels - - input_data_file_path + DeploymentLogs: properties: - judge: - $ref: '#/components/schemas/EvaluationJudgeModelConfig' - labels: + lines: + items: + type: string type: array + type: object + DeploymentResponseItem: + properties: + args: + description: Args are the arguments passed to the container's command items: type: string - minItems: 2 - description: List of possible classification labels - example: ['yes', 'no'] - pass_labels: type: array + autoscaling: + additionalProperties: + type: string + description: Autoscaling contains autoscaling configuration parameters for this + deployment + type: object + command: + description: Command is the entrypoint command run in the container items: type: string - minItems: 1 - description: List of labels that are considered passing - example: ['yes'] - model_to_evaluate: - $ref: '#/components/schemas/EvaluationModelOrString' - input_data_file_path: - type: string - description: Data file ID - example: 'file-1234-aefd' - - EvaluationScoreParameters: - type: object - required: - - judge - - min_score - - max_score - - pass_threshold - - input_data_file_path - properties: - judge: - $ref: '#/components/schemas/EvaluationJudgeModelConfig' - min_score: - type: number - format: float - example: 0.0 - description: Minimum possible score - max_score: + type: array + cpu: + description: CPU is the amount of CPU resource allocated to each replica in + cores (fractional value is allowed) type: number - format: float - example: 10.0 - description: Maximum possible score - pass_threshold: + created_at: + description: CreatedAt is the ISO8601 timestamp when this deployment was created + type: string + description: + description: Description provides a human-readable explanation of the + deployment's purpose or content + type: string + desired_replicas: + description: DesiredReplicas is the number of replicas that the orchestrator is + 
targeting + type: integer + environment_variables: + description: EnvironmentVariables is a list of environment variables set in the + container + items: + $ref: "#/components/schemas/EnvironmentVariable" + type: array + gpu_count: + description: GPUCount is the number of GPUs allocated to each replica in this + deployment + type: integer + gpu_type: + description: GPUType specifies the type of GPU requested (if any) for this + deployment + enum: + - h100-80gb + - a100-80gb + type: string + health_check_path: + description: HealthCheckPath is the HTTP path used for health checks of the + application + type: string + id: + description: ID is the unique identifier of the deployment + type: string + image: + description: Image specifies the container image used for this deployment + type: string + max_replicas: + description: MaxReplicas is the maximum number of replicas to run for this + deployment + type: integer + memory: + description: Memory is the amount of memory allocated to each replica in GiB + (fractional value is allowed) type: number - format: float - example: 7.0 - description: Score threshold for passing - model_to_evaluate: - $ref: '#/components/schemas/EvaluationModelOrString' - input_data_file_path: + min_replicas: + description: MinReplicas is the minimum number of replicas to run for this + deployment + type: integer + name: + description: Name is the name of the deployment + type: string + object: + description: Object is the type identifier for this response (always "deployment") + type: string + port: + description: Port is the container port that the deployment exposes + type: integer + ready_replicas: + description: ReadyReplicas is the current number of replicas that are in the + Ready state + type: integer + replica_events: + additionalProperties: + $ref: "#/components/schemas/ReplicaEvent" + description: ReplicaEvents is a mapping of replica names or IDs to their status + events + type: object + status: + allOf: + - $ref:
"#/components/schemas/DeploymentStatus" + description: Status represents the overall status of the deployment (e.g., + Updating, Scaling, Ready, Failed) + enum: + - Updating + - Scaling + - Ready + - Failed + storage: + description: Storage is the amount of storage (in MB or units as defined by the + platform) allocated to each replica + type: integer + updated_at: + description: UpdatedAt is the ISO8601 timestamp when this deployment was last + updated type: string - example: 'file-01234567890123456789' - description: Data file ID - - EvaluationCompareParameters: + volumes: + description: Volumes is a list of volume mounts for this deployment + items: + $ref: "#/components/schemas/VolumeMount" + type: array type: object - required: - - judge - - input_data_file_path + DeploymentStatus: + enum: + - Updating + - Scaling + - Ready + - Failed + type: string + x-enum-varnames: + - DeploymentStatusUpdating + - DeploymentStatusScaling + - DeploymentStatusReady + - DeploymentStatusFailed + EnvironmentVariable: properties: - judge: - $ref: '#/components/schemas/EvaluationJudgeModelConfig' - model_a: - $ref: '#/components/schemas/EvaluationModelOrString' - model_b: - $ref: '#/components/schemas/EvaluationModelOrString' - input_data_file_path: + name: + description: Name is the environment variable name (e.g., "DATABASE_URL"). Must + start with a letter or underscore, followed by letters, numbers, or + underscores + type: string + value: + description: Value is the plain text value for the environment variable. Use + this for non-sensitive values. Either Value or ValueFromSecret must + be set, but not both + type: string + value_from_secret: + description: ValueFromSecret references a secret by name or ID to use as the + value. Use this for sensitive values like API keys or passwords. 
+ Either Value or ValueFromSecret must be set, but not both type: string - description: Data file name - - EvaluationJudgeModelConfig: - type: object required: - - model - - system_template - - model_source + - name + type: object + ImageListResponse: properties: - model: - type: string - description: Name of the judge model - example: 'meta-llama/Llama-3-70B-Instruct-Turbo' - system_template: + data: + description: Data is the array of image items + items: + $ref: "#/components/schemas/ImageResponseItem" + type: array + object: + description: Object is the type identifier for this response (always "list") type: string - description: System prompt template for the judge - example: 'Imagine you are a helpful assistant' - model_source: + type: object + ImageResponseItem: + properties: + object: + description: Object is the type identifier for this response (always "image") type: string - description: "Source of the judge model." - enum: [serverless, dedicated, external] - external_api_token: + tag: + description: Tag is the image tag/version identifier (e.g., "latest", "v1.0.0") type: string - description: "Bearer/API token for external judge models." - external_base_url: + url: + description: URL is the full registry URL for this image including tag (e.g., + "registry.together.ai/project-id/repository:tag") type: string - description: "Base URL for external judge models. Must be OpenAI-compatible base URL." 
- - EvaluationModelOrString: - oneOf: - - type: string - description: Field name in the input data - - $ref: '#/components/schemas/EvaluationModelRequest' - - EvaluationModelRequest: type: object - required: - - model - - max_tokens - - temperature - - system_template - - input_template - - model_source + KubernetesEvent: properties: - model: + action: + description: Action is the action taken or reported by this event type: string - description: Name of the model to evaluate - example: 'meta-llama/Llama-3-70B-Instruct-Turbo' - max_tokens: + count: + description: Count is the number of times this event has occurred type: integer - minimum: 1 - description: Maximum number of tokens to generate - example: 512 - temperature: - type: number - format: float - minimum: 0 - maximum: 2 - description: Sampling temperature - example: 0.7 - system_template: - type: string - description: System prompt template - example: 'Imagine you are helpful assistant' - input_template: + first_seen: + description: FirstSeen is the timestamp when this event was first observed type: string - description: Input prompt template - example: 'Please classify {{prompt}} based on the labels below' - model_source: + last_seen: + description: LastSeen is the timestamp when this event was last observed type: string - description: "Source of the model." - enum: [serverless, dedicated, external] - external_api_token: + message: + description: Message is a human-readable description of the event type: string - description: "Bearer/API token for external models." - external_base_url: + reason: + description: Reason is a brief machine-readable reason for this event (e.g., + "Pulling", "Started", "Failed") type: string - description: "Base URL for external models. 
Must be OpenAI-compatible base URL" - - EvaluationResponse: type: object + ListSecretsResponse: properties: - workflow_id: + data: + description: Data is the array of secret items + items: + $ref: "#/components/schemas/SecretResponseItem" + type: array + object: + description: Object is the type identifier for this response (always "list") type: string - description: The ID of the created evaluation job - example: 'eval-1234-1244513' - status: + type: object + ListVolumesResponse: + properties: + data: + description: Data is the array of volume items + items: + $ref: "#/components/schemas/VolumeResponseItem" + type: array + object: + description: Object is the type identifier for this response (always "list") type: string - enum: [pending] - description: Initial status of the job - - EvaluationJob: type: object + ReplicaEvent: properties: - workflow_id: + container_status: + allOf: + - $ref: "#/components/schemas/ContainerStatus" + description: ContainerStatus provides detailed status information about the + container within this replica + events: + description: Events is a list of Kubernetes events related to this replica for + troubleshooting + items: + $ref: "#/components/schemas/KubernetesEvent" + type: array + replica_completed_at: + description: ReplicaCompletedAt is the timestamp when the replica finished + execution type: string - description: The evaluation job ID - example: 'eval-1234aedf' - type: + replica_marked_for_termination_at: + description: ReplicaMarkedForTerminationAt is the timestamp when the replica was + marked for termination type: string - enum: [classify, score, compare] - description: The type of evaluation - example: classify - owner_id: + replica_ready_since: + description: ReplicaReadySince is the timestamp when the replica became ready to + serve traffic type: string - description: ID of the job owner (admin only) - status: + replica_running_since: + description: ReplicaRunningSince is the timestamp when the replica entered the + 
running state type: string - enum: [pending, queued, running, completed, error, user_error] - description: Current status of the job - example: completed - status_updates: - type: array - items: - $ref: '#/components/schemas/EvaluationJobStatusUpdate' - description: History of status updates (admin only) - parameters: - type: object - description: The parameters used for this evaluation - additionalProperties: true - created_at: + replica_started_at: + description: ReplicaStartedAt is the timestamp when the replica was created type: string - format: date-time - description: When the job was created - example: '2025-07-23T17:10:04.837888Z' - updated_at: + replica_status: + description: ReplicaStatus is the current status of the replica (e.g., + "Running", "Pending", "Failed") type: string - format: date-time - description: When the job was last updated - example: '2025-07-23T17:10:04.837888Z' - results: - oneOf: - - $ref: '#/components/schemas/EvaluationClassifyResults' - - $ref: '#/components/schemas/EvaluationScoreResults' - - $ref: '#/components/schemas/EvaluationCompareResults' - - type: object - properties: - error: - type: string - nullable: true - description: Results of the evaluation (when completed) - - EvaluationJobStatusUpdate: - type: object - properties: - status: + replica_status_message: + description: ReplicaStatusMessage provides a human-readable message explaining + the replica's status type: string - description: The status at this update - example: pending - message: + replica_status_reason: + description: ReplicaStatusReason provides a brief machine-readable reason for + the replica's status type: string - description: Additional message for this update - example: Job is pending evaluation - timestamp: + scheduled_on_cluster: + description: ScheduledOnCluster identifies which cluster this replica is + scheduled on type: string - format: date-time - description: When this update occurred - example: '2025-07-23T17:10:04.837888Z' - - 
EvaluationClassifyResults: type: object + RepositoryListResponse: properties: - generation_fail_count: - type: number - format: integer - nullable: true - description: Number of failed generations. - example: 0 - judge_fail_count: - type: number - format: integer - nullable: true - description: Number of failed judge generations - example: 0 - invalid_label_count: - type: number - format: float - nullable: true - description: Number of invalid labels - example: 0 - result_file_id: - type: string - description: Data File ID - example: file-1234-aefd - pass_percentage: - type: number - format: integer - nullable: true - description: Pecentage of pass labels. - example: 10 - label_counts: + data: + description: Data is the array of repository items + items: + $ref: "#/components/schemas/RepositoryResponseItem" + type: array + object: + description: Object is the type identifier for this response (always "list") type: string - description: JSON string representing label counts - example: '{"yes": 10, "no": 0}' - - EvaluationScoreResults: type: object + RepositoryResponseItem: properties: - aggregated_scores: - type: object - properties: - mean_score: - type: number - format: float - std_score: - type: number - format: float - pass_percentage: - type: number - format: float - generation_fail_count: - type: number - format: integer - nullable: true - description: Number of failed generations. 
- example: 0 - judge_fail_count: - type: number - format: integer - nullable: true - description: Number of failed judge generations - example: 0 - invalid_score_count: - type: number - format: integer - description: number of invalid scores generated from model - failed_samples: - type: number - format: integer - description: number of failed samples generated from model - result_file_id: + id: + description: ID is the unique identifier for this repository (repository name + with slashes replaced by "___") + type: string + object: + description: Object is the type identifier for this response (always + "image-repository") + type: string + url: + description: URL is the full registry URL for this repository (e.g., + "registry.together.ai/project-id/repository-name") type: string - description: Data File ID - example: file-1234-aefd - - EvaluationCompareResults: type: object + SecretResponseItem: properties: - num_samples: - type: integer - description: Total number of samples compared - A_wins: - type: integer - description: Number of times model A won - B_wins: - type: integer - description: Number of times model B won - Ties: - type: integer - description: Number of ties - generation_fail_count: - type: number - format: integer - nullable: true - description: Number of failed generations. 
- example: 0 - judge_fail_count: - type: number - format: integer - nullable: true - description: Number of failed judge generations - example: 0 - result_file_id: + created_at: + description: CreatedAt is the ISO8601 timestamp when this secret was created + type: string + created_by: + description: CreatedBy is the identifier of the user who created this secret + type: string + description: + description: Description is a human-readable description of the secret's purpose + type: string + id: + description: ID is the unique identifier for this secret + type: string + last_updated_by: + description: LastUpdatedBy is the identifier of the user who last updated this + secret + type: string + name: + description: Name is the name/key of the secret + type: string + object: + description: Object is the type identifier for this response (always "secret") + type: string + updated_at: + description: UpdatedAt is the ISO8601 timestamp when this secret was last updated type: string - description: Data File ID - - AudioFileBinary: - type: string - format: binary - description: Audio file to transcribe - - AudioFileUrl: - type: string - format: uri - description: Public HTTPS URL to audio file - - CreateVideoBody: - title: Create video request - description: Parameters for creating a new video generation job. type: object - required: - - model + UpdateDeploymentRequest: properties: - model: - type: string - description: The model to be used for the video creation request. - prompt: + args: + description: Args overrides the container's CMD. Provide as an array of + arguments (e.g., ["python", "app.py"]) + items: + type: string + type: array + autoscaling: + additionalProperties: + type: string + description: 'Autoscaling configuration as key-value pairs. Example: {"metric": + "QueueBacklogPerWorker", "target": "10"} to scale based on queue + backlog' + type: object + command: + description: Command overrides the container's ENTRYPOINT. 
Provide as an array + (e.g., ["/bin/sh", "-c"]) + items: + type: string + type: array + cpu: + description: CPU is the number of CPU cores to allocate per container instance + (e.g., 0.1 = 100 milli cores) + minimum: 0.1 + type: number + description: + description: Description is an optional human-readable description of your + deployment type: string - maxLength: 32000 - minLength: 1 - description: Text prompt that describes the video to generate. - height: - type: integer - width: + environment_variables: + description: EnvironmentVariables is a list of environment variables to set in + the container. This will replace all existing environment variables + items: + $ref: "#/components/schemas/EnvironmentVariable" + type: array + gpu_count: + description: GPUCount is the number of GPUs to allocate per container instance type: integer - seconds: + gpu_type: + description: GPUType specifies the GPU hardware to use (e.g., "h100-80gb") + enum: + - h100-80gb + - a100-80gb type: string - description: Clip duration in seconds. - fps: + health_check_path: + description: HealthCheckPath is the HTTP path for health checks (e.g., + "/health"). Set to empty string to disable health checks + type: string + image: + description: Image is the container image to deploy from registry.together.ai. + type: string + max_replicas: + description: MaxReplicas is the maximum number of replicas that can be scaled up + to. type: integer - description: Frames per second. Defaults to 24. - steps: + memory: + description: Memory is the amount of RAM to allocate per container instance in + GiB (e.g., 0.5 = 512MiB) + minimum: 0.1 + type: number + min_replicas: + description: MinReplicas is the minimum number of replicas to run type: integer - minimum: 10 - maximum: 50 - description: The number of denoising steps the model performs during video generation. More steps typically result in higher quality output but require longer processing time. 
- seed: + name: + description: Name is the new unique identifier for your deployment. Must contain + only alphanumeric characters, underscores, or hyphens (1-100 + characters) + maxLength: 100 + minLength: 1 + type: string + port: + description: Port is the container port your application listens on (e.g., 8080 + for web servers) type: integer - description: Seed to use in initializing the video generation. Using the same seed allows deterministic video generation. If not provided a random seed is generated for each request. - guidance_scale: + storage: + description: Storage is the amount of ephemeral disk storage to allocate per + container instance (e.g., 10 = 10GiB) type: integer - description: Controls how closely the video generation follows your prompt. Higher values make the model adhere more strictly to your text description, while lower values allow more creative freedom. guidence_scale affects both visual content and temporal consistency.Recommended range is 6.0-10.0 for most video models. Values above 12 may cause over-guidance artifacts or unnatural motion patterns. - output_format: - $ref: '#/components/schemas/VideoOutputFormat' - description: Specifies the format of the output video. Defaults to MP4. - output_quality: + termination_grace_period_seconds: + description: TerminationGracePeriodSeconds is the time in seconds to wait for + graceful shutdown before forcefully terminating the replica type: integer - description: Compression quality. Defaults to 20. - negative_prompt: - type: string - description: Similar to prompt, but specifies what to avoid instead of what to include - frame_images: - description: Array of images to guide video generation, similar to keyframes. 
- example: - - [ - { - "input_image": "aac49721-1964-481a-ae78-8a4e29b91402", - "frame": 0 - }, - { - "input_image": "c00abf5f-6cdb-4642-a01d-1bfff7bc3cf7", - "frame": 48 - }, - { - "input_image": "3ad204c3-a9de-4963-8a1a-c3911e3afafe", - "frame": "last" - } - ] - type: array + volumes: + description: Volumes is a list of volume mounts to attach to the container. This + will replace all existing volumes items: - $ref: '#/components/schemas/VideoFrameImageInput' - reference_images: - description: Unlike frame_images which constrain specific timeline positions, reference images guide the general appearance that should appear consistently across the video. + $ref: "#/components/schemas/VolumeMount" type: array - items: - type: string - VideoStatus: - description: Current lifecycle status of the video job. - type: string - enum: - - in_progress - - completed - - failed - - VideoFrameImageInput: type: object - required: ['input_image'] + UpdateSecretRequest: properties: - input_image: + description: + description: Description is an optional human-readable description of the + secret's purpose (max 500 characters) + maxLength: 500 type: string - description: URL path to hosted image that is used for a frame - frame: - description: | - Optional param to specify where to insert the frame. If this is omitted, the following heuristics are applied: - - frame_images size is one, frame is first. - - If size is two, frames are first and last. - - If size is larger, frames are first, last and evenly spaced between. - anyOf: - - type: number - - type: string - enum: - - first - - last - - VideoOutputFormat: - type: string - enum: - - MP4 - - WEBM - - VideoJob: + name: + description: Name is the new unique identifier for the secret. 
Can contain + alphanumeric characters, underscores, hyphens, forward slashes, and + periods (1-100 characters) + maxLength: 100 + minLength: 1 + type: string + project_id: + description: ProjectID is ignored - the project is automatically determined from + your authentication + type: string + value: + description: Value is the new sensitive data to store securely. Updating this + will replace the existing secret value + minLength: 1 + type: string + type: object + UpdateVolumeRequest: + properties: + content: + allOf: + - $ref: "#/components/schemas/VolumeContent" + description: Content specifies the new content that will be preloaded to this + volume + name: + description: Name is the new unique identifier for the volume within the project + type: string + type: + allOf: + - $ref: "#/components/schemas/VolumeType" + description: Type is the new volume type (currently only "readOnly" is supported) + type: object + VolumeMount: + properties: + mount_path: + description: MountPath is the path in the container where the volume will be + mounted (e.g., "/data") + type: string + name: + description: Name is the name of the volume to mount. Must reference an existing + volume by name or ID + type: string + required: + - mount_path + - name + type: object + VolumeResponseItem: properties: + content: + allOf: + - $ref: "#/components/schemas/VolumeContent" + description: Content specifies the content that will be preloaded to this volume + created_at: + description: CreatedAt is the ISO8601 timestamp when this volume was created + type: string id: + description: ID is the unique identifier for this volume + type: string + name: + description: Name is the name of the volume type: string - description: Unique identifier for the video job. object: - description: The object type, which is always video. 
+ description: Object is the type identifier for this response (always "volume") type: string - enum: - - video - model: + type: + allOf: + - $ref: "#/components/schemas/VolumeType" + description: Type is the volume type (e.g., "readOnly") + updated_at: + description: UpdatedAt is the ISO8601 timestamp when this volume was last updated type: string - description: The video generation model that produced the job. - status: - $ref: '#/components/schemas/VideoStatus' - description: Current lifecycle status of the video job. - created_at: - type: number - description: Unix timestamp (seconds) for when the job was created. - completed_at: - type: number - description: Unix timestamp (seconds) for when the job completed, if finished. - size: + type: object + VolumeContent: + properties: + source_prefix: + description: SourcePrefix is the file path prefix for the content to be + preloaded into the volume + example: models/ type: string - description: The resolution of the generated video. - seconds: + type: + description: Type is the content type (currently only "files" is supported which + allows preloading files uploaded via Files API into the volume) + enum: + - files + example: files type: string - description: Duration of the generated clip in seconds. - error: - description: Error payload that explains why generation failed, if applicable. - type: object - properties: - code: - type: string - message: - type: string - required: - - message - outputs: - description: Available upon completion, the outputs provides the cost charged and the hosted url to access the video - type: object - properties: - cost: - type: integer - description: The cost of generated video charged to the owners account. 
- video_url: - type: string - description: URL hosting the generated video - required: - - cost - - video_url type: object - required: - - id - - model - - status - - size - - seconds - - created_at - title: Video job - description: Structured information describing a generated video job. + VolumeType: + enum: + - readOnly + type: string + x-enum-varnames: + - VolumeTypeReadOnly