diff --git a/openapi.yaml b/openapi.yaml
index de97a6c..3fd8889 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -5298,6 +5298,616 @@ paths:
}
}
```
+ /deployments:
+ get:
+ description: Get a list of all deployments in your project
+ responses:
+ "200":
+ description: List of deployments
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/DeploymentListResponse"
+ "500":
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ type: object
+ summary: Get the list of deployments
+ tags:
+ - Deployments
+ post:
+ description: Create a new deployment with specified configuration
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/CreateDeploymentRequest"
+ description: Deployment configuration
+ required: true
+ responses:
+ "200":
+ description: Deployment created successfully
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/DeploymentResponseItem"
+ "400":
+ description: Invalid request
+ content:
+ application/json:
+ schema:
+ type: object
+ "500":
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ type: object
+ summary: Create a new deployment
+ tags:
+ - Deployments
+ /deployments/{id}:
+ delete:
+ description: Delete an existing deployment
+ parameters:
+ - description: Deployment ID or name
+ in: path
+ name: id
+ required: true
+ schema:
+ type: string
+ responses:
+ "200":
+ description: Deployment deleted successfully
+ content:
+ application/json:
+ schema:
+ type: object
+ "404":
+ description: Deployment not found
+ content:
+ application/json:
+ schema:
+ type: object
+ "500":
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ type: object
+ summary: Delete a deployment
+ tags:
+ - Deployments
+ get:
+ description: Retrieve details of a specific deployment by its ID or name
+ parameters:
+ - description: Deployment ID or name
+ in: path
+ name: id
+ required: true
+ schema:
+ type: string
+ responses:
+ "200":
+ description: Deployment details
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/DeploymentResponseItem"
+ "404":
+ description: Deployment not found
+ content:
+ application/json:
+ schema:
+ type: object
+ "500":
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ type: object
+ summary: Get a deployment by ID or name
+ tags:
+ - Deployments
+ patch:
+ description: Update an existing deployment configuration
+ parameters:
+ - description: Deployment ID or name
+ in: path
+ name: id
+ required: true
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/UpdateDeploymentRequest"
+ description: Updated deployment configuration
+ required: true
+ responses:
+ "200":
+ description: Deployment updated successfully
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/DeploymentResponseItem"
+ "400":
+ description: Invalid request
+ content:
+ application/json:
+ schema:
+ type: object
+ "404":
+ description: Deployment not found
+ content:
+ application/json:
+ schema:
+ type: object
+ "500":
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ type: object
+ summary: Update a deployment
+ tags:
+ - Deployments
+ /deployments/{id}/logs:
+ get:
+ description: Retrieve logs from a deployment, optionally filtered by replica ID.
+ Use follow=true to stream logs in real-time.
+ parameters:
+ - description: Deployment ID or name
+ in: path
+ name: id
+ required: true
+ schema:
+ type: string
+ - description: Replica ID to filter logs
+ in: query
+ name: replica_id
+ schema:
+ type: string
+ - description: Stream logs in real-time (ndjson format)
+ in: query
+ name: follow
+ schema:
+ type: boolean
+ responses:
+ "200":
+ description: Deployment logs
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/DeploymentLogs"
+ "404":
+ description: Deployment not found
+ content:
+ application/json:
+ schema:
+ type: object
+ "500":
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ type: object
+ summary: Get logs for a deployment
+ tags:
+ - Deployments
+ /image-repositories:
+ get:
+ description: Retrieve all container image repositories available in your project
+ responses:
+ "200":
+ description: List of repositories
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/RepositoryListResponse"
+ "500":
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ type: object
+ summary: Get the list of image repositories in your project
+ tags:
+ - Images
+ /image-repositories/{id}/images:
+ get:
+ description: Retrieve all container images (tags) available in a specific repository
+ parameters:
+ - description: Repository ID
+ in: path
+ name: id
+ required: true
+ schema:
+ type: string
+ responses:
+ "200":
+ description: List of images
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ImageListResponse"
+ "404":
+ description: Repository not found
+ content:
+ application/json:
+ schema:
+ type: object
+ "500":
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ type: object
+ summary: Get the list of images available under a repository
+ tags:
+ - Images
+ /secrets:
+ get:
+ description: Retrieve all secrets in your project
+ responses:
+ "200":
+ description: List of secrets
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ListSecretsResponse"
+ "500":
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ type: object
+ summary: Get the list of project secrets
+ tags:
+ - Secrets
+ post:
+ description: Create a new secret to store sensitive configuration values
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/CreateSecretRequest"
+ description: Secret configuration
+ required: true
+ responses:
+ "200":
+ description: Secret created successfully
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/SecretResponseItem"
+ "400":
+ description: Invalid request
+ content:
+ application/json:
+ schema:
+ type: object
+ "500":
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ type: object
+ summary: Create a new secret
+ tags:
+ - Secrets
+ /secrets/{id}:
+ delete:
+ description: Delete an existing secret
+ parameters:
+ - description: Secret ID or name
+ in: path
+ name: id
+ required: true
+ schema:
+ type: string
+ responses:
+ "200":
+ description: Secret deleted successfully
+ content:
+ application/json:
+ schema:
+ type: object
+ "404":
+ description: Secret not found
+ content:
+ application/json:
+ schema:
+ type: object
+ "500":
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ type: object
+ summary: Delete a secret
+ tags:
+ - Secrets
+ get:
+ description: Retrieve details of a specific secret by its ID or name
+ parameters:
+ - description: Secret ID or name
+ in: path
+ name: id
+ required: true
+ schema:
+ type: string
+ responses:
+ "200":
+ description: Secret details
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/SecretResponseItem"
+ "404":
+ description: Secret not found
+ content:
+ application/json:
+ schema:
+ type: object
+ "500":
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ type: object
+ summary: Get a secret by ID or name
+ tags:
+ - Secrets
+ patch:
+ description: Update an existing secret's value or metadata
+ parameters:
+ - description: Secret ID or name
+ in: path
+ name: id
+ required: true
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/UpdateSecretRequest"
+ description: Updated secret configuration
+ required: true
+ responses:
+ "200":
+ description: Secret updated successfully
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/SecretResponseItem"
+ "400":
+ description: Invalid request
+ content:
+ application/json:
+ schema:
+ type: object
+ "404":
+ description: Secret not found
+ content:
+ application/json:
+ schema:
+ type: object
+ "500":
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ type: object
+ summary: Update a secret
+ tags:
+ - Secrets
+ /storage/{filename}:
+ get:
+ description: Download a file by redirecting to a signed URL
+ parameters:
+ - description: Filename
+ in: path
+ name: filename
+ required: true
+ schema:
+ type: string
+ responses:
+ "307":
+ description: Redirect to signed download URL
+ content:
+ application/json:
+ schema:
+ type: string
+ "400":
+ description: Invalid request
+ content:
+ application/json:
+ schema:
+ additionalProperties:
+ type: string
+ type: object
+ "404":
+ description: File not found
+ content:
+ application/json:
+ schema:
+ additionalProperties:
+ type: string
+ type: object
+ "500":
+ description: Internal error
+ content:
+ application/json:
+ schema:
+ additionalProperties:
+ type: string
+ type: object
+ summary: Download a file
+ tags:
+ - files
+ /storage/volumes:
+ get:
+ description: Retrieve all volumes in your project
+ responses:
+ "200":
+ description: List of volumes
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/ListVolumesResponse"
+ "500":
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ type: object
+ summary: Get the list of project volumes
+ tags:
+ - Volumes
+ post:
+ description: Create a new volume to preload files in deployments
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/CreateVolumeRequest"
+ description: Volume configuration
+ required: true
+ responses:
+ "200":
+ description: Volume created successfully
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/VolumeResponseItem"
+ "400":
+ description: Invalid request
+ content:
+ application/json:
+ schema:
+ type: object
+ "500":
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ type: object
+ summary: Create a new volume
+ tags:
+ - Volumes
+ /storage/volumes/{id}:
+ delete:
+ description: Delete an existing volume
+ parameters:
+ - description: Volume ID or name
+ in: path
+ name: id
+ required: true
+ schema:
+ type: string
+ responses:
+ "200":
+ description: Volume deleted successfully
+ content:
+ application/json:
+ schema:
+ type: object
+ "404":
+ description: Volume not found
+ content:
+ application/json:
+ schema:
+ type: object
+ "500":
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ type: object
+ summary: Delete a volume
+ tags:
+ - Volumes
+ get:
+ description: Retrieve details of a specific volume by its ID or name
+ parameters:
+ - description: Volume ID or name
+ in: path
+ name: id
+ required: true
+ schema:
+ type: string
+ responses:
+ "200":
+ description: Volume details
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/VolumeResponseItem"
+ "404":
+ description: Volume not found
+ content:
+ application/json:
+ schema:
+ type: object
+ "500":
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ type: object
+ summary: Get a volume by ID or name
+ tags:
+ - Volumes
+ patch:
+ description: Update an existing volume's configuration or contents
+ parameters:
+ - description: Volume ID or name
+ in: path
+ name: id
+ required: true
+ schema:
+ type: string
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/UpdateVolumeRequest"
+ description: Updated volume configuration
+ required: true
+ responses:
+ "200":
+ description: Volume updated successfully
+ content:
+ application/json:
+ schema:
+ $ref: "#/components/schemas/VolumeResponseItem"
+ "400":
+ description: Invalid request
+ content:
+ application/json:
+ schema:
+ type: object
+ "404":
+ description: Volume not found
+ content:
+ application/json:
+ schema:
+ type: object
+ "500":
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ type: object
+ summary: Update a volume
+ tags:
+ - Volumes
+
components:
securitySchemes:
@@ -6459,41 +7069,283 @@ components:
- zh
response_encoding:
type: string
- description: Audio encoding of response
- default: pcm_f32le
- enum:
- - pcm_f32le
- - pcm_s16le
- - pcm_mulaw
- - pcm_alaw
- sample_rate:
+ description: Audio encoding of response
+ default: pcm_f32le
+ enum:
+ - pcm_f32le
+ - pcm_s16le
+ - pcm_mulaw
+ - pcm_alaw
+ sample_rate:
+ type: integer
+ default: 44100
+ description: Sampling rate to use for the output audio. The default sampling rate for canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for cartesia/sonic is 44100.
+ stream:
+ type: boolean
+ default: false
+ description: 'If true, output is streamed for several characters at a time instead of waiting for the full response. The stream terminates with `data: [DONE]`. If false, the encoded audio is returned as an octet stream.'
+
+ AudioTranscriptionRequest:
+ type: object
+ required:
+ - file
+ properties:
+ file:
+ oneOf:
+ - $ref: '#/components/schemas/AudioFileBinary'
+ - $ref: '#/components/schemas/AudioFileUrl'
+ description: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, .webm, .flac.
+ model:
+ type: string
+ description: Model to use for transcription
+ default: openai/whisper-large-v3
+ enum:
+ - openai/whisper-large-v3
+ language:
+ type: string
+ description: Optional ISO 639-1 language code. If `auto` is provided, language is auto-detected.
+ default: en
+ example: en
+ prompt:
+ type: string
+ description: Optional text to bias decoding.
+ response_format:
+ type: string
+ description: The format of the response
+ default: json
+ enum:
+ - json
+ - verbose_json
+ temperature:
+ type: number
+ format: float
+ description: Sampling temperature between 0.0 and 1.0
+ default: 0.0
+ minimum: 0.0
+ maximum: 1.0
+ timestamp_granularities:
+ oneOf:
+ - type: string
+ enum:
+ - segment
+ - word
+ - type: array
+ items:
+ type: string
+ enum:
+ - segment
+ - word
+ uniqueItems: true
+ minItems: 1
+ maxItems: 2
+ description: Controls level of timestamp detail in verbose_json. Only used when response_format is verbose_json. Can be a single granularity or an array to get multiple levels.
+ default: segment
+ example: ['word', 'segment']
+ diarize:
+ type: boolean
+ description: >
+ Whether to enable speaker diarization. When enabled, each entry in the words array of the response
+ includes the speaker id for that word.
+ In addition, the response contains a speaker_segments array, which gives the speaker id for each speaker segment together with the segment's start and end times and all the words in the segment.
+
+
+ For example:
+ ...
+ "speaker_segments": [{
+ "speaker_id": "SPEAKER_00",
+ "start": 0,
+ "end": 30.02,
+ "words": [
+ {
+ "id": 0,
+ "word": "Tijana",
+ "start": 0,
+ "end": 11.475,
+ "speaker_id": "SPEAKER_00"
+ },
+ ...
+ default: false
+ min_speakers:
+ type: integer
+ description: Minimum number of speakers expected in the audio. Used to improve diarization accuracy when the approximate number of speakers is known.
+ max_speakers:
+ type: integer
+ description: Maximum number of speakers expected in the audio. Used to improve diarization accuracy when the approximate number of speakers is known.
+
+ AudioTranscriptionResponse:
+ oneOf:
+ - $ref: '#/components/schemas/AudioTranscriptionJsonResponse'
+ - $ref: '#/components/schemas/AudioTranscriptionVerboseJsonResponse'
+
+ AudioTranscriptionJsonResponse:
+ type: object
+ required:
+ - text
+ properties:
+ text:
+ type: string
+ description: The transcribed text
+ example: Hello, world!
+
+ AudioTranscriptionVerboseJsonResponse:
+ type: object
+ required:
+ - task
+ - language
+ - duration
+ - text
+ - segments
+ properties:
+ task:
+ type: string
+ description: The task performed
+ enum:
+ - transcribe
+ - translate
+ example: transcribe
+ language:
+ type: string
+ description: The language of the audio
+ example: english
+ duration:
+ type: number
+ format: float
+ description: The duration of the audio in seconds
+ example: 3.5
+ text:
+ type: string
+ description: The transcribed text
+ example: Hello, world!
+ segments:
+ type: array
+ items:
+ $ref: '#/components/schemas/AudioTranscriptionSegment'
+ description: Array of transcription segments
+ words:
+ type: array
+ items:
+ $ref: '#/components/schemas/AudioTranscriptionWord'
+ description: Array of transcription words (only when timestamp_granularities includes 'word')
+ speaker_segments:
+ type: array
+ items:
+ $ref: '#/components/schemas/AudioTranscriptionSpeakerSegment'
+ description: Array of transcription speaker segments (only when diarize is enabled)
+
+ AudioTranscriptionSegment:
+ type: object
+ required:
+ - id
+ - start
+ - end
+ - text
+ properties:
+ id:
+ type: integer
+ description: Unique identifier for the segment
+ example: 0
+ start:
+ type: number
+ format: float
+ description: Start time of the segment in seconds
+ example: 0.0
+ end:
+ type: number
+ format: float
+ description: End time of the segment in seconds
+ example: 3.5
+ text:
+ type: string
+ description: The text content of the segment
+ example: Hello, world!
+
+ AudioTranscriptionWord:
+ type: object
+ required:
+ - word
+ - start
+ - end
+ properties:
+ word:
+ type: string
+ description: The word
+ example: Hello
+ start:
+ type: number
+ format: float
+ description: Start time of the word in seconds
+ example: 0.0
+ end:
+ type: number
+ format: float
+ description: End time of the word in seconds
+ example: 0.5
+ speaker_id:
+ type: string
+ description: The speaker id for the word (only when diarize is enabled)
+ example: SPEAKER_00
+
+ AudioTranscriptionSpeakerSegment:
+ type: object
+ required:
+ - speaker_id
+ - start
+ - end
+ - words
+ - text
+ - id
+ properties:
+ speaker_id:
+ type: string
+ description: The speaker identifier
+ example: SPEAKER_00
+ start:
+ type: number
+ format: float
+ description: Start time of the speaker segment in seconds
+ example: 0.0
+ end:
+ type: number
+ format: float
+ description: End time of the speaker segment in seconds
+ example: 30.02
+ words:
+ type: array
+ items:
+ $ref: '#/components/schemas/AudioTranscriptionWord'
+ description: Array of words spoken by this speaker in this segment
+ text:
+ type: string
+ description: The full text spoken by this speaker in this segment
+ example: "Hello, how are you doing today?"
+ id:
type: integer
- default: 44100
- description: Sampling rate to use for the output audio. The default sampling rate for canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for cartesia/sonic is 44100.
- stream:
- type: boolean
- default: false
- description: 'If true, output is streamed for several characters at a time instead of waiting for the full response. The stream terminates with `data: [DONE]`. If false, return the encoded audio as octet stream'
+ description: Unique identifier for the speaker segment
+ example: 1
- AudioTranscriptionRequest:
+ AudioTranslationRequest:
type: object
required:
- file
properties:
file:
oneOf:
- - $ref: '#/components/schemas/AudioFileBinary'
- - $ref: '#/components/schemas/AudioFileUrl'
+ - type: string
+ format: binary
+ description: Audio file to translate
+ - type: string
+ format: uri
+ description: Public HTTP/HTTPS URL to audio file
description: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, .webm, .flac.
model:
type: string
- description: Model to use for transcription
+ description: Model to use for translation
default: openai/whisper-large-v3
enum:
- openai/whisper-large-v3
language:
type: string
- description: Optional ISO 639-1 language code. If `auto` is provided, language is auto-detected.
+ description: Target output language. Optional ISO 639-1 language code. If omitted, language is set to English.
default: en
example: en
prompt:
@@ -6531,53 +7383,23 @@ components:
description: Controls level of timestamp detail in verbose_json. Only used when response_format is verbose_json. Can be a single granularity or an array to get multiple levels.
default: segment
example: ['word', 'segment']
- diarize:
- type: boolean
- description: >
- Whether to enable speaker diarization. When enabled, you will get the speaker id for each word in the transcription.
- In the response, in the words array, you will get the speaker id for each word.
- In addition, we also return the speaker_segments array which contains the speaker id for each speaker segment along with the start and end time of the segment along with all the words in the segment.
-
-
- For eg -
- ...
- "speaker_segments": [
- "speaker_id": "SPEAKER_00",
- "start": 0,
- "end": 30.02,
- "words": [
- {
- "id": 0,
- "word": "Tijana",
- "start": 0,
- "end": 11.475,
- "speaker_id": "SPEAKER_00"
- },
- ...
- default: false
- min_speakers:
- type: integer
- description: Minimum number of speakers expected in the audio. Used to improve diarization accuracy when the approximate number of speakers is known.
- max_speakers:
- type: integer
- description: Maximum number of speakers expected in the audio. Used to improve diarization accuracy when the approximate number of speakers is known.
- AudioTranscriptionResponse:
+ AudioTranslationResponse:
oneOf:
- - $ref: '#/components/schemas/AudioTranscriptionJsonResponse'
- - $ref: '#/components/schemas/AudioTranscriptionVerboseJsonResponse'
+ - $ref: '#/components/schemas/AudioTranslationJsonResponse'
+ - $ref: '#/components/schemas/AudioTranslationVerboseJsonResponse'
- AudioTranscriptionJsonResponse:
+ AudioTranslationJsonResponse:
type: object
required:
- text
properties:
text:
type: string
- description: The transcribed text
+ description: The translated text
example: Hello, world!
- AudioTranscriptionVerboseJsonResponse:
+ AudioTranslationVerboseJsonResponse:
type: object
required:
- task
@@ -6592,623 +7414,865 @@ components:
enum:
- transcribe
- translate
- example: transcribe
+ example: translate
language:
type: string
- description: The language of the audio
- example: english
- duration:
- type: number
- format: float
- description: The duration of the audio in seconds
- example: 3.5
- text:
+ description: The target language of the translation
+ example: english
+ duration:
+ type: number
+ format: float
+ description: The duration of the audio in seconds
+ example: 3.5
+ text:
+ type: string
+ description: The translated text
+ example: Hello, world!
+ segments:
+ type: array
+ items:
+ $ref: '#/components/schemas/AudioTranscriptionSegment'
+ description: Array of translation segments
+ words:
+ type: array
+ items:
+ $ref: '#/components/schemas/AudioTranscriptionWord'
+ description: Array of translation words (only when timestamp_granularities includes 'word')
+
+ AudioSpeechStreamResponse:
+ oneOf:
+ - $ref: '#/components/schemas/AudioSpeechStreamEvent'
+ - $ref: '#/components/schemas/StreamSentinel'
+
+ AudioSpeechStreamEvent:
+ type: object
+ required: [data]
+ properties:
+ data:
+ $ref: '#/components/schemas/AudioSpeechStreamChunk'
+
+ AudioSpeechStreamChunk:
+ type: object
+ required: [object, model, b64]
+ properties:
+ object:
+ type: string
+ enum:
+ - audio.tts.chunk
+ model:
+ type: string
+ example: cartesia/sonic
+ b64:
+ type: string
+ description: base64 encoded audio stream
+
+ StreamSentinel:
+ type: object
+ required: [data]
+ properties:
+ data:
+ title: stream_signal
+ type: string
+ enum:
+ - '[DONE]'
+
+ ChatCompletionToken:
+ type: object
+ required: [id, text, logprob, special]
+ properties:
+ id:
+ type: integer
+ text:
+ type: string
+ logprob:
+ type: number
+ special:
+ type: boolean
+
+ ChatCompletionChoice:
+ type: object
+ required: [index, delta, finish_reason]
+ properties:
+ index:
+ type: integer
+ finish_reason:
+ $ref: '#/components/schemas/FinishReason'
+ logprobs:
+ $ref: '#/components/schemas/LogprobsPart'
+ delta:
+ title: ChatCompletionChoiceDelta
+ type: object
+ required: [role]
+ properties:
+ token_id:
+ type: integer
+ role:
+ type: string
+ enum: ['system', 'user', 'assistant', 'function', 'tool']
+ content:
+ type: string
+ nullable: true
+ tool_calls:
+ type: array
+ items:
+ $ref: '#/components/schemas/ToolChoice'
+ function_call:
+ type: object
+ deprecated: true
+ nullable: true
+ properties:
+ arguments:
+ type: string
+ name:
+ type: string
+ required:
+ - arguments
+ - name
+ reasoning:
+ type: string
+ nullable: true
+
+ EmbeddingsRequest:
+ type: object
+ required:
+ - model
+ - input
+ properties:
+ model:
+ type: string
+ description: >
+ The name of the embedding model to use.
+
+ [See all of Together AI's embedding models](https://docs.together.ai/docs/serverless-models#embedding-models)
+ example: togethercomputer/m2-bert-80M-8k-retrieval
+ anyOf:
+ - type: string
+ enum:
+ - WhereIsAI/UAE-Large-V1
+ - BAAI/bge-large-en-v1.5
+ - BAAI/bge-base-en-v1.5
+ - togethercomputer/m2-bert-80M-8k-retrieval
+ - type: string
+ input:
+ oneOf:
+ - type: string
+ description: A string providing the text for the model to embed.
+ example: Our solar system orbits the Milky Way galaxy at about 515,000 mph
+ - type: array
+ items:
+ type: string
+ description: A string providing the text for the model to embed.
+ example: Our solar system orbits the Milky Way galaxy at about 515,000 mph
+ example: Our solar system orbits the Milky Way galaxy at about 515,000 mph
+
+ EmbeddingsResponse:
+ type: object
+ required:
+ - object
+ - model
+ - data
+ properties:
+ object:
+ type: string
+ enum:
+ - list
+ model:
type: string
- description: The transcribed text
- example: Hello, world!
- segments:
- type: array
- items:
- $ref: '#/components/schemas/AudioTranscriptionSegment'
- description: Array of transcription segments
- words:
- type: array
- items:
- $ref: '#/components/schemas/AudioTranscriptionWord'
- description: Array of transcription words (only when timestamp_granularities includes 'word')
- speaker_segments:
+ data:
type: array
items:
- $ref: '#/components/schemas/AudioTranscriptionSpeakerSegment'
- description: Array of transcription speaker segments (only when diarize is enabled)
+ type: object
+ required: [index, object, embedding]
+ properties:
+ object:
+ type: string
+ enum:
+ - embedding
+ embedding:
+ type: array
+ items:
+ type: number
+ index:
+ type: integer
- AudioTranscriptionSegment:
+ ModelInfoList:
+ type: array
+ items:
+ $ref: '#/components/schemas/ModelInfo'
+ ModelInfo:
type: object
- required:
- - id
- - start
- - end
- - text
+ required: [id, object, created, type]
properties:
id:
+ type: string
+ example: 'Austism/chronos-hermes-13b'
+ object:
+ type: string
+ example: 'model'
+ created:
type: integer
- description: Unique identifier for the segment
- example: 0
- start:
- type: number
- format: float
- description: Start time of the segment in seconds
- example: 0.0
- end:
- type: number
- format: float
- description: End time of the segment in seconds
- example: 3.5
- text:
+ example: 1692896905
+ type:
+ enum:
+ - chat
+ - language
+ - code
+ - image
+ - embedding
+ - moderation
+ - rerank
+ example: 'chat'
+ display_name:
type: string
- description: The text content of the segment
- example: Hello, world!
+ example: 'Chronos Hermes (13B)'
+ organization:
+ type: string
+ example: 'Austism'
+ link:
+ type: string
+ license:
+ type: string
+ example: 'other'
+ context_length:
+ type: integer
+ example: 2048
+ pricing:
+ $ref: '#/components/schemas/Pricing'
- AudioTranscriptionWord:
+ ModelUploadRequest:
type: object
required:
- - word
- - start
- - end
+ - model_name
+ - model_source
properties:
- word:
+ model_name:
type: string
- description: The word
- example: Hello
- start:
- type: number
- format: float
- description: Start time of the word in seconds
- example: 0.0
- end:
- type: number
- format: float
- description: End time of the word in seconds
- example: 0.5
- speaker_id:
+ description: The name to give to your uploaded model
+ example: 'Qwen2.5-72B-Instruct'
+ model_source:
type: string
- description: The speaker id for the word (only when diarize is enabled)
- example: SPEAKER_00
+ description: The source location of the model (Hugging Face repo or S3 path)
+ example: 'unsloth/Qwen2.5-72B-Instruct'
+ model_type:
+ type: string
+ description: Whether the model is a full model or an adapter
+ default: 'model'
+ enum:
+ - model
+ - adapter
+ example: 'model'
+ hf_token:
+ type: string
+ description: Hugging Face token (if uploading from Hugging Face)
+ example: 'hf_examplehuggingfacetoken'
+ description:
+ type: string
+ description: A description of your model
+ example: 'Finetuned Qwen2.5-72B-Instruct by Unsloth'
+ base_model:
+ type: string
+ description: The base model to use for an adapter if setting it to run against a serverless pool. Only used for model_type `adapter`.
+ example: 'Qwen/Qwen2.5-72B-Instruct'
+ lora_model:
+ type: string
+ description: The LoRA pool to use for an adapter when it is set to run against a dedicated pool, for example. Only used for model_type `adapter`.
+ example: 'my_username/Qwen2.5-72B-Instruct-lora'
- AudioTranscriptionSpeakerSegment:
+ ModelUploadSuccessResponse:
type: object
required:
- - speaker_id
- - start
- - end
- - words
- - text
- - id
+ - data
+ - message
properties:
- speaker_id:
- type: string
- description: The speaker identifier
- example: SPEAKER_00
- start:
- type: number
- format: float
- description: Start time of the speaker segment in seconds
- example: 0.0
- end:
- type: number
- format: float
- description: End time of the speaker segment in seconds
- example: 30.02
- words:
- type: array
- items:
- $ref: '#/components/schemas/AudioTranscriptionWord'
- description: Array of words spoken by this speaker in this segment
- text:
+ data:
+ type: object
+ required:
+ - job_id
+ - model_name
+ - model_id
+ - model_source
+ properties:
+ job_id:
+ type: string
+ example: 'job-a15dad11-8d8e-4007-97c5-a211304de284'
+ model_name:
+ type: string
+ example: 'necolinehubner/Qwen2.5-72B-Instruct'
+ model_id:
+ type: string
+ example: 'model-c0e32dfc-637e-47b2-bf4e-e9b2e58c9da7'
+ model_source:
+ type: string
+ example: 'huggingface'
+ message:
type: string
- description: The full text spoken by this speaker in this segment
- example: "Hello, how are you doing today?"
- id:
- type: integer
- description: Unique identifier for the speaker segment
- example: 1
+ example: 'Processing model weights. Job created.'
- AudioTranslationRequest:
+ ImageResponse:
type: object
- required:
- - file
properties:
- file:
- oneOf:
- - type: string
- format: binary
- description: Audio file to translate
- - type: string
- format: uri
- description: Public HTTP/HTTPS URL to audio file
- description: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, .webm, .flac.
+ id:
+ type: string
model:
type: string
- description: Model to use for translation
- default: openai/whisper-large-v3
+ object:
enum:
- - openai/whisper-large-v3
- language:
- type: string
- description: Target output language. Optional ISO 639-1 language code. If omitted, language is set to English.
- default: en
- example: en
- prompt:
+ - list
+ example: 'list'
+ data:
+ type: array
+ items:
+ oneOf:
+ - $ref: '#/components/schemas/ImageResponseDataB64'
+ - $ref: '#/components/schemas/ImageResponseDataUrl'
+ discriminator:
+ propertyName: type
+ required:
+ - id
+ - model
+ - object
+ - data
+
+ ImageResponseDataB64:
+ type: object
+ required: [index, b64_json, type]
+ properties:
+ index:
+ type: integer
+ b64_json:
type: string
- description: Optional text to bias decoding.
- response_format:
+ type:
type: string
- description: The format of the response
- default: json
- enum:
- - json
- - verbose_json
- temperature:
- type: number
- format: float
- description: Sampling temperature between 0.0 and 1.0
- default: 0.0
- minimum: 0.0
- maximum: 1.0
- timestamp_granularities:
- oneOf:
- - type: string
- enum:
- - segment
- - word
- - type: array
- items:
- type: string
- enum:
- - segment
- - word
- uniqueItems: true
- minItems: 1
- maxItems: 2
- description: Controls level of timestamp detail in verbose_json. Only used when response_format is verbose_json. Can be a single granularity or an array to get multiple levels.
- default: segment
- example: ['word', 'segment']
-
- AudioTranslationResponse:
- oneOf:
- - $ref: '#/components/schemas/AudioTranslationJsonResponse'
- - $ref: '#/components/schemas/AudioTranslationVerboseJsonResponse'
+ enum: [b64_json]
- AudioTranslationJsonResponse:
+ ImageResponseDataUrl:
type: object
- required:
- - text
+ required: [index, url, type]
properties:
- text:
+ index:
+ type: integer
+ url:
type: string
- description: The translated text
- example: Hello, world!
+ type:
+ type: string
+ enum: [url]
- AudioTranslationVerboseJsonResponse:
+ JobInfoSuccessResponse:
type: object
required:
- - task
- - language
- - duration
- - text
- - segments
+ - type
+ - job_id
+ - status
+ - status_updates
+ - args
+ - created_at
+ - updated_at
properties:
- task:
+ type:
type: string
- description: The task performed
- enum:
- - transcribe
- - translate
- example: translate
- language:
+ example: 'model_upload'
+ job_id:
type: string
- description: The target language of the translation
- example: english
- duration:
- type: number
- format: float
- description: The duration of the audio in seconds
- example: 3.5
- text:
+ example: 'job-a15dad11-8d8e-4007-97c5-a211304de284'
+ status:
type: string
- description: The translated text
- example: Hello, world!
- segments:
+ enum: ['Queued', 'Running', 'Complete', 'Failed']
+ example: 'Complete'
+ status_updates:
type: array
items:
- $ref: '#/components/schemas/AudioTranscriptionSegment'
- description: Array of translation segments
- words:
+ type: object
+ required:
+ - status
+ - message
+ - timestamp
+ properties:
+ status:
+ type: string
+ example: 'Complete'
+ message:
+ type: string
+ example: 'Job is Complete'
+ timestamp:
+ type: string
+ format: date-time
+ example: '2025-03-11T22:36:12Z'
+ args:
+ type: object
+ properties:
+ description:
+ type: string
+ example: 'Finetuned Qwen2.5-72B-Instruct by Unsloth'
+ modelName:
+ type: string
+ example: 'necolinehubner/Qwen2.5-72B-Instruct'
+ modelSource:
+ type: string
+ example: 'unsloth/Qwen2.5-72B-Instruct'
+ created_at:
+ type: string
+ format: date-time
+ example: '2025-03-11T22:05:43Z'
+ updated_at:
+ type: string
+ format: date-time
+ example: '2025-03-11T22:36:12Z'
+
+ JobsInfoSuccessResponse:
+ type: object
+ required:
+ - data
+ properties:
+ data:
type: array
items:
- $ref: '#/components/schemas/AudioTranscriptionWord'
- description: Array of translation words (only when timestamp_granularities includes 'word')
-
- AudioSpeechStreamResponse:
- oneOf:
- - $ref: '#/components/schemas/AudioSpeechStreamEvent'
- - $ref: '#/components/schemas/StreamSentinel'
+ $ref: '#/components/schemas/JobInfoSuccessResponse'
- AudioSpeechStreamEvent:
+ Pricing:
type: object
- required: [data]
+ required: [hourly, input, output, base, finetune]
properties:
- data:
- $ref: '#/components/schemas/AudioSpeechStreamChunk'
+ hourly:
+ type: number
+ example: 0
+ input:
+ type: number
+ example: 0.3
+ output:
+ type: number
+ example: 0.3
+ base:
+ type: number
+ example: 0
+ finetune:
+ type: number
+ example: 0
- AudioSpeechStreamChunk:
+ ToolsPart:
type: object
- required: [object, model, b64]
properties:
- object:
+ type:
type: string
- enum:
- - audio.tts.chunk
- model:
+ example: 'tool_type'
+ function:
+ type: object
+ properties:
+ description:
+ type: string
+ example: 'A description of the function.'
+ name:
+ type: string
+ example: 'function_name'
+ parameters:
+ type: object
+ additionalProperties: true
+ description: 'A map of parameter names to their values.'
+ ToolChoice:
+ type: object
+ required: [id, type, function, index]
+ properties:
+ # TODO: is this the right place for index?
+ index:
+ type: number
+ id:
type: string
- example: cartesia/sonic
- b64:
+ type:
type: string
- description: base64 encoded audio stream
+ enum: ['function']
+ function:
+ type: object
+ required: [name, arguments]
+ properties:
+ name:
+ type: string
+ example: 'function_name'
+ arguments:
+ type: string
- StreamSentinel:
+ FileResponse:
+ type: object
+ required:
+ - id
+ - object
+ - created_at
+ - filename
+ - bytes
+ - purpose
+ - FileType
+ - Processed
+ - LineCount
+ properties:
+ id:
+ type: string
+ object:
+ type: string
+ example: 'file'
+ created_at:
+ type: integer
+ example: 1715021438
+ filename:
+ type: string
+ example: 'my_file.jsonl'
+ bytes:
+ type: integer
+ example: 2664
+ purpose:
+ $ref: '#/components/schemas/FilePurpose'
+ Processed:
+ type: boolean
+ FileType:
+ $ref: '#/components/schemas/FileType'
+ LineCount:
+ type: integer
+ FileList:
+ required:
+ - data
type: object
- required: [data]
properties:
data:
- title: stream_signal
- type: string
- enum:
- - '[DONE]'
-
- ChatCompletionToken:
+ type: array
+ items:
+ $ref: '#/components/schemas/FileResponse'
+ FileObject:
type: object
- required: [id, text, logprob, special]
properties:
+ object:
+ type: string
id:
- type: integer
- text:
type: string
- logprob:
- type: number
- special:
- type: boolean
-
- ChatCompletionChoice:
+ filename:
+ type: string
+ size:
+ type: integer
+ FilePurpose:
+ type: string
+ description: The purpose of the file
+ example: 'fine-tune'
+ enum:
+ - fine-tune
+ - eval
+ - eval-sample
+ - eval-output
+ - eval-summary
+ - batch-generated
+ - batch-api
+ FileType:
+ type: string
+ description: The type of the file
+ default: 'jsonl'
+ example: 'jsonl'
+ enum:
+ - 'csv'
+ - 'jsonl'
+ - 'parquet'
+ FileDeleteResponse:
type: object
- required: [index, delta, finish_reason]
properties:
- index:
- type: integer
- finish_reason:
- $ref: '#/components/schemas/FinishReason'
- logprobs:
- $ref: '#/components/schemas/LogprobsPart'
- delta:
- title: ChatCompletionChoiceDelta
- type: object
- required: [role]
- properties:
- token_id:
- type: integer
- role:
- type: string
- enum: ['system', 'user', 'assistant', 'function', 'tool']
- content:
- type: string
- nullable: true
- tool_calls:
- type: array
- items:
- $ref: '#/components/schemas/ToolChoice'
- function_call:
- type: object
- deprecated: true
- nullable: true
- properties:
- arguments:
- type: string
- name:
- type: string
- required:
- - arguments
- - name
- reasoning:
- type: string
- nullable: true
-
- EmbeddingsRequest:
+ id:
+ type: string
+ deleted:
+ type: boolean
+ FinetuneResponse:
type: object
required:
- - model
- - input
+ - id
+ - status
properties:
+ id:
+ type: string
+ format: uuid
+ training_file:
+ type: string
+ validation_file:
+ type: string
model:
type: string
- description: >
- The name of the embedding model to use.
-
- [See all of Together AI's embedding models](https://docs.together.ai/docs/serverless-models#embedding-models)
- example: togethercomputer/m2-bert-80M-8k-retrieval
- anyOf:
+ model_output_name:
+ type: string
+ model_output_path:
+ type: string
+ trainingfile_numlines:
+ type: integer
+ trainingfile_size:
+ type: integer
+ created_at:
+ type: string
+ format: date-time
+ updated_at:
+ type: string
+ format: date-time
+ n_epochs:
+ type: integer
+ n_checkpoints:
+ type: integer
+ n_evals:
+ type: integer
+ batch_size:
+ oneOf:
+ - type: integer
- type: string
enum:
- - WhereIsAI/UAE-Large-V1
- - BAAI/bge-large-en-v1.5
- - BAAI/bge-base-en-v1.5
- - togethercomputer/m2-bert-80M-8k-retrieval
- - type: string
- input:
+ - max
+ default: 'max'
+ learning_rate:
+ type: number
+ lr_scheduler:
+ type: object
+ $ref: '#/components/schemas/LRScheduler'
+ warmup_ratio:
+ type: number
+ max_grad_norm:
+ type: number
+ format: float
+ weight_decay:
+ type: number
+ format: float
+ eval_steps:
+ type: integer
+ train_on_inputs:
oneOf:
+ - type: boolean
- type: string
- description: A string providing the text for the model to embed.
- example: Our solar system orbits the Milky Way galaxy at about 515,000 mph
- - type: array
- items:
- type: string
- description: A string providing the text for the model to embed.
- example: Our solar system orbits the Milky Way galaxy at about 515,000 mph
- example: Our solar system orbits the Milky Way galaxy at about 515,000 mph
-
- EmbeddingsResponse:
- type: object
- required:
- - object
- - model
- - data
- properties:
- object:
- type: string
- enum:
- - list
- model:
+ enum:
+ - auto
+ default: auto
+ training_method:
+ type: object
+ oneOf:
+ - $ref: '#/components/schemas/TrainingMethodSFT'
+ - $ref: '#/components/schemas/TrainingMethodDPO'
+ training_type:
+ type: object
+ oneOf:
+ - $ref: '#/components/schemas/FullTrainingType'
+ - $ref: '#/components/schemas/LoRATrainingType'
+ status:
+ $ref: '#/components/schemas/FinetuneJobStatus'
+ job_id:
type: string
- data:
+ events:
type: array
items:
- type: object
- required: [index, object, embedding]
- properties:
- object:
- type: string
- enum:
- - embedding
- embedding:
- type: array
- items:
- type: number
- index:
- type: integer
-
- ModelInfoList:
- type: array
- items:
- $ref: '#/components/schemas/ModelInfo'
- ModelInfo:
- type: object
- required: [id, object, created, type]
- properties:
- id:
- type: string
- example: 'Austism/chronos-hermes-13b'
- object:
- type: string
- example: 'model'
- created:
+ $ref: '#/components/schemas/FineTuneEvent'
+ token_count:
type: integer
- example: 1692896905
- type:
- enum:
- - chat
- - language
- - code
- - image
- - embedding
- - moderation
- - rerank
- example: 'chat'
- display_name:
+ param_count:
+ type: integer
+ total_price:
+ type: integer
+ epochs_completed:
+ type: integer
+ queue_depth:
+ type: integer
+ wandb_project_name:
type: string
- example: 'Chronos Hermes (13B)'
- organization:
+ wandb_url:
type: string
- example: 'Austism'
- link:
+ from_checkpoint:
type: string
- license:
+ from_hf_model:
type: string
- example: 'other'
- context_length:
- type: integer
- example: 2048
- pricing:
- $ref: '#/components/schemas/Pricing'
+ hf_model_revision:
+ type: string
+ progress:
+ $ref: '#/components/schemas/FineTuneProgress'
- ModelUploadRequest:
+ FinetuneResponseTruncated:
type: object
+ description: A truncated version of the fine-tune response, used for POST /fine-tunes, GET /fine-tunes and POST /fine-tunes/{id}/cancel endpoints
required:
- - model_name
- - model_source
+ - id
+ - status
+ - created_at
+ - updated_at
+ example:
+ id: ft-01234567890123456789
+ status: completed
+ created_at: '2023-05-17T17:35:45.123Z'
+ updated_at: '2023-05-17T18:46:23.456Z'
+ user_id: 'user_01234567890123456789'
+ owner_address: 'user@example.com'
+ total_price: 1500
+ token_count: 850000
+ events: [] # FineTuneTruncated object has no events
+ model: 'meta-llama/Llama-2-7b-hf'
+ model_output_name: 'mynamespace/meta-llama/Llama-2-7b-hf-32162631'
+ n_epochs: 3
+ training_file: 'file-01234567890123456789'
+ wandb_project_name: 'my-finetune-project'
properties:
- model_name:
+ id:
type: string
- description: The name to give to your uploaded model
- example: 'Qwen2.5-72B-Instruct'
- model_source:
+ description: Unique identifier for the fine-tune job
+ status:
+ $ref: '#/components/schemas/FinetuneJobStatus'
+ created_at:
type: string
- description: The source location of the model (Hugging Face repo or S3 path)
- example: 'unsloth/Qwen2.5-72B-Instruct'
- model_type:
+ format: date-time
+ description: Creation timestamp of the fine-tune job
+ updated_at:
type: string
- description: Whether the model is a full model or an adapter
- default: 'model'
- enum:
- - model
- - adapter
- example: 'model'
- hf_token:
+ format: date-time
+ description: Last update timestamp of the fine-tune job
+ user_id:
type: string
- description: Hugging Face token (if uploading from Hugging Face)
- example: 'hf_examplehuggingfacetoken'
- description:
+ description: Identifier for the user who created the job
+ owner_address:
type: string
- description: A description of your model
- example: 'Finetuned Qwen2.5-72B-Instruct by Unsloth'
- base_model:
+ description: Owner address information
+ total_price:
+ type: integer
+ description: Total price for the fine-tuning job
+ token_count:
+ type: integer
+ description: Count of tokens processed
+ events:
+ type: array
+ items:
+ $ref: '#/components/schemas/FineTuneEvent'
+ description: Events related to this fine-tune job
+ # FineTuneUserParams fields
+ training_file:
type: string
- description: The base model to use for an adapter if setting it to run against a serverless pool. Only used for model_type `adapter`.
- example: 'Qwen/Qwen2.5-72B-Instruct'
- lora_model:
+ description: File-ID of the training file
+ validation_file:
type: string
- description: The lora pool to use for an adapter if setting it to run against, say, a dedicated pool. Only used for model_type `adapter`.
- example: 'my_username/Qwen2.5-72B-Instruct-lora'
-
- ModelUploadSuccessResponse:
- type: object
- required:
- - data
- - message
- properties:
- data:
- type: object
- required:
- - job_id
- - model_name
- - model_id
- - model_source
- properties:
- job_id:
- type: string
- example: 'job-a15dad11-8d8e-4007-97c5-a211304de284'
- model_name:
- type: string
- example: 'necolinehubner/Qwen2.5-72B-Instruct'
- model_id:
- type: string
- example: 'model-c0e32dfc-637e-47b2-bf4e-e9b2e58c9da7'
- model_source:
- type: string
- example: 'huggingface'
- message:
+ description: File-ID of the validation file
+ model:
type: string
- example: 'Processing model weights. Job created.'
-
- ImageResponse:
- type: object
- properties:
- id:
+ description: Base model used for fine-tuning
+ model_output_name:
type: string
- model:
+ suffix:
type: string
- object:
- enum:
- - list
- example: 'list'
- data:
- type: array
- items:
- oneOf:
- - $ref: '#/components/schemas/ImageResponseDataB64'
- - $ref: '#/components/schemas/ImageResponseDataUrl'
- discriminator:
- propertyName: type
- required:
- - id
- - model
- - object
- - data
-
- ImageResponseDataB64:
- type: object
- required: [index, b64_json, type]
- properties:
- index:
+ description: Suffix added to the fine-tuned model name
+ n_epochs:
type: integer
- b64_json:
+ description: Number of training epochs
+ n_evals:
+ type: integer
+ description: Number of evaluations during training
+ n_checkpoints:
+ type: integer
+ description: Number of checkpoints saved during training
+ batch_size:
+ type: integer
+ description: Batch size used for training
+ training_type:
+ oneOf:
+ - $ref: '#/components/schemas/FullTrainingType'
+ - $ref: '#/components/schemas/LoRATrainingType'
+ description: Type of training used (full or LoRA)
+ training_method:
+ oneOf:
+ - $ref: '#/components/schemas/TrainingMethodSFT'
+ - $ref: '#/components/schemas/TrainingMethodDPO'
+ description: Method of training used
+ learning_rate:
+ type: number
+ format: float
+ description: Learning rate used for training
+ lr_scheduler:
+ $ref: '#/components/schemas/LRScheduler'
+ description: Learning rate scheduler configuration
+ warmup_ratio:
+ type: number
+ format: float
+ description: Ratio of warmup steps
+ max_grad_norm:
+ type: number
+ format: float
+ description: Maximum gradient norm for clipping
+ weight_decay:
+ type: number
+ format: float
+ description: Weight decay value used
+ wandb_project_name:
type: string
- type:
+ description: Weights & Biases project name
+ wandb_name:
type: string
- enum: [b64_json]
-
- ImageResponseDataUrl:
+ description: Weights & Biases run name
+ from_checkpoint:
+ type: string
+ description: Checkpoint used to continue training
+ from_hf_model:
+ type: string
+ description: Hugging Face Hub repo to start training from
+ hf_model_revision:
+ type: string
+ description: The revision of the Hugging Face Hub model to continue training from
+ progress:
+ $ref: '#/components/schemas/FineTuneProgress'
+ description: Progress information for the fine-tuning job
+ FinetuneDeleteResponse:
type: object
- required: [index, url, type]
properties:
- index:
- type: integer
- url:
- type: string
- type:
+ message:
type: string
- enum: [url]
+ description: Message indicating the result of the deletion
+ FinetuneJobStatus:
+ type: string
+ enum:
+ - pending
+ - queued
+ - running
+ - compressing
+ - uploading
+ - cancel_requested
+ - cancelled
+ - error
+ - completed
+
+ FinetuneEventLevels:
+ type: string
+ enum:
+ - null
+ - info
+ - warning
+ - error
+ - legacy_info
+ - legacy_iwarning
+ - legacy_ierror
+ FinetuneEventType:
+ type: string
+ enum:
+ - job_pending
+ - job_start
+ - job_stopped
+ - model_downloading
+ - model_download_complete
+ - training_data_downloading
+ - training_data_download_complete
+ - validation_data_downloading
+ - validation_data_download_complete
+ - wandb_init
+ - training_start
+ - checkpoint_save
+ - billing_limit
+ - epoch_complete
+ - training_complete
+ - model_compressing
+ - model_compression_complete
+ - model_uploading
+ - model_upload_complete
+ - job_complete
+ - job_error
+ - cancel_requested
+ - job_restarted
+ - refund
+ - warning
- JobInfoSuccessResponse:
+ FinetuneTruncatedList:
type: object
required:
- - type
- - job_id
- - status
- - status_updates
- - args
- - created_at
- - updated_at
+ - data
properties:
- type:
- type: string
- example: 'model_upload'
- job_id:
- type: string
- example: 'job-a15dad11-8d8e-4007-97c5-a211304de284'
- status:
- type: string
- enum: ['Queued', 'Running', 'Complete', 'Failed']
- example: 'Complete'
- status_updates:
+ data:
type: array
items:
- type: object
- required:
- - status
- - message
- - timestamp
- properties:
- status:
- type: string
- example: 'Complete'
- message:
- type: string
- example: 'Job is Complete'
- timestamp:
- type: string
- format: date-time
- example: '2025-03-11T22:36:12Z'
- args:
- type: object
- properties:
- description:
- type: string
- example: 'Finetuned Qwen2.5-72B-Instruct by Unsloth'
- modelName:
- type: string
- example: 'necolinehubner/Qwen2.5-72B-Instruct'
- modelSource:
- type: string
- example: 'unsloth/Qwen2.5-72B-Instruct'
- created_at:
- type: string
- format: date-time
- example: '2025-03-11T22:05:43Z'
- updated_at:
- type: string
- format: date-time
- example: '2025-03-11T22:36:12Z'
-
- JobsInfoSuccessResponse:
+ $ref: '#/components/schemas/FinetuneResponseTruncated'
+ FinetuneListEvents:
type: object
required:
- data
@@ -7216,1967 +8280,2207 @@ components:
data:
type: array
items:
- $ref: '#/components/schemas/JobInfoSuccessResponse'
-
- Pricing:
- type: object
- required: [hourly, input, output, base, finetune]
- properties:
- hourly:
- type: number
- example: 0
- input:
- type: number
- example: 0.3
- output:
- type: number
- example: 0.3
- base:
- type: number
- example: 0
- finetune:
- type: number
- example: 0
-
- ToolsPart:
- type: object
- properties:
- type:
- type: string
- example: 'tool_type'
- function:
- type: object
- properties:
- description:
- type: string
- example: 'A description of the function.'
- name:
- type: string
- example: 'function_name'
- parameters:
- type: object
- additionalProperties: true
- description: 'A map of parameter names to their values.'
- ToolChoice:
- type: object
- required: [id, type, function, index]
- properties:
- # TODO: is this the right place for index?
- index:
- type: number
- id:
- type: string
- type:
- type: string
- enum: ['function']
- function:
- type: object
- required: [name, arguments]
- properties:
- name:
- type: string
- example: 'function_name'
- arguments:
- type: string
-
- FileResponse:
+ $ref: '#/components/schemas/FineTuneEvent'
+ FineTuneEvent:
type: object
required:
- - id
- object
- created_at
- - filename
- - bytes
- - purpose
- - FileType
- - Processed
- - LineCount
+ - message
+ - type
+ - param_count
+ - token_count
+ - total_steps
+ - wandb_url
+ - step
+ - checkpoint_path
+ - model_path
+ - training_offset
+ - hash
properties:
- id:
- type: string
object:
type: string
- example: 'file'
+ enum: [fine-tune-event]
created_at:
- type: integer
- example: 1715021438
- filename:
type: string
- example: 'my_file.jsonl'
- bytes:
+ level:
+ anyOf:
+ - $ref: '#/components/schemas/FinetuneEventLevels'
+ message:
+ type: string
+ type:
+ $ref: '#/components/schemas/FinetuneEventType'
+ param_count:
type: integer
- example: 2664
- purpose:
- $ref: '#/components/schemas/FilePurpose'
- Processed:
- type: boolean
- FileType:
- $ref: '#/components/schemas/FileType'
- LineCount:
+ token_count:
type: integer
- FileList:
- required:
- - data
- type: object
- properties:
- data:
- type: array
- items:
- $ref: '#/components/schemas/FileResponse'
- FileObject:
- type: object
- properties:
- object:
+ total_steps:
+ type: integer
+ wandb_url:
type: string
- id:
+ step:
+ type: integer
+ checkpoint_path:
type: string
- filename:
+ model_path:
type: string
- size:
+ training_offset:
type: integer
- FilePurpose:
- type: string
- description: The purpose of the file
- example: 'fine-tune'
- enum:
- - fine-tune
- - eval
- - eval-sample
- - eval-output
- - eval-summary
- - batch-generated
- - batch-api
- FileType:
- type: string
- description: The type of the file
- default: 'jsonl'
- example: 'jsonl'
- enum:
- - 'csv'
- - 'jsonl'
- - 'parquet'
- FileDeleteResponse:
+ hash:
+ type: string
+ FineTuneProgress:
type: object
+ description: Progress information for a fine-tuning job
+ required:
+ - estimate_available
+ - seconds_remaining
properties:
- id:
- type: string
- deleted:
+ estimate_available:
type: boolean
- FinetuneResponse:
+ description: Whether time estimate is available
+ seconds_remaining:
+ type: integer
+ description: Estimated time remaining, in seconds, for the fine-tuning job to reach its next state
+ FinetuneListCheckpoints:
+ type: object
+ required:
+ - data
+ properties:
+ data:
+ type: array
+ items:
+ $ref: '#/components/schemas/FineTuneCheckpoint'
+ FineTuneCheckpoint:
type: object
required:
- - id
- - status
+ - step
+ - path
+ - created_at
+ - checkpoint_type
properties:
- id:
- type: string
- format: uuid
- training_file:
+ step:
+ type: integer
+ created_at:
type: string
- validation_file:
+ path:
type: string
- model:
+ checkpoint_type:
type: string
- model_output_name:
+
+ FullTrainingType:
+ type: object
+ properties:
+ type:
type: string
- model_output_path:
+ enum: ['Full']
+ required:
+ - type
+ LoRATrainingType:
+ type: object
+ properties:
+ type:
type: string
- trainingfile_numlines:
+ enum: ['Lora']
+ lora_r:
type: integer
- trainingfile_size:
+ lora_alpha:
type: integer
- created_at:
+ lora_dropout:
+ type: number
+ format: float
+ default: 0.0
+ lora_trainable_modules:
type: string
- format: date-time
- updated_at:
+ default: 'all-linear'
+ required:
+ - type
+ - lora_r
+ - lora_alpha
+
+ TrainingMethodSFT:
+ type: object
+ properties:
+ method:
type: string
- format: date-time
- n_epochs:
- type: integer
- n_checkpoints:
- type: integer
- n_evals:
- type: integer
- batch_size:
+ enum: ['sft']
+ train_on_inputs:
oneOf:
- - type: integer
+ - type: boolean
- type: string
enum:
- - max
- default: 'max'
- learning_rate:
+ - auto
+ type: boolean
+ default: auto
+ description: Whether to mask the user messages in conversational data or prompts in instruction data.
+ required:
+ - method
+ - train_on_inputs
+ TrainingMethodDPO:
+ type: object
+ properties:
+ method:
+ type: string
+ enum: ['dpo']
+ dpo_beta:
type: number
- lr_scheduler:
- type: object
- $ref: '#/components/schemas/LRScheduler'
- warmup_ratio:
+ format: float
+ default: 0.1
+ rpo_alpha:
type: number
- max_grad_norm:
+ format: float
+ default: 0.0
+ dpo_normalize_logratios_by_length:
+ type: boolean
+ default: false
+ dpo_reference_free:
+ type: boolean
+ default: false
+ simpo_gamma:
type: number
format: float
- weight_decay:
+ default: 0.0
+ required:
+ - method
+
+ LRScheduler:
+ type: object
+ properties:
+ lr_scheduler_type:
+ type: string
+ enum:
+ - linear
+ - cosine
+ lr_scheduler_args:
+ oneOf:
+ - $ref: '#/components/schemas/LinearLRSchedulerArgs'
+ - $ref: '#/components/schemas/CosineLRSchedulerArgs'
+ required:
+ - lr_scheduler_type
+ CosineLRSchedulerArgs:
+ type: object
+ properties:
+ min_lr_ratio:
type: number
format: float
- eval_steps:
+ default: 0.0
+ description: The ratio of the final learning rate to the peak learning rate
+ num_cycles:
+ type: number
+ format: float
+ default: 0.5
+ description: Number or fraction of cycles for the cosine learning rate scheduler
+ required:
+ - min_lr_ratio
+ - num_cycles
+ LinearLRSchedulerArgs:
+ type: object
+ properties:
+ min_lr_ratio:
+ type: number
+ format: float
+ default: 0.0
+ description: The ratio of the final learning rate to the peak learning rate
+
+ Autoscaling:
+ type: object
+ description: Configuration for automatic scaling of replicas based on demand.
+ required:
+ - min_replicas
+ - max_replicas
+ properties:
+ min_replicas:
type: integer
- train_on_inputs:
- oneOf:
- - type: boolean
- - type: string
- enum:
- - auto
- default: auto
- training_method:
- type: object
- oneOf:
- - $ref: '#/components/schemas/TrainingMethodSFT'
- - $ref: '#/components/schemas/TrainingMethodDPO'
- training_type:
- type: object
- oneOf:
- - $ref: '#/components/schemas/FullTrainingType'
- - $ref: '#/components/schemas/LoRATrainingType'
+ format: int32
+ description: The minimum number of replicas to maintain, even when there is no load
+ examples:
+ - 2
+ max_replicas:
+ type: integer
+ format: int32
+ description: The maximum number of replicas to scale up to under load
+ examples:
+ - 5
+
+ HardwareSpec:
+ type: object
+ description: Detailed specifications of a hardware configuration
+ required:
+ - gpu_type
+ - gpu_link
+ - gpu_memory
+ - gpu_count
+ properties:
+ gpu_type:
+ type: string
+ description: The type/model of GPU
+ examples:
+ - a100-80gb
+ gpu_link:
+ type: string
+ description: The GPU interconnect technology
+ examples:
+ - sxm
+ gpu_memory:
+ type: number
+ format: float
+ description: Amount of GPU memory in GB
+ examples:
+ - 80
+ gpu_count:
+ type: integer
+ format: int32
+ description: Number of GPUs in this configuration
+ examples:
+ - 2
+
+ EndpointPricing:
+ type: object
+ description: Pricing details for using an endpoint
+ required:
+ - cents_per_minute
+ properties:
+ cents_per_minute:
+ type: number
+ format: float
+ description: Cost per minute of endpoint uptime in cents
+ examples:
+ - 5.42
+
+ HardwareAvailability:
+ type: object
+ description: Indicates the current availability status of a hardware configuration
+ required:
+ - status
+ properties:
status:
- $ref: '#/components/schemas/FinetuneJobStatus'
- job_id:
type: string
- events:
- type: array
- items:
- $ref: '#/components/schemas/FineTuneEvent'
- token_count:
- type: integer
- param_count:
- type: integer
- total_price:
- type: integer
- epochs_completed:
- type: integer
- queue_depth:
- type: integer
- wandb_project_name:
+ description: The availability status of the hardware configuration
+ enum:
+ - available
+ - unavailable
+ - insufficient
+
+ HardwareWithStatus:
+ type: object
+ description: Hardware configuration details with optional availability status
+ required:
+ - object
+ - id
+ - pricing
+ - specs
+ - updated_at
+ properties:
+ object:
+ type: string
+ enum:
+ - hardware
+ id:
type: string
- wandb_url:
+ description: Unique identifier for the hardware configuration
+ examples:
+ - 2x_nvidia_a100_80gb_sxm
+ pricing:
+ $ref: '#/components/schemas/EndpointPricing'
+ specs:
+ $ref: '#/components/schemas/HardwareSpec'
+ availability:
+ $ref: '#/components/schemas/HardwareAvailability'
+ updated_at:
type: string
- from_checkpoint:
+ format: date-time
+ description: Timestamp of when the hardware status was last updated
+
+ CreateEndpointRequest:
+ type: object
+ required:
+ - model
+ - hardware
+ - autoscaling
+ properties:
+ display_name:
type: string
- from_hf_model:
+ description: A human-readable name for the endpoint
+ examples:
+ - My Llama3 70b endpoint
+ model:
type: string
- hf_model_revision:
+ description: The model to deploy on this endpoint
+ examples:
+ - meta-llama/Llama-3-8b-chat-hf
+ hardware:
type: string
- progress:
- $ref: '#/components/schemas/FineTuneProgress'
+ description: The hardware configuration to use for this endpoint
+ examples:
+ - 1x_nvidia_a100_80gb_sxm
+ autoscaling:
+ $ref: '#/components/schemas/Autoscaling'
+ description: Configuration for automatic scaling of the endpoint
+ disable_prompt_cache:
+ type: boolean
+ description: Whether to disable the prompt cache for this endpoint
+ default: false
+ disable_speculative_decoding:
+ type: boolean
+ description: Whether to disable speculative decoding for this endpoint
+ default: false
+ state:
+ type: string
+ description: The desired state of the endpoint
+ enum:
+ - STARTED
+ - STOPPED
+ default: STARTED
+ example: STARTED
+ inactive_timeout:
+ type: integer
+ description: The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to null, omit or set to 0 to disable automatic timeout.
+ nullable: true
+ example: 60
+ availability_zone:
+ type: string
+ description: Create the endpoint in a specified availability zone (e.g., us-central-4b)
- FinetuneResponseTruncated:
+ DedicatedEndpoint:
type: object
- description: A truncated version of the fine-tune response, used for POST /fine-tunes, GET /fine-tunes and POST /fine-tunes/{id}/cancel endpoints
+ description: Details about a dedicated endpoint deployment
required:
+ - object
- id
- - status
+ - name
+ - display_name
+ - model
+ - hardware
+ - type
+ - owner
+ - state
+ - autoscaling
- created_at
- - updated_at
- example:
- id: ft-01234567890123456789
- status: completed
- created_at: '2023-05-17T17:35:45.123Z'
- updated_at: '2023-05-17T18:46:23.456Z'
- user_id: 'user_01234567890123456789'
- owner_address: 'user@example.com'
- total_price: 1500
- token_count: 850000
- events: [] # FineTuneTruncated object has no events
- model: 'meta-llama/Llama-2-7b-hf'
- model_output_name: 'mynamespace/meta-llama/Llama-2-7b-hf-32162631'
- n_epochs: 3
- training_file: 'file-01234567890123456789'
- wandb_project_name: 'my-finetune-project'
properties:
+ object:
+ type: string
+ enum:
+ - endpoint
+ description: The type of object
+ example: endpoint
id:
type: string
- description: Unique identifier for the fine-tune job
- status:
- $ref: '#/components/schemas/FinetuneJobStatus'
- created_at:
+ description: Unique identifier for the endpoint
+ example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
+ name:
type: string
- format: date-time
- description: Creation timestamp of the fine-tune job
- updated_at:
+ description: System name for the endpoint
+ example: devuser/meta-llama/Llama-3-8b-chat-hf-a32b82a1
+ display_name:
type: string
- format: date-time
- description: Last update timestamp of the fine-tune job
- user_id:
+ description: Human-readable name for the endpoint
+ example: My Llama3 70b endpoint
+ model:
type: string
- description: Identifier for the user who created the job
- owner_address:
+ description: The model deployed on this endpoint
+ example: meta-llama/Llama-3-8b-chat-hf
+ hardware:
type: string
- description: Owner address information
- total_price:
- type: integer
- description: Total price for the fine-tuning job
- token_count:
- type: integer
- description: Count of tokens processed
- events:
- type: array
- items:
- $ref: '#/components/schemas/FineTuneEvent'
- description: Events related to this fine-tune job
- # FineTuneUserParams fields
- training_file:
+ description: The hardware configuration used for this endpoint
+ example: 1x_nvidia_a100_80gb_sxm
+ type:
type: string
- description: File-ID of the training file
- validation_file:
+ enum:
+ - dedicated
+ description: The type of endpoint
+ example: dedicated
+ owner:
type: string
- description: File-ID of the validation file
- model:
+ description: The owner of this endpoint
+ example: devuser
+ state:
type: string
- description: Base model used for fine-tuning
- model_output_name:
+ enum:
+ - PENDING
+ - STARTING
+ - STARTED
+ - STOPPING
+ - STOPPED
+ - ERROR
+ description: Current state of the endpoint
+ example: STARTED
+ autoscaling:
+ $ref: '#/components/schemas/Autoscaling'
+ description: Configuration for automatic scaling of the endpoint
+ created_at:
type: string
- suffix:
+          format: date-time
+          description: Timestamp when the endpoint was created
+          example: '2025-02-04T10:43:55.405Z'  # quoted so YAML loaders keep it a string rather than a timestamp
+
+ ListEndpoint:
+ type: object
+ description: Details about an endpoint when listed via the list endpoint
+ required:
+ - id
+ - object
+ - name
+ - model
+ - type
+ - owner
+ - state
+ - created_at
+ properties:
+ object:
type: string
- description: Suffix added to the fine-tuned model name
- n_epochs:
- type: integer
- description: Number of training epochs
- n_evals:
- type: integer
- description: Number of evaluations during training
- n_checkpoints:
- type: integer
- description: Number of checkpoints saved during training
- batch_size:
- type: integer
- description: Batch size used for training
- training_type:
- oneOf:
- - $ref: '#/components/schemas/FullTrainingType'
- - $ref: '#/components/schemas/LoRATrainingType'
- description: Type of training used (full or LoRA)
- training_method:
- oneOf:
- - $ref: '#/components/schemas/TrainingMethodSFT'
- - $ref: '#/components/schemas/TrainingMethodDPO'
- description: Method of training used
- learning_rate:
- type: number
- format: float
- description: Learning rate used for training
- lr_scheduler:
- $ref: '#/components/schemas/LRScheduler'
- description: Learning rate scheduler configuration
- warmup_ratio:
- type: number
- format: float
- description: Ratio of warmup steps
- max_grad_norm:
- type: number
- format: float
- description: Maximum gradient norm for clipping
- weight_decay:
- type: number
- format: float
- description: Weight decay value used
- wandb_project_name:
+ enum:
+ - endpoint
+ description: The type of object
+ example: endpoint
+ id:
type: string
- description: Weights & Biases project name
- wandb_name:
+ description: Unique identifier for the endpoint
+ example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
+ name:
type: string
- description: Weights & Biases run name
- from_checkpoint:
+ description: System name for the endpoint
+ example: allenai/OLMo-7B
+ model:
type: string
- description: Checkpoint used to continue training
- from_hf_model:
+ description: The model deployed on this endpoint
+ example: allenai/OLMo-7B
+ type:
type: string
- description: Hugging Face Hub repo to start training from
- hf_model_revision:
+ enum:
+ - serverless
+ - dedicated
+ description: The type of endpoint
+ example: serverless
+ owner:
type: string
- description: The revision of the Hugging Face Hub model to continue training from
- progress:
- $ref: '#/components/schemas/FineTuneProgress'
- description: Progress information for the fine-tuning job
- FinetuneDeleteResponse:
- type: object
+ description: The owner of this endpoint
+ example: together
+ state:
+ type: string
+ enum:
+ - PENDING
+ - STARTING
+ - STARTED
+ - STOPPING
+ - STOPPED
+ - ERROR
+ description: Current state of the endpoint
+ example: STARTED
+        created_at:
+          type: string
+          format: date-time
+          description: Timestamp when the endpoint was created
+          example: '2024-02-28T21:34:35.444Z'  # quoted so YAML loaders keep it a string rather than a timestamp
+
+ DisplayorExecuteOutput:
properties:
- message:
+ data:
+ properties:
+ application/geo+json:
+ type: object
+ application/javascript:
+ type: string
+ application/json:
+ type: object
+ application/pdf:
+ format: byte
+ type: string
+ application/vnd.vega.v5+json:
+ type: object
+ application/vnd.vegalite.v4+json:
+ type: object
+ image/gif:
+ format: byte
+ type: string
+ image/jpeg:
+ format: byte
+ type: string
+ image/png:
+ format: byte
+ type: string
+ image/svg+xml:
+ type: string
+ text/html:
+ type: string
+ text/latex:
+ type: string
+ text/markdown:
+ type: string
+ text/plain:
+ type: string
+ type: object
+ type:
+ enum:
+ - display_data
+ - execute_result
type: string
- description: Message indicating the result of the deletion
- FinetuneJobStatus:
- type: string
- enum:
- - pending
- - queued
- - running
- - compressing
- - uploading
- - cancel_requested
- - cancelled
- - error
- - completed
+ required:
+ - type
+ - data
+ title: DisplayorExecuteOutput
+
+ Error:
+ oneOf:
+ - type: string
+ - additionalProperties: true
+ type: object
+ title: Error
+
+ ErrorOutput:
+ title: ErrorOutput
+ description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully.
+ properties:
+ data:
+ type: string
+ type:
+ enum:
+ - error
+ type: string
+ required:
+ - type
+ - data
+
+    ExecuteRequest:
+      title: ExecuteRequest
+      type: object  # schema declares properties/required, so state the object type explicitly
+      required: [language, code]
+      properties:
+        code:
+          description: 'Code snippet to execute.'
+          example: "print('Hello, world!')"
+          type: string
+        files:
+          description: Files to upload to the session. If present, files will be uploaded before executing the given code.
+          items:
+            properties:
+              content:
+                type: string
+              encoding:
+                description: Encoding of the file content. Use `string` for text files such as code, and `base64` for binary files, such as images.
+                enum:
+                  - string
+                  - base64
+                type: string
+              name:
+                type: string
+            required:
+              - name
+              - encoding
+              - content
+            type: object
+          type: array
+        language:
+          default: python
+          description: Programming language for the code to execute. Currently only supports Python, but more will be added.
+          enum:
+            - python
+          type: string  # enum and default values are strings; declare the type explicitly
+        session_id:
+          description: Identifier of the current session. Used to make follow-up calls. Requests will return an error if the session does not belong to the caller or has expired.
+          example: ses_abcDEF123
+          nullable: false
+          type: string
+
+ ExecuteResponse:
+ title: ExecuteResponse
+ type: object
+ description: 'The result of the execution. If successful, `data` contains the result and `errors` will be null. If unsuccessful, `data` will be null and `errors` will contain the errors.'
+ oneOf:
+ - title: SuccessfulExecution
+ type: object
+ required: [data, errors]
+ properties:
+ errors:
+ type: 'null'
+ data:
+ type: object
+ nullable: false
+ required: [session_id, outputs]
+ properties:
+ outputs:
+ type: array
+ items:
+ discriminator:
+ propertyName: type
+ oneOf:
+ - title: StreamOutput
+ description: Outputs that were printed to stdout or stderr
+ type: object
+ required: [type, data]
+ properties:
+ type:
+ enum:
+ - stdout
+ - stderr
+ type: string
+ data:
+ type: string
+ - description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully.
+ properties:
+ data:
+ type: string
+ type:
+ enum:
+ - error
+ type: string
+ required:
+ - type
+ - data
+ title: ErrorOutput
+ - properties:
+ data:
+ properties:
+ application/geo+json:
+ type: object
+ additionalProperties: true
+ application/javascript:
+ type: string
+ application/json:
+ type: object
+ additionalProperties: true
+ application/pdf:
+ format: byte
+ type: string
+ application/vnd.vega.v5+json:
+ type: object
+ additionalProperties: true
+ application/vnd.vegalite.v4+json:
+ type: object
+ additionalProperties: true
+ image/gif:
+ format: byte
+ type: string
+ image/jpeg:
+ format: byte
+ type: string
+ image/png:
+ format: byte
+ type: string
+ image/svg+xml:
+ type: string
+ text/html:
+ type: string
+ text/latex:
+ type: string
+ text/markdown:
+ type: string
+ text/plain:
+ type: string
+ type: object
+ type:
+ enum:
+ - display_data
+ - execute_result
+ type: string
+ required:
+ - type
+ - data
+ title: DisplayorExecuteOutput
+ title: InterpreterOutput
+ session_id:
+ type: string
+ description: Identifier of the current session. Used to make follow-up calls.
+ example: ses_abcDEF123
+ nullable: false
+ status:
+ type: string
+ enum:
+ - success
+ description: Status of the execution. Currently only supports success.
+ - title: FailedExecution
+ type: object
+ required: [data, errors]
+ properties:
+ data:
+ type: 'null'
+ errors:
+ type: array
+ items:
+ title: Error
+ oneOf:
+ - type: string
+ - type: object
+ additionalProperties: true
- FinetuneEventLevels:
- type: string
- enum:
- - null
- - info
- - warning
- - error
- - legacy_info
- - legacy_iwarning
- - legacy_ierror
- FinetuneEventType:
- type: string
- enum:
- - job_pending
- - job_start
- - job_stopped
- - model_downloading
- - model_download_complete
- - training_data_downloading
- - training_data_download_complete
- - validation_data_downloading
- - validation_data_download_complete
- - wandb_init
- - training_start
- - checkpoint_save
- - billing_limit
- - epoch_complete
- - training_complete
- - model_compressing
- - model_compression_complete
- - model_uploading
- - model_upload_complete
- - job_complete
- - job_error
- - cancel_requested
- - job_restarted
- - refund
- - warning
+ InterpreterOutput:
+ discriminator:
+ propertyName: type
+ oneOf:
+ - description: Outputs that were printed to stdout or stderr
+ properties:
+ data:
+ type: string
+ type:
+ enum:
+ - stdout
+ - stderr
+ type: string
+ required:
+ - type
+ - data
+ title: StreamOutput
+ - description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully.
+ properties:
+ data:
+ type: string
+ type:
+ enum:
+ - error
+ type: string
+ required:
+ - type
+ - data
+ title: ErrorOutput
+ - properties:
+ data:
+ properties:
+ application/geo+json:
+ type: object
+ application/javascript:
+ type: string
+ application/json:
+ type: object
+ application/pdf:
+ format: byte
+ type: string
+ application/vnd.vega.v5+json:
+ type: object
+ application/vnd.vegalite.v4+json:
+ type: object
+ image/gif:
+ format: byte
+ type: string
+ image/jpeg:
+ format: byte
+ type: string
+ image/png:
+ format: byte
+ type: string
+ image/svg+xml:
+ type: string
+ text/html:
+ type: string
+ text/latex:
+ type: string
+ text/markdown:
+ type: string
+ text/plain:
+ type: string
+ type: object
+ type:
+ enum:
+ - display_data
+ - execute_result
+ type: string
+ required:
+ - type
+ - data
+ title: DisplayorExecuteOutput
+ title: InterpreterOutput
- FinetuneTruncatedList:
- type: object
- required:
- - data
+ Response:
properties:
- data:
- type: array
+ errors:
items:
- $ref: '#/components/schemas/FinetuneResponseTruncated'
- FinetuneListEvents:
- type: object
- required:
- - data
- properties:
- data:
+ oneOf:
+ - type: string
+ - additionalProperties: true
+ type: object
+ title: Error
type: array
- items:
- $ref: '#/components/schemas/FineTuneEvent'
- FineTuneEvent:
- type: object
- required:
- - object
- - created_at
- - message
- - type
- - param_count
- - token_count
- - total_steps
- - wandb_url
- - step
- - checkpoint_path
- - model_path
- - training_offset
- - hash
- properties:
- object:
- type: string
- enum: [fine-tune-event]
- created_at:
- type: string
- level:
- anyOf:
- - $ref: '#/components/schemas/FinetuneEventLevels'
- message:
- type: string
- type:
- $ref: '#/components/schemas/FinetuneEventType'
- param_count:
- type: integer
- token_count:
- type: integer
- total_steps:
- type: integer
- wandb_url:
- type: string
- step:
- type: integer
- checkpoint_path:
- type: string
- model_path:
- type: string
- training_offset:
- type: integer
- hash:
- type: string
- FineTuneProgress:
+ title: Response
type: object
- description: Progress information for a fine-tuning job
- required:
- - estimate_available
- - seconds_remaining
- properties:
- estimate_available:
- type: boolean
- description: Whether time estimate is available
- seconds_remaining:
- type: integer
- description: Estimated time remaining in seconds for the fine-tuning job to next state
- FinetuneListCheckpoints:
+
+ SessionListResponse:
+ allOf:
+ - properties:
+ errors:
+ items:
+ oneOf:
+ - type: string
+ - additionalProperties: true
+ type: object
+ title: Error
+ type: array
+ title: Response
+ type: object
+ - properties:
+ data:
+ properties:
+ sessions:
+ items:
+ properties:
+ execute_count:
+ type: integer
+ expires_at:
+ format: date-time
+ type: string
+ id:
+ description: Session Identifier. Used to make follow-up calls.
+ example: ses_abcDEF123
+ type: string
+ last_execute_at:
+ format: date-time
+ type: string
+ started_at:
+ format: date-time
+ type: string
+ required:
+ - execute_count
+ - expires_at
+ - id
+ - last_execute_at
+ - started_at
+ type: object
+ type: array
+ required:
+ - sessions
+ type: object
+ title: SessionListResponse
type: object
- required:
- - data
+
+ StreamOutput:
+ description: Outputs that were printed to stdout or stderr
properties:
data:
- type: array
- items:
- $ref: '#/components/schemas/FineTuneCheckpoint'
- FineTuneCheckpoint:
- type: object
- required:
- - step
- - path
- - created_at
- - checkpoint_type
- properties:
- step:
- type: integer
- created_at:
- type: string
- path:
- type: string
- checkpoint_type:
type: string
-
- FullTrainingType:
- type: object
- properties:
type:
+ enum:
+ - stdout
+ - stderr
type: string
- enum: ['Full']
required:
- type
- LoRATrainingType:
+ - data
+ title: StreamOutput
+
+ CreateBatchRequest:
type: object
+ required: [endpoint, input_file_id]
properties:
- type:
+ endpoint:
type: string
- enum: ['Lora']
- lora_r:
- type: integer
- lora_alpha:
+ description: The endpoint to use for batch processing
+ example: '/v1/chat/completions'
+ input_file_id:
+ type: string
+ description: ID of the uploaded input file containing batch requests
+ example: 'file-abc123def456ghi789'
+ completion_window:
+ type: string
+ description: Time window for batch completion (optional)
+ example: '24h'
+ priority:
type: integer
- lora_dropout:
- type: number
- format: float
- default: 0.0
- lora_trainable_modules:
+ description: Priority for batch processing (optional)
+ example: 1
+ model_id:
type: string
- default: 'all-linear'
- required:
- - type
- - lora_r
- - lora_alpha
-
- TrainingMethodSFT:
+ description: 'Model to use for processing batch requests'
+ example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
+ BatchErrorResponse:
type: object
properties:
- method:
+ error:
type: string
- enum: ['sft']
- train_on_inputs:
- oneOf:
- - type: boolean
- - type: string
- enum:
- - auto
- type: boolean
- default: auto
- description: Whether to mask the user messages in conversational data or prompts in instruction data.
- required:
- - method
- - train_on_inputs
- TrainingMethodDPO:
+ BatchJobWithWarning:
type: object
properties:
- method:
+ job:
+ $ref: '#/components/schemas/BatchJob'
+ warning:
type: string
- enum: ['dpo']
- dpo_beta:
- type: number
- format: float
- default: 0.1
- rpo_alpha:
- type: number
- format: float
- default: 0.0
- dpo_normalize_logratios_by_length:
- type: boolean
- default: false
- dpo_reference_free:
- type: boolean
- default: false
- simpo_gamma:
- type: number
- format: float
- default: 0.0
- required:
- - method
-
- LRScheduler:
+ BatchJob:
type: object
properties:
- lr_scheduler_type:
+ id:
type: string
- enum:
- - linear
- - cosine
- lr_scheduler_args:
- oneOf:
- - $ref: '#/components/schemas/LinearLRSchedulerArgs'
- - $ref: '#/components/schemas/CosineLRSchedulerArgs'
- required:
- - lr_scheduler_type
- CosineLRSchedulerArgs:
- type: object
- properties:
- min_lr_ratio:
- type: number
- format: float
- default: 0.0
- description: The ratio of the final learning rate to the peak learning rate
- num_cycles:
- type: number
- format: float
- default: 0.5
- description: Number or fraction of cycles for the cosine learning rate scheduler
- required:
- - min_lr_ratio
- - num_cycles
- LinearLRSchedulerArgs:
- type: object
- properties:
- min_lr_ratio:
- type: number
- format: float
- default: 0.0
- description: The ratio of the final learning rate to the peak learning rate
-
- Autoscaling:
- type: object
- description: Configuration for automatic scaling of replicas based on demand.
- required:
- - min_replicas
- - max_replicas
- properties:
- min_replicas:
- type: integer
- format: int32
- description: The minimum number of replicas to maintain, even when there is no load
- examples:
- - 2
- max_replicas:
+ format: uuid
+ example: '01234567-8901-2345-6789-012345678901'
+ user_id:
+ type: string
+ example: 'user_789xyz012'
+ input_file_id:
+ type: string
+ example: 'file-input123abc456def'
+ file_size_bytes:
type: integer
- format: int32
- description: The maximum number of replicas to scale up to under load
- examples:
- - 5
-
- HardwareSpec:
- type: object
- description: Detailed specifications of a hardware configuration
- required:
- - gpu_type
- - gpu_link
- - gpu_memory
- - gpu_count
- properties:
- gpu_type:
+ format: int64
+ example: 1048576
+ description: 'Size of input file in bytes'
+ status:
+ $ref: '#/components/schemas/BatchJobStatus'
+ job_deadline:
type: string
- description: The type/model of GPU
- examples:
- - a100-80gb
- gpu_link:
+ format: date-time
+ example: '2024-01-15T15:30:00Z'
+ created_at:
type: string
- description: The GPU interconnect technology
- examples:
- - sxm
- gpu_memory:
+ format: date-time
+ example: '2024-01-15T14:30:00Z'
+ endpoint:
+ type: string
+ example: '/v1/chat/completions'
+ progress:
type: number
- format: float
- description: Amount of GPU memory in GB
- examples:
- - 80
- gpu_count:
- type: integer
- format: int32
- description: Number of GPUs in this configuration
- examples:
- - 2
+ format: float64
+ example: 75.0
+ description: 'Completion progress (0.0 to 100)'
+ model_id:
+ type: string
+ example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
+ description: 'Model used for processing requests'
+ output_file_id:
+ type: string
+ example: 'file-output789xyz012ghi'
+ error_file_id:
+ type: string
+ example: 'file-errors456def789jkl'
+ error:
+ type: string
+ completed_at:
+ type: string
+ format: date-time
+ example: '2024-01-15T15:45:30Z'
+    BatchJobStatus:  # string enum referenced by BatchJob.status
+      description: 'Current status of the batch job'
+      example: 'IN_PROGRESS'
+      type: string
+      enum:
+        - VALIDATING
+        - IN_PROGRESS
+        - COMPLETED
+        - FAILED
+        - EXPIRED
+        - CANCELLED
- EndpointPricing:
+ EvaluationTypedRequest:
type: object
- description: Pricing details for using an endpoint
required:
- - cents_per_minute
+ - type
+ - parameters
properties:
- cents_per_minute:
- type: number
- format: float
- description: Cost per minute of endpoint uptime in cents
- examples:
- - 5.42
+ type:
+ type: string
+ enum: [classify, score, compare]
+ description: The type of evaluation to perform
+ example: 'classify'
+ parameters:
+ oneOf:
+ - $ref: '#/components/schemas/EvaluationClassifyParameters'
+ - $ref: '#/components/schemas/EvaluationScoreParameters'
+ - $ref: '#/components/schemas/EvaluationCompareParameters'
+ description: Type-specific parameters for the evaluation
- HardwareAvailability:
+ EvaluationClassifyParameters:
type: object
- description: Indicates the current availability status of a hardware configuration
required:
- - status
+ - judge
+ - labels
+ - pass_labels
+ - input_data_file_path
properties:
- status:
+ judge:
+ $ref: '#/components/schemas/EvaluationJudgeModelConfig'
+ labels:
+ type: array
+ items:
+ type: string
+ minItems: 2
+ description: List of possible classification labels
+ example: ['yes', 'no']
+ pass_labels:
+ type: array
+ items:
+ type: string
+ minItems: 1
+ description: List of labels that are considered passing
+ example: ['yes']
+ model_to_evaluate:
+ $ref: '#/components/schemas/EvaluationModelOrString'
+ input_data_file_path:
type: string
- description: The availability status of the hardware configuration
- enum:
- - available
- - unavailable
- - insufficient
+ description: Data file ID
+ example: 'file-1234-aefd'
- HardwareWithStatus:
+ EvaluationScoreParameters:
type: object
- description: Hardware configuration details with optional availability status
required:
- - object
- - id
- - pricing
- - specs
- - updated_at
+ - judge
+ - min_score
+ - max_score
+ - pass_threshold
+ - input_data_file_path
properties:
- object:
- type: string
- enum:
- - hardware
- id:
- type: string
- description: Unique identifier for the hardware configuration
- examples:
- - 2x_nvidia_a100_80gb_sxm
- pricing:
- $ref: '#/components/schemas/EndpointPricing'
- specs:
- $ref: '#/components/schemas/HardwareSpec'
- availability:
- $ref: '#/components/schemas/HardwareAvailability'
- updated_at:
+ judge:
+ $ref: '#/components/schemas/EvaluationJudgeModelConfig'
+ min_score:
+ type: number
+ format: float
+ example: 0.0
+ description: Minimum possible score
+ max_score:
+ type: number
+ format: float
+ example: 10.0
+ description: Maximum possible score
+ pass_threshold:
+ type: number
+ format: float
+ example: 7.0
+ description: Score threshold for passing
+ model_to_evaluate:
+ $ref: '#/components/schemas/EvaluationModelOrString'
+ input_data_file_path:
type: string
- format: date-time
- description: Timestamp of when the hardware status was last updated
+ example: 'file-01234567890123456789'
+ description: Data file ID
- CreateEndpointRequest:
+ EvaluationCompareParameters:
type: object
required:
- - model
- - hardware
- - autoscaling
+ - judge
+ - input_data_file_path
properties:
- display_name:
- type: string
- description: A human-readable name for the endpoint
- examples:
- - My Llama3 70b endpoint
- model:
- type: string
- description: The model to deploy on this endpoint
- examples:
- - meta-llama/Llama-3-8b-chat-hf
- hardware:
- type: string
- description: The hardware configuration to use for this endpoint
- examples:
- - 1x_nvidia_a100_80gb_sxm
- autoscaling:
- $ref: '#/components/schemas/Autoscaling'
- description: Configuration for automatic scaling of the endpoint
- disable_prompt_cache:
- type: boolean
- description: Whether to disable the prompt cache for this endpoint
- default: false
- disable_speculative_decoding:
- type: boolean
- description: Whether to disable speculative decoding for this endpoint
- default: false
- state:
- type: string
- description: The desired state of the endpoint
- enum:
- - STARTED
- - STOPPED
- default: STARTED
- example: STARTED
- inactive_timeout:
- type: integer
- description: The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to null, omit or set to 0 to disable automatic timeout.
- nullable: true
- example: 60
- availability_zone:
+ judge:
+ $ref: '#/components/schemas/EvaluationJudgeModelConfig'
+ model_a:
+ $ref: '#/components/schemas/EvaluationModelOrString'
+ model_b:
+ $ref: '#/components/schemas/EvaluationModelOrString'
+ input_data_file_path:
type: string
- description: Create the endpoint in a specified availability zone (e.g., us-central-4b)
+ description: Data file name
- DedicatedEndpoint:
+ EvaluationJudgeModelConfig:
type: object
- description: Details about a dedicated endpoint deployment
required:
- - object
- - id
- - name
- - display_name
- model
- - hardware
- - type
- - owner
- - state
- - autoscaling
- - created_at
+ - system_template
+ - model_source
properties:
- object:
- type: string
- enum:
- - endpoint
- description: The type of object
- example: endpoint
- id:
- type: string
- description: Unique identifier for the endpoint
- example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
- name:
- type: string
- description: System name for the endpoint
- example: devuser/meta-llama/Llama-3-8b-chat-hf-a32b82a1
- display_name:
- type: string
- description: Human-readable name for the endpoint
- example: My Llama3 70b endpoint
model:
type: string
- description: The model deployed on this endpoint
- example: meta-llama/Llama-3-8b-chat-hf
- hardware:
- type: string
- description: The hardware configuration used for this endpoint
- example: 1x_nvidia_a100_80gb_sxm
- type:
- type: string
- enum:
- - dedicated
- description: The type of endpoint
- example: dedicated
- owner:
+ description: Name of the judge model
+ example: 'meta-llama/Llama-3-70B-Instruct-Turbo'
+ system_template:
type: string
- description: The owner of this endpoint
- example: devuser
- state:
+ description: System prompt template for the judge
+ example: 'Imagine you are a helpful assistant'
+ model_source:
type: string
- enum:
- - PENDING
- - STARTING
- - STARTED
- - STOPPING
- - STOPPED
- - ERROR
- description: Current state of the endpoint
- example: STARTED
- autoscaling:
- $ref: '#/components/schemas/Autoscaling'
- description: Configuration for automatic scaling of the endpoint
- created_at:
+ description: "Source of the judge model."
+ enum: [serverless, dedicated, external]
+ external_api_token:
type: string
- format: date-time
- description: Timestamp when the endpoint was created
- example: 2025-02-04T10:43:55.405Z
+ description: "Bearer/API token for external judge models."
+ external_base_url:
+ type: string
+ description: "Base URL for external judge models. Must be OpenAI-compatible base URL."
- ListEndpoint:
+ EvaluationModelOrString:
+ oneOf:
+ - type: string
+ description: Field name in the input data
+ - $ref: '#/components/schemas/EvaluationModelRequest'
+
+ EvaluationModelRequest:
type: object
- description: Details about an endpoint when listed via the list endpoint
required:
- - id
- - object
- - name
- model
- - type
- - owner
- - state
- - created_at
+ - max_tokens
+ - temperature
+ - system_template
+ - input_template
+ - model_source
properties:
- object:
+ model:
type: string
- enum:
- - endpoint
- description: The type of object
- example: endpoint
- id:
+ description: Name of the model to evaluate
+ example: 'meta-llama/Llama-3-70B-Instruct-Turbo'
+ max_tokens:
+ type: integer
+ minimum: 1
+ description: Maximum number of tokens to generate
+ example: 512
+ temperature:
+ type: number
+ format: float
+ minimum: 0
+ maximum: 2
+ description: Sampling temperature
+ example: 0.7
+ system_template:
type: string
- description: Unique identifier for the endpoint
- example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
- name:
+ description: System prompt template
+          example: 'Imagine you are a helpful assistant'
+ input_template:
type: string
- description: System name for the endpoint
- example: allenai/OLMo-7B
- model:
+ description: Input prompt template
+ example: 'Please classify {{prompt}} based on the labels below'
+ model_source:
type: string
- description: The model deployed on this endpoint
- example: allenai/OLMo-7B
- type:
+ description: "Source of the model."
+ enum: [serverless, dedicated, external]
+ external_api_token:
type: string
- enum:
- - serverless
- - dedicated
- description: The type of endpoint
- example: serverless
- owner:
+ description: "Bearer/API token for external models."
+ external_base_url:
type: string
- description: The owner of this endpoint
- example: together
- state:
+ description: "Base URL for external models. Must be OpenAI-compatible base URL"
+
+ EvaluationResponse:
+ type: object
+ properties:
+ workflow_id:
type: string
- enum:
- - PENDING
- - STARTING
- - STARTED
- - STOPPING
- - STOPPED
- - ERROR
- description: Current state of the endpoint
- example: STARTED
- created_at:
+ description: The ID of the created evaluation job
+ example: 'eval-1234-1244513'
+ status:
type: string
- format: date-time
- description: Timestamp when the endpoint was created
- example: 2024-02-28T21:34:35.444Z
+ enum: [pending]
+ description: Initial status of the job
- DisplayorExecuteOutput:
+ EvaluationJob:
+ type: object
properties:
- data:
- properties:
- application/geo+json:
- type: object
- application/javascript:
- type: string
- application/json:
- type: object
- application/pdf:
- format: byte
- type: string
- application/vnd.vega.v5+json:
- type: object
- application/vnd.vegalite.v4+json:
- type: object
- image/gif:
- format: byte
- type: string
- image/jpeg:
- format: byte
- type: string
- image/png:
- format: byte
- type: string
- image/svg+xml:
- type: string
- text/html:
- type: string
- text/latex:
- type: string
- text/markdown:
- type: string
- text/plain:
- type: string
- type: object
+ workflow_id:
+ type: string
+ description: The evaluation job ID
+ example: 'eval-1234aedf'
type:
- enum:
- - display_data
- - execute_result
type: string
- required:
- - type
- - data
- title: DisplayorExecuteOutput
-
- Error:
- oneOf:
- - type: string
- - additionalProperties: true
+ enum: [classify, score, compare]
+ description: The type of evaluation
+ example: classify
+ owner_id:
+ type: string
+ description: ID of the job owner (admin only)
+ status:
+ type: string
+ enum: [pending, queued, running, completed, error, user_error]
+ description: Current status of the job
+ example: completed
+ status_updates:
+ type: array
+ items:
+ $ref: '#/components/schemas/EvaluationJobStatusUpdate'
+ description: History of status updates (admin only)
+ parameters:
type: object
- title: Error
-
- ErrorOutput:
- title: ErrorOutput
- description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully.
- properties:
- data:
+ description: The parameters used for this evaluation
+ additionalProperties: true
+ created_at:
type: string
- type:
- enum:
- - error
+ format: date-time
+ description: When the job was created
+ example: '2025-07-23T17:10:04.837888Z'
+ updated_at:
type: string
- required:
- - type
- - data
+ format: date-time
+ description: When the job was last updated
+ example: '2025-07-23T17:10:04.837888Z'
+ results:
+ oneOf:
+ - $ref: '#/components/schemas/EvaluationClassifyResults'
+ - $ref: '#/components/schemas/EvaluationScoreResults'
+ - $ref: '#/components/schemas/EvaluationCompareResults'
+ - type: object
+ properties:
+ error:
+ type: string
+ nullable: true
+ description: Results of the evaluation (when completed)
- ExecuteRequest:
- title: ExecuteRequest
- required:
- - language
- - code
+ EvaluationJobStatusUpdate:
+ type: object
properties:
- code:
- description: 'Code snippet to execute.'
- example: "print('Hello, world!')"
+ status:
type: string
- files:
- description: Files to upload to the session. If present, files will be uploaded before executing the given code.
- items:
- properties:
- content:
- type: string
- encoding:
- description: Encoding of the file content. Use `string` for text files such as code, and `base64` for binary files, such as images.
- enum:
- - string
- - base64
- type: string
- name:
- type: string
- required:
- - name
- - encoding
- - content
- type: object
- type: array
- language:
- default: python
- description: Programming language for the code to execute. Currently only supports Python, but more will be added.
- enum:
- - python
- session_id:
- description: Identifier of the current session. Used to make follow-up calls. Requests will return an error if the session does not belong to the caller or has expired.
- example: ses_abcDEF123
- nullable: false
+ description: The status at this update
+ example: pending
+ message:
+ type: string
+ description: Additional message for this update
+ example: Job is pending evaluation
+ timestamp:
+ type: string
+ format: date-time
+ description: When this update occurred
+ example: '2025-07-23T17:10:04.837888Z'
+
+    EvaluationClassifyResults:  # result fields reported for a classify evaluation
+      type: object
+      properties:
+        generation_fail_count:
+          type: integer  # a count; `integer` is an OpenAPI type, not a format of `number`
+          format: int32
+          nullable: true
+          description: Number of failed generations.
+          example: 0
+        judge_fail_count:
+          type: integer  # a count, same reasoning as generation_fail_count
+          format: int32
+          nullable: true
+          description: Number of failed judge generations
+          example: 0
+        invalid_label_count:
+          type: number  # NOTE(review): declared as float although it is a count; confirm the wire type
+          format: float
+          nullable: true
+          description: Number of invalid labels
+          example: 0
+        result_file_id:
+          type: string
+          description: Data File ID
+          example: file-1234-aefd
+        pass_percentage:
+          type: number  # kept as number since a percentage may be fractional; TODO confirm
+          format: float  # was `format: integer`, which is not a defined number format
+          nullable: true
+          description: Percentage of pass labels.
+          example: 10
+        label_counts:
+          type: string
+          description: JSON string representing label counts
+          example: '{"yes": 10, "no": 0}'
- ExecuteResponse:
- title: ExecuteResponse
+ EvaluationScoreResults:
type: object
- description: 'The result of the execution. If successful, `data` contains the result and `errors` will be null. If unsuccessful, `data` will be null and `errors` will contain the errors.'
- oneOf:
- - title: SuccessfulExecution
- type: object
- required: [data, errors]
- properties:
- errors:
- type: 'null'
- data:
- type: object
- nullable: false
- required: [session_id, outputs]
- properties:
- outputs:
- type: array
- items:
- discriminator:
- propertyName: type
- oneOf:
- - title: StreamOutput
- description: Outputs that were printed to stdout or stderr
- type: object
- required: [type, data]
- properties:
- type:
- enum:
- - stdout
- - stderr
- type: string
- data:
- type: string
- - description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully.
- properties:
- data:
- type: string
- type:
- enum:
- - error
- type: string
- required:
- - type
- - data
- title: ErrorOutput
- - properties:
- data:
- properties:
- application/geo+json:
- type: object
- additionalProperties: true
- application/javascript:
- type: string
- application/json:
- type: object
- additionalProperties: true
- application/pdf:
- format: byte
- type: string
- application/vnd.vega.v5+json:
- type: object
- additionalProperties: true
- application/vnd.vegalite.v4+json:
- type: object
- additionalProperties: true
- image/gif:
- format: byte
- type: string
- image/jpeg:
- format: byte
- type: string
- image/png:
- format: byte
- type: string
- image/svg+xml:
- type: string
- text/html:
- type: string
- text/latex:
- type: string
- text/markdown:
- type: string
- text/plain:
- type: string
- type: object
- type:
- enum:
- - display_data
- - execute_result
- type: string
- required:
- - type
- - data
- title: DisplayorExecuteOutput
- title: InterpreterOutput
- session_id:
- type: string
- description: Identifier of the current session. Used to make follow-up calls.
- example: ses_abcDEF123
- nullable: false
- status:
- type: string
- enum:
- - success
- description: Status of the execution. Currently only supports success.
- - title: FailedExecution
+ properties:
+ aggregated_scores:
type: object
- required: [data, errors]
- properties:
- data:
- type: 'null'
- errors:
- type: array
- items:
- title: Error
- oneOf:
- - type: string
- - type: object
- additionalProperties: true
-
- InterpreterOutput:
- discriminator:
- propertyName: type
- oneOf:
- - description: Outputs that were printed to stdout or stderr
- properties:
- data:
- type: string
- type:
- enum:
- - stdout
- - stderr
- type: string
- required:
- - type
- - data
- title: StreamOutput
- - description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully.
properties:
- data:
- type: string
- type:
- enum:
- - error
- type: string
- required:
- - type
- - data
- title: ErrorOutput
- - properties:
- data:
- properties:
- application/geo+json:
- type: object
- application/javascript:
- type: string
- application/json:
- type: object
- application/pdf:
- format: byte
- type: string
- application/vnd.vega.v5+json:
- type: object
- application/vnd.vegalite.v4+json:
- type: object
- image/gif:
- format: byte
- type: string
- image/jpeg:
- format: byte
- type: string
- image/png:
- format: byte
- type: string
- image/svg+xml:
- type: string
- text/html:
- type: string
- text/latex:
- type: string
- text/markdown:
- type: string
- text/plain:
- type: string
- type: object
- type:
- enum:
- - display_data
- - execute_result
- type: string
- required:
- - type
- - data
- title: DisplayorExecuteOutput
- title: InterpreterOutput
+ mean_score:
+ type: number
+ format: float
+ std_score:
+ type: number
+ format: float
+ pass_percentage:
+ type: number
+ format: float
+ generation_fail_count:
+ type: number
+ format: integer
+ nullable: true
+ description: Number of failed generations.
+ example: 0
+ judge_fail_count:
+ type: number
+ format: integer
+ nullable: true
+ description: Number of failed judge generations
+ example: 0
+ invalid_score_count:
+ type: number
+ format: integer
+ description: Number of invalid scores generated from model
+ failed_samples:
+ type: number
+ format: integer
+ description: Number of failed samples generated from model
+ result_file_id:
+ type: string
+ description: Data File ID
+ example: file-1234-aefd
- Response:
+ EvaluationCompareResults:
+ type: object
properties:
- errors:
+ num_samples:
+ type: integer
+ description: Total number of samples compared
+ A_wins:
+ type: integer
+ description: Number of times model A won
+ B_wins:
+ type: integer
+ description: Number of times model B won
+ Ties:
+ type: integer
+ description: Number of ties
+ generation_fail_count:
+ type: number
+ format: integer
+ nullable: true
+ description: Number of failed generations.
+ example: 0
+ judge_fail_count:
+ type: number
+ format: integer
+ nullable: true
+ description: Number of failed judge generations
+ example: 0
+ result_file_id:
+ type: string
+ description: Data File ID
+
+ AudioFileBinary:
+ type: string
+ format: binary
+ description: Audio file to transcribe
+
+ AudioFileUrl:
+ type: string
+ format: uri
+ description: Public HTTPS URL to audio file
+
+ CreateVideoBody:
+ title: Create video request
+ description: Parameters for creating a new video generation job.
+ type: object
+ required:
+ - model
+ properties:
+ model:
+ type: string
+ description: The model to be used for the video creation request.
+ prompt:
+ type: string
+ maxLength: 32000
+ minLength: 1
+ description: Text prompt that describes the video to generate.
+ height:
+ type: integer
+ width:
+ type: integer
+ seconds:
+ type: string
+ description: Clip duration in seconds.
+ fps:
+ type: integer
+ description: Frames per second. Defaults to 24.
+ steps:
+ type: integer
+ minimum: 10
+ maximum: 50
+ description: The number of denoising steps the model performs during video generation. More steps typically result in higher quality output but require longer processing time.
+ seed:
+ type: integer
+ description: Seed to use in initializing the video generation. Using the same seed allows deterministic video generation. If not provided a random seed is generated for each request.
+ guidance_scale:
+ type: integer
+ description: Controls how closely the video generation follows your prompt. Higher values make the model adhere more strictly to your text description, while lower values allow more creative freedom. guidance_scale affects both visual content and temporal consistency. Recommended range is 6.0-10.0 for most video models. Values above 12 may cause over-guidance artifacts or unnatural motion patterns.
+ output_format:
+ $ref: '#/components/schemas/VideoOutputFormat'
+ description: Specifies the format of the output video. Defaults to MP4.
+ output_quality:
+ type: integer
+ description: Compression quality. Defaults to 20.
+ negative_prompt:
+ type: string
+ description: Similar to prompt, but specifies what to avoid instead of what to include
+ frame_images:
+ description: Array of images to guide video generation, similar to keyframes.
+ example:  # a flat list of VideoFrameImageInput objects
+ [
+ {
+ "input_image": "aac49721-1964-481a-ae78-8a4e29b91402",
+ "frame": 0
+ },
+ {
+ "input_image": "c00abf5f-6cdb-4642-a01d-1bfff7bc3cf7",
+ "frame": 48
+ },
+ {
+ "input_image": "3ad204c3-a9de-4963-8a1a-c3911e3afafe",
+ "frame": "last"
+ }
+ ]
+ type: array
items:
- oneOf:
- - type: string
- - additionalProperties: true
- type: object
- title: Error
+ $ref: '#/components/schemas/VideoFrameImageInput'
+ reference_images:
+ description: Unlike frame_images which constrain specific timeline positions, reference images guide the general appearance that should appear consistently across the video.
type: array
- title: Response
- type: object
+ items:
+ type: string
+ VideoStatus:
+ description: Current lifecycle status of the video job.
+ type: string
+ enum:
+ - in_progress
+ - completed
+ - failed
- SessionListResponse:
- allOf:
- - properties:
- errors:
- items:
- oneOf:
- - type: string
- - additionalProperties: true
- type: object
- title: Error
- type: array
- title: Response
- type: object
- - properties:
- data:
- properties:
- sessions:
- items:
- properties:
- execute_count:
- type: integer
- expires_at:
- format: date-time
- type: string
- id:
- description: Session Identifier. Used to make follow-up calls.
- example: ses_abcDEF123
- type: string
- last_execute_at:
- format: date-time
- type: string
- started_at:
- format: date-time
- type: string
- required:
- - execute_count
- - expires_at
- - id
- - last_execute_at
- - started_at
- type: object
- type: array
- required:
- - sessions
- type: object
- title: SessionListResponse
+ VideoFrameImageInput:
type: object
+ required: ['input_image']
+ properties:
+ input_image:
+ type: string
+ description: URL path to hosted image that is used for a frame
+ frame:
+ description: |
+ Optional param to specify where to insert the frame. If this is omitted, the following heuristics are applied:
+ - If frame_images size is one, frame is first.
+ - If size is two, frames are first and last.
+ - If size is larger, frames are first, last and evenly spaced between.
+ anyOf:
+ - type: number
+ - type: string
+ enum:
+ - first
+ - last
- StreamOutput:
- description: Outputs that were printed to stdout or stderr
+ VideoOutputFormat:
+ type: string
+ enum:
+ - MP4
+ - WEBM
+
+ VideoJob:
properties:
- data:
+ id:
+ type: string
+ description: Unique identifier for the video job.
+ object:
+ description: The object type, which is always video.
type: string
- type:
enum:
- - stdout
- - stderr
+ - video
+ model:
type: string
- required:
- - type
- - data
- title: StreamOutput
-
- CreateBatchRequest:
+ description: The video generation model that produced the job.
+ status:
+ $ref: '#/components/schemas/VideoStatus'
+ description: Current lifecycle status of the video job.
+ created_at:
+ type: number
+ description: Unix timestamp (seconds) for when the job was created.
+ completed_at:
+ type: number
+ description: Unix timestamp (seconds) for when the job completed, if finished.
+ size:
+ type: string
+ description: The resolution of the generated video.
+ seconds:
+ type: string
+ description: Duration of the generated clip in seconds.
+ error:
+ description: Error payload that explains why generation failed, if applicable.
+ type: object
+ properties:
+ code:
+ type: string
+ message:
+ type: string
+ required:
+ - message
+ outputs:
+ description: Available upon completion. Provides the cost charged and the hosted URL to access the video.
+ type: object
+ properties:
+ cost:
+ type: integer
+ description: The cost of the generated video charged to the owner's account.
+ video_url:
+ type: string
+ description: URL hosting the generated video
+ required:
+ - cost
+ - video_url
type: object
- required: [endpoint, input_file_id]
+ required:
+ - id
+ - model
+ - status
+ - size
+ - seconds
+ - created_at
+ title: Video job
+ description: Structured information describing a generated video job.
+ ContainerStatus:
properties:
- endpoint:
+ finishedAt:
+ description: FinishedAt is the timestamp when the container finished execution
+ (if terminated)
type: string
- description: The endpoint to use for batch processing
- example: '/v1/chat/completions'
- input_file_id:
+ message:
+ description: Message provides a human-readable message with details about the
+ container's status
type: string
- description: ID of the uploaded input file containing batch requests
- example: 'file-abc123def456ghi789'
- completion_window:
+ name:
+ description: Name is the name of the container
type: string
- description: Time window for batch completion (optional)
- example: '24h'
- priority:
- type: integer
- description: Priority for batch processing (optional)
- example: 1
- model_id:
+ reason:
+ description: Reason provides a brief machine-readable reason for the container's
+ current status
type: string
- description: 'Model to use for processing batch requests'
- example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
- BatchErrorResponse:
- type: object
- properties:
- error:
+ startedAt:
+ description: StartedAt is the timestamp when the container started execution
type: string
- BatchJobWithWarning:
- type: object
- properties:
- job:
- $ref: '#/components/schemas/BatchJob'
- warning:
+ status:
+ description: Status is the current state of the container (e.g., "Running",
+ "Terminated", "Waiting")
type: string
- BatchJob:
type: object
+ CreateDeploymentRequest:
properties:
- id:
- type: string
- format: uuid
- example: '01234567-8901-2345-6789-012345678901'
- user_id:
- type: string
- example: 'user_789xyz012'
- input_file_id:
+ args:
+ description: Args overrides the container's CMD. Provide as an array of
+ arguments (e.g., ["python", "app.py"])
+ items:
+ type: string
+ type: array
+ autoscaling:
+ additionalProperties:
+ type: string
+ description: 'Autoscaling configuration as key-value pairs. Example: {"metric":
+ "QueueBacklogPerWorker", "target": "10"} to scale based on queue
+ backlog'
+ type: object
+ command:
+ description: Command overrides the container's ENTRYPOINT. Provide as an array
+ (e.g., ["/bin/sh", "-c"])
+ items:
+ type: string
+ type: array
+ cpu:
+ description: CPU is the number of CPU cores to allocate per container instance
+ (e.g., 0.1 = 100 milli cores)
+ minimum: 0.1
+ type: number
+ description:
+ description: Description is an optional human-readable description of your
+ deployment
type: string
- example: 'file-input123abc456def'
- file_size_bytes:
+ environment_variables:
+ description: EnvironmentVariables is a list of environment variables to set in
+ the container. Each must have a name and either a value or
+ value_from_secret
+ items:
+ $ref: "#/components/schemas/EnvironmentVariable"
+ type: array
+ gpu_count:
+ description: GPUCount is the number of GPUs to allocate per container instance.
+ Defaults to 0 if not specified
type: integer
- format: int64
- example: 1048576
- description: 'Size of input file in bytes'
- status:
- $ref: '#/components/schemas/BatchJobStatus'
- job_deadline:
+ gpu_type:
+ description: GPUType specifies the GPU hardware to use (e.g., "h100-80gb").
+ enum:
+ - h100-80gb
+ - a100-80gb
type: string
- format: date-time
- example: '2024-01-15T15:30:00Z'
- created_at:
+ health_check_path:
+ description: HealthCheckPath is the HTTP path for health checks (e.g.,
+ "/health"). If set, the platform will check this endpoint to
+ determine container health
type: string
- format: date-time
- example: '2024-01-15T14:30:00Z'
- endpoint:
+ image:
+ description: Image is the container image to deploy from registry.together.ai.
type: string
- example: '/v1/chat/completions'
- progress:
+ max_replicas:
+ description: MaxReplicas is the maximum number of container instances that can
+ be scaled up to. If not set, will be set to MinReplicas
+ type: integer
+ memory:
+ description: Memory is the amount of RAM to allocate per container instance in
+ GiB (e.g., 0.5 = 512MiB)
+ minimum: 0.1
type: number
- format: float64
- example: 75.0
- description: 'Completion progress (0.0 to 100)'
- model_id:
+ min_replicas:
+ description: MinReplicas is the minimum number of container instances to run.
+ Defaults to 1 if not specified
+ type: integer
+ name:
+ description: Name is the unique identifier for your deployment. Must contain
+ only alphanumeric characters, underscores, or hyphens (1-100
+ characters)
+ maxLength: 100
+ minLength: 1
type: string
- example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
- description: 'Model used for processing requests'
- output_file_id:
+ port:
+ description: Port is the container port your application listens on (e.g., 8080
+ for web servers). Required if your application serves traffic
+ type: integer
+ storage:
+ description: Storage is the amount of ephemeral disk storage to allocate per
+ container instance (e.g., 10 = 10GiB)
+ type: integer
+ termination_grace_period_seconds:
+ description: TerminationGracePeriodSeconds is the time in seconds to wait for
+ graceful shutdown before forcefully terminating the replica
+ type: integer
+ volumes:
+ description: Volumes is a list of volume mounts to attach to the container. Each
+ mount must reference an existing volume by name
+ items:
+ $ref: "#/components/schemas/VolumeMount"
+ type: array
+ required:
+ - gpu_type
+ - image
+ - name
+ type: object
+ CreateSecretRequest:
+ properties:
+ description:
+ description: Description is an optional human-readable description of the
+ secret's purpose (max 500 characters)
+ maxLength: 500
type: string
- example: 'file-output789xyz012ghi'
- error_file_id:
+ name:
+ description: Name is the unique identifier for the secret. Can contain
+ alphanumeric characters, underscores, hyphens, forward slashes, and
+ periods (1-100 characters)
+ maxLength: 100
+ minLength: 1
type: string
- example: 'file-errors456def789jkl'
- error:
+ project_id:
+ description: ProjectID is ignored - the project is automatically determined from
+ your authentication
type: string
- completed_at:
+ value:
+ description: Value is the sensitive data to store securely (e.g., API keys,
+ passwords, tokens). This value will be encrypted at rest
+ minLength: 1
type: string
- format: date-time
- example: '2024-01-15T15:45:30Z'
- BatchJobStatus:
- type: string
- enum:
- - VALIDATING
- - IN_PROGRESS
- - COMPLETED
- - FAILED
- - EXPIRED
- - CANCELLED
- example: 'IN_PROGRESS'
- description: 'Current status of the batch job'
-
- EvaluationTypedRequest:
+ required:
+ - name
+ - value
type: object
+ CreateVolumeRequest:
+ properties:
+ content:
+ allOf:
+ - $ref: "#/components/schemas/VolumeContent"
+ description: Content specifies the content configuration for this volume
+ name:
+ description: Name is the unique identifier for the volume within the project
+ type: string
+ type:
+ allOf:
+ - $ref: "#/components/schemas/VolumeType"
+ description: Type is the volume type (currently only "readOnly" is supported)
required:
+ - content
+ - name
- type
- - parameters
+ type: object
+ DeploymentListResponse:
properties:
- type:
+ data:
+ description: Data is the array of deployment items
+ items:
+ $ref: "#/components/schemas/DeploymentResponseItem"
+ type: array
+ object:
+ description: Object is the type identifier for this response (always "list")
type: string
- enum: [classify, score, compare]
- description: The type of evaluation to perform
- example: 'classify'
- parameters:
- oneOf:
- - $ref: '#/components/schemas/EvaluationClassifyParameters'
- - $ref: '#/components/schemas/EvaluationScoreParameters'
- - $ref: '#/components/schemas/EvaluationCompareParameters'
- description: Type-specific parameters for the evaluation
-
- EvaluationClassifyParameters:
type: object
- required:
- - judge
- - labels
- - pass_labels
- - input_data_file_path
+ DeploymentLogs:
properties:
- judge:
- $ref: '#/components/schemas/EvaluationJudgeModelConfig'
- labels:
+ lines:
+ items:
+ type: string
type: array
+ type: object
+ DeploymentResponseItem:
+ properties:
+ args:
+ description: Args are the arguments passed to the container's command
items:
type: string
- minItems: 2
- description: List of possible classification labels
- example: ['yes', 'no']
- pass_labels:
type: array
+ autoscaling:
+ additionalProperties:
+ type: string
+ description: Autoscaling contains autoscaling configuration parameters for this
+ deployment
+ type: object
+ command:
+ description: Command is the entrypoint command run in the container
items:
type: string
- minItems: 1
- description: List of labels that are considered passing
- example: ['yes']
- model_to_evaluate:
- $ref: '#/components/schemas/EvaluationModelOrString'
- input_data_file_path:
- type: string
- description: Data file ID
- example: 'file-1234-aefd'
-
- EvaluationScoreParameters:
- type: object
- required:
- - judge
- - min_score
- - max_score
- - pass_threshold
- - input_data_file_path
- properties:
- judge:
- $ref: '#/components/schemas/EvaluationJudgeModelConfig'
- min_score:
- type: number
- format: float
- example: 0.0
- description: Minimum possible score
- max_score:
+ type: array
+ cpu:
+ description: CPU is the amount of CPU resource allocated to each replica in
+ cores (fractional value is allowed)
type: number
- format: float
- example: 10.0
- description: Maximum possible score
- pass_threshold:
+ created_at:
+ description: CreatedAt is the ISO8601 timestamp when this deployment was created
+ type: string
+ description:
+ description: Description provides a human-readable explanation of the
+ deployment's purpose or content
+ type: string
+ desired_replicas:
+ description: DesiredReplicas is the number of replicas that the orchestrator is
+ targeting
+ type: integer
+ environment_variables:
+ description: EnvironmentVariables is a list of environment variables set in the
+ container
+ items:
+ $ref: "#/components/schemas/EnvironmentVariable"
+ type: array
+ gpu_count:
+ description: GPUCount is the number of GPUs allocated to each replica in this
+ deployment
+ type: integer
+ gpu_type:
+ description: GPUType specifies the type of GPU requested (if any) for this
+ deployment
+ enum:
+ - h100-80gb
+ - a100-80gb
+ type: string
+ health_check_path:
+ description: HealthCheckPath is the HTTP path used for health checks of the
+ application
+ type: string
+ id:
+ description: ID is the unique identifier of the deployment
+ type: string
+ image:
+ description: Image specifies the container image used for this deployment
+ type: string
+ max_replicas:
+ description: MaxReplicas is the maximum number of replicas to run for this
+ deployment
+ type: integer
+ memory:
+ description: Memory is the amount of memory allocated to each replica in GiB
+ (fractional value is allowed)
type: number
- format: float
- example: 7.0
- description: Score threshold for passing
- model_to_evaluate:
- $ref: '#/components/schemas/EvaluationModelOrString'
- input_data_file_path:
+ min_replicas:
+ description: MinReplicas is the minimum number of replicas to run for this
+ deployment
+ type: integer
+ name:
+ description: Name is the name of the deployment
+ type: string
+ object:
+ description: Object is the type identifier for this response (always "deployment")
+ type: string
+ port:
+ description: Port is the container port that the deployment exposes
+ type: integer
+ ready_replicas:
+ description: ReadyReplicas is the current number of replicas that are in the
+ Ready state
+ type: integer
+ replica_events:
+ additionalProperties:
+ $ref: "#/components/schemas/ReplicaEvent"
+ description: ReplicaEvents is a mapping of replica names or IDs to their status
+ events
+ type: object
+ status:
+ allOf:
+ - $ref: "#/components/schemas/DeploymentStatus"
+ description: Status represents the overall status of the deployment (e.g.,
+ Updating, Scaling, Ready, Failed)
+ enum:
+ - Updating
+ - Scaling
+ - Ready
+ - Failed
+ storage:
+ description: Storage is the amount of storage (in MB or units as defined by the
+ platform) allocated to each replica
+ type: integer
+ updated_at:
+ description: UpdatedAt is the ISO8601 timestamp when this deployment was last
+ updated
type: string
- example: 'file-01234567890123456789'
- description: Data file ID
-
- EvaluationCompareParameters:
+ volumes:
+ description: Volumes is a list of volume mounts for this deployment
+ items:
+ $ref: "#/components/schemas/VolumeMount"
+ type: array
type: object
- required:
- - judge
- - input_data_file_path
+ DeploymentStatus:
+ enum:
+ - Updating
+ - Scaling
+ - Ready
+ - Failed
+ type: string
+ x-enum-varnames:
+ - DeploymentStatusUpdating
+ - DeploymentStatusScaling
+ - DeploymentStatusReady
+ - DeploymentStatusFailed
+ EnvironmentVariable:
properties:
- judge:
- $ref: '#/components/schemas/EvaluationJudgeModelConfig'
- model_a:
- $ref: '#/components/schemas/EvaluationModelOrString'
- model_b:
- $ref: '#/components/schemas/EvaluationModelOrString'
- input_data_file_path:
+ name:
+ description: Name is the environment variable name (e.g., "DATABASE_URL"). Must
+ start with a letter or underscore, followed by letters, numbers, or
+ underscores
+ type: string
+ value:
+ description: Value is the plain text value for the environment variable. Use
+ this for non-sensitive values. Either Value or ValueFromSecret must
+ be set, but not both
+ type: string
+ value_from_secret:
+ description: ValueFromSecret references a secret by name or ID to use as the
+ value. Use this for sensitive values like API keys or passwords.
+ Either Value or ValueFromSecret must be set, but not both
type: string
- description: Data file name
-
- EvaluationJudgeModelConfig:
- type: object
required:
- - model
- - system_template
- - model_source
+ - name
+ type: object
+ ImageListResponse:
properties:
- model:
- type: string
- description: Name of the judge model
- example: 'meta-llama/Llama-3-70B-Instruct-Turbo'
- system_template:
+ data:
+ description: Data is the array of image items
+ items:
+ $ref: "#/components/schemas/ImageResponseItem"
+ type: array
+ object:
+ description: Object is the type identifier for this response (always "list")
type: string
- description: System prompt template for the judge
- example: 'Imagine you are a helpful assistant'
- model_source:
+ type: object
+ ImageResponseItem:
+ properties:
+ object:
+ description: Object is the type identifier for this response (always "image")
type: string
- description: "Source of the judge model."
- enum: [serverless, dedicated, external]
- external_api_token:
+ tag:
+ description: Tag is the image tag/version identifier (e.g., "latest", "v1.0.0")
type: string
- description: "Bearer/API token for external judge models."
- external_base_url:
+ url:
+ description: URL is the full registry URL for this image including tag (e.g.,
+ "registry.together.ai/project-id/repository:tag")
type: string
- description: "Base URL for external judge models. Must be OpenAI-compatible base URL."
-
- EvaluationModelOrString:
- oneOf:
- - type: string
- description: Field name in the input data
- - $ref: '#/components/schemas/EvaluationModelRequest'
-
- EvaluationModelRequest:
type: object
- required:
- - model
- - max_tokens
- - temperature
- - system_template
- - input_template
- - model_source
+ KubernetesEvent:
properties:
- model:
+ action:
+ description: Action is the action taken or reported by this event
type: string
- description: Name of the model to evaluate
- example: 'meta-llama/Llama-3-70B-Instruct-Turbo'
- max_tokens:
+ count:
+ description: Count is the number of times this event has occurred
type: integer
- minimum: 1
- description: Maximum number of tokens to generate
- example: 512
- temperature:
- type: number
- format: float
- minimum: 0
- maximum: 2
- description: Sampling temperature
- example: 0.7
- system_template:
- type: string
- description: System prompt template
- example: 'Imagine you are helpful assistant'
- input_template:
+ first_seen:
+ description: FirstSeen is the timestamp when this event was first observed
type: string
- description: Input prompt template
- example: 'Please classify {{prompt}} based on the labels below'
- model_source:
+ last_seen:
+ description: LastSeen is the timestamp when this event was last observed
type: string
- description: "Source of the model."
- enum: [serverless, dedicated, external]
- external_api_token:
+ message:
+ description: Message is a human-readable description of the event
type: string
- description: "Bearer/API token for external models."
- external_base_url:
+ reason:
+ description: Reason is a brief machine-readable reason for this event (e.g.,
+ "Pulling", "Started", "Failed")
type: string
- description: "Base URL for external models. Must be OpenAI-compatible base URL"
-
- EvaluationResponse:
type: object
+ ListSecretsResponse:
properties:
- workflow_id:
+ data:
+ description: Data is the array of secret items
+ items:
+ $ref: "#/components/schemas/SecretResponseItem"
+ type: array
+ object:
+ description: Object is the type identifier for this response (always "list")
type: string
- description: The ID of the created evaluation job
- example: 'eval-1234-1244513'
- status:
+ type: object
+ ListVolumesResponse:
+ properties:
+ data:
+ description: Data is the array of volume items
+ items:
+ $ref: "#/components/schemas/VolumeResponseItem"
+ type: array
+ object:
+ description: Object is the type identifier for this response (always "list")
type: string
- enum: [pending]
- description: Initial status of the job
-
- EvaluationJob:
type: object
+ ReplicaEvent:
properties:
- workflow_id:
+ container_status:
+ allOf:
+ - $ref: "#/components/schemas/ContainerStatus"
+ description: ContainerStatus provides detailed status information about the
+ container within this replica
+ events:
+ description: Events is a list of Kubernetes events related to this replica for
+ troubleshooting
+ items:
+ $ref: "#/components/schemas/KubernetesEvent"
+ type: array
+ replica_completed_at:
+ description: ReplicaCompletedAt is the timestamp when the replica finished
+ execution
type: string
- description: The evaluation job ID
- example: 'eval-1234aedf'
- type:
+ replica_marked_for_termination_at:
+ description: ReplicaMarkedForTerminationAt is the timestamp when the replica was
+ marked for termination
type: string
- enum: [classify, score, compare]
- description: The type of evaluation
- example: classify
- owner_id:
+ replica_ready_since:
+ description: ReplicaReadySince is the timestamp when the replica became ready to
+ serve traffic
type: string
- description: ID of the job owner (admin only)
- status:
+ replica_running_since:
+ description: ReplicaRunningSince is the timestamp when the replica entered the
+ running state
type: string
- enum: [pending, queued, running, completed, error, user_error]
- description: Current status of the job
- example: completed
- status_updates:
- type: array
- items:
- $ref: '#/components/schemas/EvaluationJobStatusUpdate'
- description: History of status updates (admin only)
- parameters:
- type: object
- description: The parameters used for this evaluation
- additionalProperties: true
- created_at:
+ replica_started_at:
+ description: ReplicaStartedAt is the timestamp when the replica was created
type: string
- format: date-time
- description: When the job was created
- example: '2025-07-23T17:10:04.837888Z'
- updated_at:
+ replica_status:
+ description: ReplicaStatus is the current status of the replica (e.g.,
+ "Running", "Pending", "Failed")
type: string
- format: date-time
- description: When the job was last updated
- example: '2025-07-23T17:10:04.837888Z'
- results:
- oneOf:
- - $ref: '#/components/schemas/EvaluationClassifyResults'
- - $ref: '#/components/schemas/EvaluationScoreResults'
- - $ref: '#/components/schemas/EvaluationCompareResults'
- - type: object
- properties:
- error:
- type: string
- nullable: true
- description: Results of the evaluation (when completed)
-
- EvaluationJobStatusUpdate:
- type: object
- properties:
- status:
+ replica_status_message:
+ description: ReplicaStatusMessage provides a human-readable message explaining
+ the replica's status
type: string
- description: The status at this update
- example: pending
- message:
+ replica_status_reason:
+ description: ReplicaStatusReason provides a brief machine-readable reason for
+ the replica's status
type: string
- description: Additional message for this update
- example: Job is pending evaluation
- timestamp:
+ scheduled_on_cluster:
+ description: ScheduledOnCluster identifies which cluster this replica is
+ scheduled on
type: string
- format: date-time
- description: When this update occurred
- example: '2025-07-23T17:10:04.837888Z'
-
- EvaluationClassifyResults:
type: object
+ RepositoryListResponse:
properties:
- generation_fail_count:
- type: number
- format: integer
- nullable: true
- description: Number of failed generations.
- example: 0
- judge_fail_count:
- type: number
- format: integer
- nullable: true
- description: Number of failed judge generations
- example: 0
- invalid_label_count:
- type: number
- format: float
- nullable: true
- description: Number of invalid labels
- example: 0
- result_file_id:
- type: string
- description: Data File ID
- example: file-1234-aefd
- pass_percentage:
- type: number
- format: integer
- nullable: true
- description: Pecentage of pass labels.
- example: 10
- label_counts:
+ data:
+ description: Data is the array of repository items
+ items:
+ $ref: "#/components/schemas/RepositoryResponseItem"
+ type: array
+ object:
+ description: Object is the type identifier for this response (always "list")
type: string
- description: JSON string representing label counts
- example: '{"yes": 10, "no": 0}'
-
- EvaluationScoreResults:
type: object
+ RepositoryResponseItem:
properties:
- aggregated_scores:
- type: object
- properties:
- mean_score:
- type: number
- format: float
- std_score:
- type: number
- format: float
- pass_percentage:
- type: number
- format: float
- generation_fail_count:
- type: number
- format: integer
- nullable: true
- description: Number of failed generations.
- example: 0
- judge_fail_count:
- type: number
- format: integer
- nullable: true
- description: Number of failed judge generations
- example: 0
- invalid_score_count:
- type: number
- format: integer
- description: number of invalid scores generated from model
- failed_samples:
- type: number
- format: integer
- description: number of failed samples generated from model
- result_file_id:
+ id:
+ description: ID is the unique identifier for this repository (repository name
+ with slashes replaced by "___")
+ type: string
+ object:
+ description: Object is the type identifier for this response (always
+ "image-repository")
+ type: string
+ url:
+ description: URL is the full registry URL for this repository (e.g.,
+ "registry.together.ai/project-id/repository-name")
type: string
- description: Data File ID
- example: file-1234-aefd
-
- EvaluationCompareResults:
type: object
+ SecretResponseItem:
properties:
- num_samples:
- type: integer
- description: Total number of samples compared
- A_wins:
- type: integer
- description: Number of times model A won
- B_wins:
- type: integer
- description: Number of times model B won
- Ties:
- type: integer
- description: Number of ties
- generation_fail_count:
- type: number
- format: integer
- nullable: true
- description: Number of failed generations.
- example: 0
- judge_fail_count:
- type: number
- format: integer
- nullable: true
- description: Number of failed judge generations
- example: 0
- result_file_id:
+ created_at:
+ description: CreatedAt is the ISO8601 timestamp when this secret was created
+ type: string
+ created_by:
+ description: CreatedBy is the identifier of the user who created this secret
+ type: string
+ description:
+ description: Description is a human-readable description of the secret's purpose
+ type: string
+ id:
+ description: ID is the unique identifier for this secret
+ type: string
+ last_updated_by:
+ description: LastUpdatedBy is the identifier of the user who last updated this
+ secret
+ type: string
+ name:
+ description: Name is the name/key of the secret
+ type: string
+ object:
+ description: Object is the type identifier for this response (always "secret")
+ type: string
+ updated_at:
+ description: UpdatedAt is the ISO8601 timestamp when this secret was last updated
type: string
- description: Data File ID
-
- AudioFileBinary:
- type: string
- format: binary
- description: Audio file to transcribe
-
- AudioFileUrl:
- type: string
- format: uri
- description: Public HTTPS URL to audio file
-
- CreateVideoBody:
- title: Create video request
- description: Parameters for creating a new video generation job.
type: object
- required:
- - model
+ UpdateDeploymentRequest:
properties:
- model:
- type: string
- description: The model to be used for the video creation request.
- prompt:
+ args:
+ description: Args overrides the container's CMD. Provide as an array of
+ arguments (e.g., ["python", "app.py"])
+ items:
+ type: string
+ type: array
+ autoscaling:
+ additionalProperties:
+ type: string
+ description: 'Autoscaling configuration as key-value pairs. Example: {"metric":
+ "QueueBacklogPerWorker", "target": "10"} to scale based on queue
+ backlog'
+ type: object
+ command:
+ description: Command overrides the container's ENTRYPOINT. Provide as an array
+ (e.g., ["/bin/sh", "-c"])
+ items:
+ type: string
+ type: array
+ cpu:
+ description: CPU is the number of CPU cores to allocate per container instance
+ (e.g., 0.1 = 100 millicores)
+ minimum: 0.1
+ type: number
+ description:
+ description: Description is an optional human-readable description of your
+ deployment
type: string
- maxLength: 32000
- minLength: 1
- description: Text prompt that describes the video to generate.
- height:
- type: integer
- width:
+ environment_variables:
+ description: EnvironmentVariables is a list of environment variables to set in
+ the container. This will replace all existing environment variables
+ items:
+ $ref: "#/components/schemas/EnvironmentVariable"
+ type: array
+ gpu_count:
+ description: GPUCount is the number of GPUs to allocate per container instance
type: integer
- seconds:
+ gpu_type:
+ description: GPUType specifies the GPU hardware to use (e.g., "h100-80gb")
+ enum:
+ - h100-80gb
+ - a100-80gb
type: string
- description: Clip duration in seconds.
- fps:
+ health_check_path:
+ description: HealthCheckPath is the HTTP path for health checks (e.g.,
+ "/health"). Set to empty string to disable health checks
+ type: string
+ image:
+ description: Image is the container image to deploy from registry.together.ai.
+ type: string
+ max_replicas:
+ description: MaxReplicas is the maximum number of replicas the deployment can
+ scale up to.
type: integer
- description: Frames per second. Defaults to 24.
- steps:
+ memory:
+ description: Memory is the amount of RAM to allocate per container instance in
+ GiB (e.g., 0.5 = 512MiB)
+ minimum: 0.1
+ type: number
+ min_replicas:
+ description: MinReplicas is the minimum number of replicas to run
type: integer
- minimum: 10
- maximum: 50
- description: The number of denoising steps the model performs during video generation. More steps typically result in higher quality output but require longer processing time.
- seed:
+ name:
+ description: Name is the new unique identifier for your deployment. Must contain
+ only alphanumeric characters, underscores, or hyphens (1-100
+ characters)
+ maxLength: 100
+ minLength: 1
+ type: string
+ port:
+ description: Port is the container port your application listens on (e.g., 8080
+ for web servers)
type: integer
- description: Seed to use in initializing the video generation. Using the same seed allows deterministic video generation. If not provided a random seed is generated for each request.
- guidance_scale:
+ storage:
+ description: Storage is the amount of ephemeral disk storage to allocate per
+ container instance (e.g., 10 = 10GiB)
type: integer
- description: Controls how closely the video generation follows your prompt. Higher values make the model adhere more strictly to your text description, while lower values allow more creative freedom. guidence_scale affects both visual content and temporal consistency.Recommended range is 6.0-10.0 for most video models. Values above 12 may cause over-guidance artifacts or unnatural motion patterns.
- output_format:
- $ref: '#/components/schemas/VideoOutputFormat'
- description: Specifies the format of the output video. Defaults to MP4.
- output_quality:
+ termination_grace_period_seconds:
+ description: TerminationGracePeriodSeconds is the time in seconds to wait for
+ graceful shutdown before forcefully terminating the replica
type: integer
- description: Compression quality. Defaults to 20.
- negative_prompt:
- type: string
- description: Similar to prompt, but specifies what to avoid instead of what to include
- frame_images:
- description: Array of images to guide video generation, similar to keyframes.
- example:
- - [
- {
- "input_image": "aac49721-1964-481a-ae78-8a4e29b91402",
- "frame": 0
- },
- {
- "input_image": "c00abf5f-6cdb-4642-a01d-1bfff7bc3cf7",
- "frame": 48
- },
- {
- "input_image": "3ad204c3-a9de-4963-8a1a-c3911e3afafe",
- "frame": "last"
- }
- ]
- type: array
+ volumes:
+ description: Volumes is a list of volume mounts to attach to the container. This
+ will replace all existing volumes
items:
- $ref: '#/components/schemas/VideoFrameImageInput'
- reference_images:
- description: Unlike frame_images which constrain specific timeline positions, reference images guide the general appearance that should appear consistently across the video.
+ $ref: "#/components/schemas/VolumeMount"
type: array
- items:
- type: string
- VideoStatus:
- description: Current lifecycle status of the video job.
- type: string
- enum:
- - in_progress
- - completed
- - failed
-
- VideoFrameImageInput:
type: object
- required: ['input_image']
+ UpdateSecretRequest:
properties:
- input_image:
+ description:
+ description: Description is an optional human-readable description of the
+ secret's purpose (max 500 characters)
+ maxLength: 500
type: string
- description: URL path to hosted image that is used for a frame
- frame:
- description: |
- Optional param to specify where to insert the frame. If this is omitted, the following heuristics are applied:
- - frame_images size is one, frame is first.
- - If size is two, frames are first and last.
- - If size is larger, frames are first, last and evenly spaced between.
- anyOf:
- - type: number
- - type: string
- enum:
- - first
- - last
-
- VideoOutputFormat:
- type: string
- enum:
- - MP4
- - WEBM
-
- VideoJob:
+ name:
+ description: Name is the new unique identifier for the secret. Can contain
+ alphanumeric characters, underscores, hyphens, forward slashes, and
+ periods (1-100 characters)
+ maxLength: 100
+ minLength: 1
+ type: string
+ project_id:
+ description: ProjectID is ignored - the project is automatically determined from
+ your authentication
+ type: string
+ value:
+ description: Value is the new sensitive data to store securely. Updating this
+ will replace the existing secret value
+ minLength: 1
+ type: string
+ type: object
+ UpdateVolumeRequest:
+ properties:
+ content:
+ allOf:
+ - $ref: "#/components/schemas/VolumeContent"
+ description: Content specifies the new content that will be preloaded to this
+ volume
+ name:
+ description: Name is the new unique identifier for the volume within the project
+ type: string
+ type:
+ allOf:
+ - $ref: "#/components/schemas/VolumeType"
+ description: Type is the new volume type (currently only "readOnly" is supported)
+ type: object
+ VolumeMount:
+ properties:
+ mount_path:
+ description: MountPath is the path in the container where the volume will be
+ mounted (e.g., "/data")
+ type: string
+ name:
+ description: Name is the name of the volume to mount. Must reference an existing
+ volume by name or ID
+ type: string
+ required:
+ - mount_path
+ - name
+ type: object
+ VolumeResponseItem:
properties:
+ content:
+ allOf:
+ - $ref: "#/components/schemas/VolumeContent"
+ description: Content specifies the content that will be preloaded to this volume
+ created_at:
+ description: CreatedAt is the ISO8601 timestamp when this volume was created
+ type: string
id:
+ description: ID is the unique identifier for this volume
+ type: string
+ name:
+ description: Name is the name of the volume
type: string
- description: Unique identifier for the video job.
object:
- description: The object type, which is always video.
+ description: Object is the type identifier for this response (always "volume")
type: string
- enum:
- - video
- model:
+ type:
+ allOf:
+ - $ref: "#/components/schemas/VolumeType"
+ description: Type is the volume type (e.g., "readOnly")
+ updated_at:
+ description: UpdatedAt is the ISO8601 timestamp when this volume was last updated
type: string
- description: The video generation model that produced the job.
- status:
- $ref: '#/components/schemas/VideoStatus'
- description: Current lifecycle status of the video job.
- created_at:
- type: number
- description: Unix timestamp (seconds) for when the job was created.
- completed_at:
- type: number
- description: Unix timestamp (seconds) for when the job completed, if finished.
- size:
+ type: object
+ VolumeContent:
+ properties:
+ source_prefix:
+ description: SourcePrefix is the file path prefix for the content to be
+ preloaded into the volume
+ example: models/
type: string
- description: The resolution of the generated video.
- seconds:
+ type:
+ description: Type is the content type (currently only "files" is supported, which
+ allows preloading files uploaded via the Files API into the volume)
+ enum:
+ - files
+ example: files
type: string
- description: Duration of the generated clip in seconds.
- error:
- description: Error payload that explains why generation failed, if applicable.
- type: object
- properties:
- code:
- type: string
- message:
- type: string
- required:
- - message
- outputs:
- description: Available upon completion, the outputs provides the cost charged and the hosted url to access the video
- type: object
- properties:
- cost:
- type: integer
- description: The cost of generated video charged to the owners account.
- video_url:
- type: string
- description: URL hosting the generated video
- required:
- - cost
- - video_url
type: object
- required:
- - id
- - model
- - status
- - size
- - seconds
- - created_at
- title: Video job
- description: Structured information describing a generated video job.
+ VolumeType:
+ enum:
+ - readOnly
+ type: string
+ x-enum-varnames:
+ - VolumeTypeReadOnly