From 67d173dc09724e9a3d372747f7484e6e35739063 Mon Sep 17 00:00:00 2001 From: Blaine Kasten Date: Wed, 3 Dec 2025 11:02:24 -0600 Subject: [PATCH 1/4] feat: Add BYOC apis --- openapi.yaml | 8274 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 4967 insertions(+), 3307 deletions(-) diff --git a/openapi.yaml b/openapi.yaml index de97a6c..0fa9121 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -15,914 +15,823 @@ servers: security: - bearerAuth: [] paths: - /voices: + /deployments: get: - tags: ['Voices'] - summary: Fetch available voices for each model - description: Fetch available voices for each model - operationId: fetchVoices + description: Get a list of all deployments in your project responses: - '200': - description: Success + "200": + description: List of deployments content: application/json: schema: - $ref: '#/components/schemas/ListVoicesResponse' - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.audio.voices.list() - - print(response.data) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.audio.voices.list() - - print(response.data) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.audio.voices.list() - - console.log(response.data); - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.audio.voices.list() - - console.log(response.data); - /videos/{id}: - get: - tags: ['Video'] - summary: Fetch video metadata - description: Fetch video metadata - servers: - - url: https://api.together.xyz/v2 - operationId: retrieveVideo - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.videos.retrieve(video_id) - - print(response.id) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.videos.retrieve(video_id) - - print(response.id) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.videos.retrieve(videoId); - - console.log(response.status); - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.videos.retrieve(videoId); - - console.log(response.status); - parameters: - - in: path - name: id - schema: - type: string - required: true - description: Identifier of video from create response. 
- responses: - '200': - description: Success + $ref: "#/components/schemas/DeploymentListResponse" + "500": + description: Internal server error content: application/json: schema: - $ref: '#/components/schemas/VideoJob' - '400': - description: Invalid request parameters. - '404': - description: Video ID not found. - /videos: + type: object + summary: Get the list of deployments + tags: + - Deployments post: - tags: ['Video'] - summary: Create video - description: Create a video - operationId: createVideo - servers: - - url: https://api.together.xyz/v2 - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.videos.create( - model="together/video-model", - prompt="A cartoon of an astronaut riding a horse on the moon" - ) - - print(response.id) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.videos.create( - model="together/video-model", - prompt="A cartoon of an astronaut riding a horse on the moon" - ) - - print(response.id) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.videos.create({ - model: "together/video-model", - prompt: "A cartoon of an astronaut riding a horse on the moon", - }); - - console.log(response.id); - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.videos.create({ - model: "together/video-model", - prompt: "A cartoon of an astronaut riding a horse on the moon", - }); - - console.log(response.id); + description: Create a new deployment with specified configuration requestBody: content: application/json: schema: - $ref: '#/components/schemas/CreateVideoBody' + $ref: "#/components/schemas/CreateDeploymentRequest" + description: Deployment configuration + required: true responses: - '200': - description: Success + "200": + description: Deployment created successfully content: application/json: schema: - $ref: '#/components/schemas/VideoJob' - /chat/completions: - post: - tags: ['Chat'] - summary: Create chat completion - description: Query a chat model. 
- x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.chat.completions.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - messages=[ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "What are some fun things to do in New York?"}, - ] - ) - - print(response.choices[0].message.content) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.chat.completions.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - messages=[ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "What are some fun things to do in New York?"}, - ] - ) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.chat.completions.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - messages: [ - { role: "system", content: "You are a helpful assistant." }, - { role: "user", "content": "What are some fun things to do in New York?" }, - ], - }); - - console.log(response.choices[0].message?.content); - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.chat.completions.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - messages: [ - { role: "system", content: "You are a helpful assistant." }, - { role: "user", "content": "What are some fun things to do in New York?" 
}, - ], - }); - - console.log(response.choices[0].message?.content); - - lang: Shell - label: cURL - source: | - curl -X POST "https://api.together.xyz/v1/chat/completions" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - "messages": [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "What are some fun things to do in New York?"} - ] - }' - operationId: chat-completions - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/ChatCompletionRequest' - responses: - '200': - description: '200' + $ref: "#/components/schemas/DeploymentResponseItem" + "400": + description: Invalid request content: application/json: schema: - $ref: '#/components/schemas/ChatCompletionResponse' - text/event-stream: + type: object + "500": + description: Internal server error + content: + application/json: schema: - $ref: '#/components/schemas/ChatCompletionStream' - '400': - description: 'BadRequest' + type: object + summary: Create a new deployment + tags: + - Deployments + "/v1/deployments/{id}": + delete: + description: Delete an existing deployment + parameters: + - description: Deployment ID or name + in: path + name: id + required: true + schema: + type: string + responses: + "200": + description: Deployment deleted successfully content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '401': - description: 'Unauthorized' + type: object + "404": + description: Deployment not found content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '404': - description: 'NotFound' + type: object + "500": + description: Internal server error content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '429': - description: 'RateLimit' + type: object + summary: Delete a deployment + tags: + - Deployments + get: + description: Retrieve details of a specific deployment by its ID or name + parameters: + - description: Deployment ID or name + in: path + name: id + required: true + schema: + type: string + responses: + "200": + description: Deployment details content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '503': - description: 'Overloaded' + $ref: "#/components/schemas/DeploymentResponseItem" + "404": + description: Deployment not found content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '504': - description: 'Timeout' + type: object + "500": + description: Internal server error content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - deprecated: false - /completions: - post: - tags: ['Completion'] - summary: Create completion - description: Query a language, code, or image model. 
- x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.completions.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - prompt="The largest city in France is", - max_tokens=1 - ) - - print(response.choices[0].text) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.completions.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - prompt="The largest city in France is", - max_tokens=1 - ) - - print(response.choices[0].text) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.completions.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - prompt: "The largest city in France is", - max_tokens: 1, - }); - - console.log(response.choices[0].text); - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.completions.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - prompt: "The largest city in France is", - max_tokens: 1 - }); - - console.log(response.choices[0].text); - - lang: Shell - label: cURL - source: | - curl -X POST "https://api.together.xyz/v1/completions" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - "prompt": "The largest city in France is", - "max_tokens": 1 - }' - operationId: completions + type: object + summary: Get a deployment by ID or name + tags: + - Deployments + patch: + description: Update an existing deployment configuration + parameters: + - description: Deployment ID or name + in: path + name: id + required: true + schema: + type: string requestBody: content: application/json: schema: - $ref: '#/components/schemas/CompletionRequest' + $ref: "#/components/schemas/UpdateDeploymentRequest" + description: Updated deployment configuration + required: true responses: - '200': - description: '200' + "200": + description: Deployment updated successfully content: application/json: schema: - $ref: '#/components/schemas/CompletionResponse' - text/event-stream: + $ref: "#/components/schemas/DeploymentResponseItem" + "400": + description: Invalid request + content: + application/json: schema: - $ref: '#/components/schemas/CompletionStream' - '400': - description: 'BadRequest' + type: object + "404": + description: Deployment not found content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '401': - description: 'Unauthorized' + type: object + "500": + description: Internal server error content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '404': - description: 'NotFound' + type: object + summary: Update a deployment + tags: + - Deployments + "/v1/deployments/{id}/logs": + get: + description: Retrieve logs from a deployment, optionally filtered by replica ID. + Use follow=true to stream logs in real-time. 
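+      x-codeSamples:
+        - lang: Shell
+          label: cURL
+          source: |
+            # Illustrative sketch only (not part of the original spec): the base URL
+            # and the example deployment ID "dep-123" are assumptions.
+            curl "https://api.together.xyz/v1/deployments/dep-123/logs" \
+              -H "Authorization: Bearer $TOGETHER_API_KEY"
+
+            # Optionally stream logs as ndjson by adding follow=true:
+            # curl -N "https://api.together.xyz/v1/deployments/dep-123/logs?follow=true" \
+            #   -H "Authorization: Bearer $TOGETHER_API_KEY"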
+ parameters: + - description: Deployment ID or name + in: path + name: id + required: true + schema: + type: string + - description: Replica ID to filter logs + in: query + name: replica_id + schema: + type: string + - description: Stream logs in real-time (ndjson format) + in: query + name: follow + schema: + type: boolean + responses: + "200": + description: Deployment logs content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '429': - description: 'RateLimit' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '503': - description: 'Overloaded' + $ref: "#/components/schemas/DeploymentLogs" + "404": + description: Deployment not found content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '504': - description: 'Timeout' + type: object + "500": + description: Internal server error content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - deprecated: false - /embeddings: - post: - tags: ['Embeddings'] - summary: Create embedding - description: Query an embedding model for a given string of text. - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.embeddings.create( - model="BAAI/bge-large-en-v1.5", - input="New York City", - ) - - print(response.data[0].embedding) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.embeddings.create( - model="BAAI/bge-large-en-v1.5", - input="New York City", - ) - - print(response.data[0].embedding) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.embeddings.create({ - model: "BAAI/bge-large-en-v1.5", - input: "New York City", - }); - - console.log(response.data[0].embedding); - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.embeddings.create({ - model: "BAAI/bge-large-en-v1.5", - input: "New York City", - }); - - console.log(response.data[0].embedding); - - lang: Shell - label: cURL - source: | - curl -X POST "https://api.together.xyz/v1/embeddings" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "BAAI/bge-large-en-v1.5", - "input": "New York City" - }' - operationId: embeddings - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/EmbeddingsRequest' + type: object + summary: Get logs for a deployment + tags: + - Deployments + /image-repositories: + get: + description: Retrieve all container image repositories available in your project responses: - '200': - description: '200' + "200": + description: List of repositories content: application/json: schema: - $ref: '#/components/schemas/EmbeddingsResponse' - '400': - description: 'BadRequest' + $ref: "#/components/schemas/RepositoryListResponse" + "500": + description: Internal server error content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '401': - description: 'Unauthorized' + type: 
object + summary: Get the list of image repositories in your project + tags: + - Images + "/v1/image-repositories/{id}/images": + get: + description: Retrieve all container images (tags) available in a specific repository + parameters: + - description: Repository ID + in: path + name: id + required: true + schema: + type: string + responses: + "200": + description: List of images content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '404': - description: 'NotFound' + $ref: "#/components/schemas/ImageListResponse" + "404": + description: Repository not found content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '429': - description: 'RateLimit' + type: object + "500": + description: Internal server error content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '503': - description: 'Overloaded' + type: object + summary: Get the list of images available under a repository + tags: + - Images + /secrets: + get: + description: Retrieve all secrets in your project + responses: + "200": + description: List of secrets content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '504': - description: 'Timeout' + $ref: "#/components/schemas/ListSecretsResponse" + "500": + description: Internal server error content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - deprecated: false - /models: - get: - tags: ['Models'] - summary: List all models - description: Lists all of Together's open-source models - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - models = client.models.list() - - for model in models: - print(model.id) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - models = client.models.list() - - for model in models: - print(model.id) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const models = await client.models.list(); - - for (const model of models) { - console.log(model.id); - } - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const models = await client.models.list(); - - for (const model of models) { - console.log(model.id); - } - - lang: Shell - label: cURL - source: | - curl "https://api.together.xyz/v1/models" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - operationId: models - parameters: - - name: dedicated - in: query - description: Filter models to only return dedicated models - schema: - type: boolean + type: object + summary: Get the list of project secrets + tags: + - Secrets + post: + description: Create a new secret to store sensitive configuration values + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/CreateSecretRequest" + description: Secret configuration + required: true responses: - '200': - description: '200' - content: - application/json: - schema: - $ref: '#/components/schemas/ModelInfoList' - '400': - description: 'BadRequest' + "200": + description: Secret 
created successfully content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '401': - description: 'Unauthorized' + $ref: "#/components/schemas/SecretResponseItem" + "400": + description: Invalid request content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '404': - description: 'NotFound' + type: object + "500": + description: Internal server error content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '429': - description: 'RateLimit' + type: object + summary: Create a new secret + tags: + - Secrets + "/v1/secrets/{id}": + delete: + description: Delete an existing secret + parameters: + - description: Secret ID or name + in: path + name: id + required: true + schema: + type: string + responses: + "200": + description: Secret deleted successfully content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '504': - description: 'Timeout' + type: object + "404": + description: Secret not found content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - deprecated: false - post: - tags: ['Models'] - summary: Upload a custom model or adapter - description: Upload a custom model or adapter from Hugging Face or S3 - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.models.upload( - "My-Fine-Tuned-Model", - "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz", - ) - - print(response.job_id) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.models.upload( - model_name="My-Fine-Tuned-Model", - model_source="https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz", - ) - - print(response.data.job_id) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.models.upload({ - model_name: "My-Fine-Tuned-Model", - model_source: "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz", - }) - - console.log(response); - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.models.upload({ - model_name: "My-Fine-Tuned-Model", - model_source: "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz", - }) - - console.log(response); - - lang: Shell - label: cURL - source: | - curl -X POST "https://api.together.xyz/v1/models" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model_name": "My-Fine-Tuned-Model", - "model_source": "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz" - }' - operationId: uploadModel + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Delete a secret + tags: + - Secrets + get: + description: Retrieve details of a specific secret by its ID or name + parameters: + - description: Secret ID or name + in: path + name: id + required: true + 
schema: + type: string + responses: + "200": + description: Secret details + content: + application/json: + schema: + $ref: "#/components/schemas/SecretResponseItem" + "404": + description: Secret not found + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Get a secret by ID or name + tags: + - Secrets + patch: + description: Update an existing secret's value or metadata + parameters: + - description: Secret ID or name + in: path + name: id + required: true + schema: + type: string requestBody: - required: true content: application/json: schema: - $ref: '#/components/schemas/ModelUploadRequest' + $ref: "#/components/schemas/UpdateSecretRequest" + description: Updated secret configuration + required: true responses: - '200': - description: Model / adapter upload job created successfully + "200": + description: Secret updated successfully content: application/json: schema: - $ref: '#/components/schemas/ModelUploadSuccessResponse' - - /jobs/{jobId}: + $ref: "#/components/schemas/SecretResponseItem" + "400": + description: Invalid request + content: + application/json: + schema: + type: object + "404": + description: Secret not found + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Update a secret + tags: + - Secrets + "/v1/storage/{filename}": get: - tags: ['Jobs'] - summary: Get job status - description: Get the status of a specific job - operationId: getJob + description: Download a file by redirecting to a signed URL parameters: - - name: jobId + - description: Filename in: path + name: filename required: true schema: type: string - description: The ID of the job to retrieve - example: job-a15dad11-8d8e-4007-97c5-a211304de284 responses: - '200': - description: Job status retrieved successfully + "307": + description: Redirect to signed download URL content: application/json: schema: - $ref: '#/components/schemas/JobInfoSuccessResponse' - - /jobs: + type: string + "400": + description: Invalid request + content: + application/json: + schema: + additionalProperties: + type: string + type: object + "404": + description: File not found + content: + application/json: + schema: + additionalProperties: + type: string + type: object + "500": + description: Internal error + content: + application/json: + schema: + additionalProperties: + type: string + type: object + summary: Download a file + tags: + - files + "/v1/storage/{filename}/url": get: - tags: ['Jobs'] - summary: List all jobs - description: List all jobs and their statuses - operationId: listJobs + description: Get a presigned download URL for a file + parameters: + - description: Filename + in: path + name: filename + required: true + schema: + type: string responses: - '200': - description: Jobs retrieved successfully + "200": + description: Signed URL content: application/json: schema: - $ref: '#/components/schemas/JobsInfoSuccessResponse' - - /images/generations: + $ref: "#/components/schemas/api_v1.SignedURLResponse" + "400": + description: Invalid request + content: + application/json: + schema: + additionalProperties: + type: string + type: object + "404": + description: File not found + content: + application/json: + schema: + additionalProperties: + type: string + type: object + "500": + description: Internal error + content: + application/json: + schema: + additionalProperties: + type: string 
+ type: object + summary: Get a signed URL for a file + tags: + - files + /storage/multipart/abort: post: - tags: ['Images'] - summary: Create image - description: Use an image model to generate an image for a given prompt. - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.images.generate( - model="black-forest-labs/FLUX.1-schnell", - steps=4, - prompt="A cartoon of an astronaut riding a horse on the moon", - ) - - print(response.data[0].url) - - lang: Python - label: Together AI SDK (v2) - source: | + description: Abort a multi-part upload and discard all uploaded parts + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/files.AbortMultiPartRequest" + description: Abort multi-part upload request + required: true + responses: + "200": + description: Multi-part upload aborted successfully + content: + application/json: + schema: + additionalProperties: + type: string + type: object + "400": + description: Invalid request + content: + application/json: + schema: + additionalProperties: + type: string + type: object + "500": + description: Internal error + content: + application/json: + schema: + additionalProperties: + type: string + type: object + summary: Abort multi-part upload + tags: + - files + /storage/multipart/complete: + post: + description: Complete a multi-part upload by providing all part ETags + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/files.CompleteMultiPartRequest" + description: Complete multi-part upload request + required: true + responses: + "200": + description: Multi-part upload completed + content: + application/json: + schema: + $ref: "#/components/schemas/files.CompleteUploadResponse" + "400": + description: Invalid request + content: + application/json: + schema: + additionalProperties: + type: string + type: object + "500": + description: Internal error + content: + application/json: + schema: + additionalProperties: + type: string + type: object + summary: Complete multi-part upload + tags: + - files + /storage/multipart/init: + post: + description: Initiate a multi-part upload and get presigned URLs for each part + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/files.InitiateMultiPartRequest" + description: Multi-part upload init request + required: true + responses: + "200": + description: Multi-part upload info + content: + application/json: + schema: + $ref: "#/components/schemas/files.MultiPartInitResponse" + "400": + description: Invalid request + content: + application/json: + schema: + additionalProperties: + type: string + type: object + "500": + description: Internal error + content: + application/json: + schema: + additionalProperties: + type: string + type: object + summary: Initiate multi-part upload + tags: + - files + /storage/upload-request: + post: + description: Request a presigned upload URL for a file + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/files.FileRequest" + description: Upload request + required: true + responses: + "200": + description: Upload URL and path + content: + application/json: + schema: + $ref: "#/components/schemas/files.UploadResponse" + "400": + description: Invalid request + content: + application/json: + schema: + additionalProperties: + type: 
string + type: object + "500": + description: Internal error + content: + application/json: + schema: + additionalProperties: + type: string + type: object + summary: Request an upload URL for a file + tags: + - files + /storage/volumes: + get: + description: Retrieve all volumes in your project + responses: + "200": + description: List of volumes + content: + application/json: + schema: + $ref: "#/components/schemas/ListVolumesResponse" + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Get the list of project volumes + tags: + - Volumes + post: + description: Create a new volume to preload files in deployments + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/CreateVolumeRequest" + description: Volume configuration + required: true + responses: + "200": + description: Volume created successfully + content: + application/json: + schema: + $ref: "#/components/schemas/VolumeResponseItem" + "400": + description: Invalid request + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Create a new volume + tags: + - Volumes + "/v1/storage/volumes/{id}": + delete: + description: Delete an existing volume + parameters: + - description: Volume ID or name + in: path + name: id + required: true + schema: + type: string + responses: + "200": + description: Volume deleted successfully + content: + application/json: + schema: + type: object + "404": + description: Volume not found + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Delete a volume + tags: + - Volumes + get: + description: Retrieve details of a specific volume by its ID or name + parameters: + - description: Volume ID or name + in: path + name: id + required: true + schema: + type: string + responses: + "200": + description: Volume details + content: + application/json: + schema: + $ref: "#/components/schemas/VolumeResponseItem" + "404": + description: Volume not found + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Get a volume by ID or name + tags: + - Volumes + patch: + description: Update an existing volume's configuration or contents + parameters: + - description: Volume ID or name + in: path + name: id + required: true + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/UpdateVolumeRequest" + description: Updated volume configuration + required: true + responses: + "200": + description: Volume updated successfully + content: + application/json: + schema: + $ref: "#/components/schemas/VolumeResponseItem" + "400": + description: Invalid request + content: + application/json: + schema: + type: object + "404": + description: Volume not found + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Update a volume + tags: + - Volumes + /voices: + get: + tags: ['Voices'] + summary: Fetch available voices for each model + description: Fetch available voices for each model + operationId: fetchVoices + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: 
'#/components/schemas/ListVoicesResponse' + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ from together import Together import os @@ -930,13 +839,22 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.images.generate( - model="black-forest-labs/FLUX.1-schnell", - steps=4, - prompt="A cartoon of an astronaut riding a horse on the moon", + response = client.audio.voices.list() + + print(response.data) + - lang: Python + label: Together AI SDK (v2) + source: | + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), ) - print(response.data[0].url) + response = client.audio.voices.list() + + print(response.data) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -946,12 +864,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.images.generate({ - model: "black-forest-labs/FLUX.1-schnell", - prompt: "A cartoon of an astronaut riding a horse on the moon", - }); + const response = await client.audio.voices.list() - console.log(response.data[0].url); + console.log(response.data); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -961,126 +876,17 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.images.generate({ - model: "black-forest-labs/FLUX.1-schnell", - prompt: "A cartoon of an astronaut riding a horse on the moon", - }); + const response = await client.audio.voices.list() - console.log(response.data[0].url); - - lang: Shell - label: cURL - source: | - curl -X POST "https://api.together.xyz/v1/images/generations" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "black-forest-labs/FLUX.1-schnell", - "prompt": "A cartoon of an astronaut riding a horse on the moon" - }' - requestBody: - required: true - content: - application/json: - schema: - type: object - required: - - prompt - - model - properties: - prompt: - type: string - description: A description of the desired images. Maximum length varies by model. - example: cat floating in space, cinematic - model: - type: string - description: > - The model to use for image generation.
-
- [See all of Together AI's image models](https://docs.together.ai/docs/serverless-models#image-models) - example: black-forest-labs/FLUX.1-schnell - anyOf: - - type: string - enum: - - black-forest-labs/FLUX.1-schnell-Free - - black-forest-labs/FLUX.1-schnell - - black-forest-labs/FLUX.1.1-pro - - type: string - steps: - type: integer - default: 20 - description: Number of generation steps. - image_url: - type: string - description: URL of an image to use for image models that support it. - seed: - type: integer - description: Seed used for generation. Can be used to reproduce image generations. - n: - type: integer - default: 1 - description: Number of image results to generate. - height: - type: integer - default: 1024 - description: Height of the image to generate in number of pixels. - width: - type: integer - default: 1024 - description: Width of the image to generate in number of pixels. - negative_prompt: - type: string - description: The prompt or prompts not to guide the image generation. - response_format: - type: string - description: Format of the image response. Can be either a base64 string or a URL. - enum: - - base64 - - url - guidance_scale: - type: number - description: Adjusts the alignment of the generated image with the input prompt. Higher values (e.g., 8-10) make the output more faithful to the prompt, while lower values (e.g., 1-5) encourage more creative freedom. - default: 3.5 - output_format: - type: string - description: The format of the image response. Can be either be `jpeg` or `png`. Defaults to `jpeg`. - default: jpeg - enum: - - jpeg - - png - image_loras: - description: An array of objects that define LoRAs (Low-Rank Adaptations) to influence the generated image. - type: array - items: - type: object - required: [path, scale] - properties: - path: - type: string - description: The URL of the LoRA to apply (e.g. https://huggingface.co/strangerzonehf/Flux-Midjourney-Mix2-LoRA). - scale: - type: number - description: The strength of the LoRA's influence. Most LoRA's recommend a value of 1. - reference_images: - description: An array of image URLs that guide the overall appearance and style of the generated image. These reference images influence the visual characteristics consistently across the generation. - type: array - items: - type: string - description: URL of a reference image to guide the image generation. - disable_safety_checker: - type: boolean - description: If true, disables the safety checker for image generation. - responses: - '200': - description: Image generated successfully - content: - application/json: - schema: - $ref: '#/components/schemas/ImageResponse' - /files: + console.log(response.data); + /videos/{id}: get: - tags: ['Files'] - summary: List all files - description: List the metadata for all uploaded data files. 
+ tags: ['Video'] + summary: Fetch video metadata + description: Fetch video metadata + servers: + - url: https://api.together.xyz/v2 + operationId: retrieveVideo x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -1093,10 +899,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.files.list() + response = client.videos.retrieve(video_id) - for file in response.data: - print(file.id) + print(response.id) - lang: Python label: Together AI SDK (v2) source: | @@ -1107,10 +912,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.files.list() + response = client.videos.retrieve(video_id) - for file in response.data: - print(file.id) + print(response.id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -1120,11 +924,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.files.list(); + const response = await client.videos.retrieve(videoId); - for (const file of response.data) { - console.log(file.id); - } + console.log(response.status); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -1134,29 +936,35 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.files.list(); + const response = await client.videos.retrieve(videoId); - for (const file of response.data) { - console.log(file.id); - } - - lang: Shell - label: cURL - source: | - curl "https://api.together.xyz/v1/files" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" + console.log(response.status); + parameters: + - in: path + name: id + schema: + type: string + required: true + description: Identifier of video from create response. responses: '200': - description: List of files + description: Success content: application/json: schema: - $ref: '#/components/schemas/FileList' - /files/{id}: - get: - tags: ['Files'] - summary: List file - description: List the metadata for a single uploaded data file. + $ref: '#/components/schemas/VideoJob' + '400': + description: Invalid request parameters. + '404': + description: Video ID not found. 
+ /videos: + post: + tags: ['Video'] + summary: Create video + description: Create a video + operationId: createVideo + servers: + - url: https://api.together.xyz/v2 x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -1169,9 +977,12 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - file = client.files.retrieve(id="file-id") + response = client.videos.create( + model="together/video-model", + prompt="A cartoon of an astronaut riding a horse on the moon" + ) - print(file) + print(response.id) - lang: Python label: Together AI SDK (v2) source: | @@ -1182,9 +993,12 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - file = client.files.retrieve(id="file-id") + response = client.videos.create( + model="together/video-model", + prompt="A cartoon of an astronaut riding a horse on the moon" + ) - print(file) + print(response.id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -1194,9 +1008,12 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const file = await client.files.retrieve("file-id"); + const response = await client.videos.create({ + model: "together/video-model", + prompt: "A cartoon of an astronaut riding a horse on the moon", + }); - console.log(file); + console.log(response.id); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -1206,32 +1023,29 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const file = await client.files.retrieve("file-id"); + const response = await client.videos.create({ + model: "together/video-model", + prompt: "A cartoon of an astronaut riding a horse on the moon", + }); - console.log(file); - - lang: Shell - label: cURL - source: | - curl "https://api.together.xyz/v1/files/ID" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - parameters: - - name: id - in: path - required: true - schema: - type: string + console.log(response.id); + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CreateVideoBody' responses: '200': - description: File retrieved successfully + description: Success content: application/json: schema: - $ref: '#/components/schemas/FileResponse' - delete: - tags: ['Files'] - summary: Delete a file - description: Delete a previously uploaded data file. + $ref: '#/components/schemas/VideoJob' + /chat/completions: + post: + tags: ['Chat'] + summary: Create chat completion + description: Query a chat model. 
x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -1244,9 +1058,15 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.files.delete(id="file-id") + response = client.chat.completions.create( + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What are some fun things to do in New York?"}, + ] + ) - print(response) + print(response.choices[0].message.content) - lang: Python label: Together AI SDK (v2) source: | @@ -1257,9 +1077,13 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.files.delete(id="file-id") - - print(response) + response = client.chat.completions.create( + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What are some fun things to do in New York?"}, + ] + ) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -1269,9 +1093,15 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.files.delete("file-id"); + const response = await client.chat.completions.create({ + model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + messages: [ + { role: "system", content: "You are a helpful assistant." }, + { role: "user", "content": "What are some fun things to do in New York?" }, + ], + }); - console.log(response); + console.log(response.choices[0].message?.content); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -1281,32 +1111,86 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.files.delete("file-id"); + const response = await client.chat.completions.create({ + model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + messages: [ + { role: "system", content: "You are a helpful assistant." }, + { role: "user", "content": "What are some fun things to do in New York?" }, + ], + }); - console.log(response); + console.log(response.choices[0].message?.content); - lang: Shell label: cURL source: | - curl -X "DELETE" "https://api.together.xyz/v1/files/file-id" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" - parameters: - - name: id - in: path - required: true - schema: - type: string + curl -X POST "https://api.together.xyz/v1/chat/completions" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What are some fun things to do in New York?"} + ] + }' + operationId: chat-completions + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/ChatCompletionRequest' responses: '200': - description: File deleted successfully + description: '200' content: application/json: schema: - $ref: '#/components/schemas/FileDeleteResponse' - /files/{id}/content: - get: - tags: ['Files'] - summary: Get file contents - description: Get the contents of a single uploaded data file. 
+ $ref: '#/components/schemas/ChatCompletionResponse' + text/event-stream: + schema: + $ref: '#/components/schemas/ChatCompletionStream' + '400': + description: 'BadRequest' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '401': + description: 'Unauthorized' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '404': + description: 'NotFound' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '429': + description: 'RateLimit' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '503': + description: 'Overloaded' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '504': + description: 'Timeout' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + deprecated: false + /completions: + post: + tags: ['Completion'] + summary: Create completion + description: Query a language, code, or image model. x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -1319,9 +1203,13 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - file = client.files.retrieve_content(id="file-id") + response = client.completions.create( + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + prompt="The largest city in France is", + max_tokens=1 + ) - print(file.filename) + print(response.choices[0].text) - lang: Python label: Together AI SDK (v2) source: | @@ -1332,9 +1220,13 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - with client.files.with_streaming_response.content(id="file-id") as response: - for line in response.iter_lines(): - print(line) + response = client.completions.create( + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + prompt="The largest city in France is", + max_tokens=1 + ) + + print(response.choices[0].text) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -1344,10 +1236,13 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.files.content("file-id"); - const content = await response.text(); + const response = await client.completions.create({ + model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + prompt: "The largest city in France is", + max_tokens: 1, + }); - console.log(content); + console.log(response.choices[0].text); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -1357,40 +1252,82 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.files.content("file-id"); - const content = await response.text(); + const response = await client.completions.create({ + model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + prompt: "The largest city in France is", + max_tokens: 1 + }); - console.log(content); + console.log(response.choices[0].text); - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/files/file-id/content" \ + curl -X POST "https://api.together.xyz/v1/completions" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - parameters: - - name: id - in: path - required: true - schema: - type: string + -H "Content-Type: application/json" \ + -d '{ + "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + "prompt": "The largest city in France is", + "max_tokens": 1 + }' + operationId: completions + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CompletionRequest' responses: '200': - description: File content retrieved successfully + description: '200' content: application/json: schema: 
- $ref: '#/components/schemas/FileObject' - '500': - description: Internal Server Error + $ref: '#/components/schemas/CompletionResponse' + text/event-stream: + schema: + $ref: '#/components/schemas/CompletionStream' + '400': + description: 'BadRequest' content: application/json: schema: $ref: '#/components/schemas/ErrorData' - /files/upload: + '401': + description: 'Unauthorized' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '404': + description: 'NotFound' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '429': + description: 'RateLimit' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '503': + description: 'Overloaded' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '504': + description: 'Timeout' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + deprecated: false + /embeddings: post: - tags: ['Files'] - summary: Upload a file - description: Upload a file with specified purpose, file name, and file type. + tags: ['Embeddings'] + summary: Create embedding + description: Query an embedding model for a given string of text. x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -1403,11 +1340,12 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - current_dir = os.path.dirname(os.path.abspath(__file__)) - file_path = os.path.join(current_dir, "data.jsonl") - file = client.files.upload(file=file_path) + response = client.embeddings.create( + model="BAAI/bge-large-en-v1.5", + input="New York City", + ) - print(file.id) + print(response.data[0].embedding) - lang: Python label: Together AI SDK (v2) source: | @@ -1418,98 +1356,220 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - current_dir = os.path.dirname(os.path.abspath(__file__)) - file_path = os.path.join(current_dir, "data.jsonl") - file = client.files.upload(file=file_path) + response = client.embeddings.create( + model="BAAI/bge-large-en-v1.5", + input="New York City", + ) - print(file.id) + print(response.data[0].embedding) - lang: TypeScript label: Together AI SDK (TypeScript) source: | - import { upload } from "together-ai/lib/upload" - import path from "path"; - import { fileURLToPath } from "url"; + import Together from "together-ai"; - const __filename = fileURLToPath(import.meta.url); - const __dirname = path.dirname(__filename); - const filepath = path.join(__dirname, "data.jsonl"); - const file = await upload(filepath); + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); - console.log(file.id); + const response = await client.embeddings.create({ + model: "BAAI/bge-large-en-v1.5", + input: "New York City", + }); + + console.log(response.data[0].embedding); - lang: JavaScript label: Together AI SDK (JavaScript) source: | - import { upload } from "together-ai/lib/upload" - import path from "path"; - import { fileURLToPath } from "url"; + import Together from "together-ai"; - const __filename = fileURLToPath(import.meta.url); - const __dirname = path.dirname(__filename); - const filepath = path.join(__dirname, "data.jsonl"); - const file = await upload(filepath); + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); - console.log(file.id); + const response = await client.embeddings.create({ + model: "BAAI/bge-large-en-v1.5", + input: "New York City", + }); + + console.log(response.data[0].embedding); - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/files/upload" \ 
+ curl -X POST "https://api.together.xyz/v1/embeddings" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -F "file=@/path/to/data.jsonl" \ - -F "file_name=data.jsonl" \ - -F "purpose=fine-tune" + -H "Content-Type: application/json" \ + -d '{ + "model": "BAAI/bge-large-en-v1.5", + "input": "New York City" + }' + operationId: embeddings requestBody: - required: true content: - multipart/form-data: + application/json: schema: - type: object - required: - - purpose - - file_name - - file - properties: - purpose: - $ref: '#/components/schemas/FilePurpose' - file_name: - type: string - description: The name of the file being uploaded - example: 'dataset.csv' - file_type: - $ref: '#/components/schemas/FileType' - file: - type: string - format: binary - description: The content of the file being uploaded + $ref: '#/components/schemas/EmbeddingsRequest' responses: '200': - description: File uploaded successfully + description: '200' content: application/json: schema: - $ref: '#/components/schemas/FileResponse' - '500': - description: Internal Server Error + $ref: '#/components/schemas/EmbeddingsResponse' + '400': + description: 'BadRequest' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '401': + description: 'Unauthorized' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '404': + description: 'NotFound' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '429': + description: 'RateLimit' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '503': + description: 'Overloaded' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '504': + description: 'Timeout' content: application/json: schema: $ref: '#/components/schemas/ErrorData' + deprecated: false + /models: + get: + tags: ['Models'] + summary: List all models + description: Lists all of Together's open-source models + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + models = client.models.list() + + for model in models: + print(model.id) + - lang: Python + label: Together AI SDK (v2) + source: | + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + models = client.models.list() + + for model in models: + print(model.id) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const models = await client.models.list(); + + for (const model of models) { + console.log(model.id); + } + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const models = await client.models.list(); + + for (const model of models) { + console.log(model.id); + } + - lang: Shell + label: cURL + source: | + curl "https://api.together.xyz/v1/models" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" + operationId: models + parameters: + - name: dedicated + in: query + description: Filter models to only return dedicated models + schema: + type: boolean + responses: + '200': + description: '200' + content: + application/json: + schema: 
+ $ref: '#/components/schemas/ModelInfoList' '400': - description: Bad Request + description: 'BadRequest' content: application/json: schema: $ref: '#/components/schemas/ErrorData' '401': - description: Unauthorized + description: 'Unauthorized' content: application/json: schema: $ref: '#/components/schemas/ErrorData' - /fine-tunes: + '404': + description: 'NotFound' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '429': + description: 'RateLimit' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '504': + description: 'Timeout' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + deprecated: false post: - tags: ['Fine-tuning'] - summary: Create job - description: Create a fine-tuning job with the provided model and training data. + tags: ['Models'] + summary: Upload a custom model or adapter + description: Upload a custom model or adapter from Hugging Face or S3 x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -1522,12 +1582,12 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.fine_tuning.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", - training_file="file-id" + response = client.models.upload( + "My-Fine-Tuned-Model", + "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz", ) - print(response) + print(response.job_id) - lang: Python label: Together AI SDK (v2) source: | @@ -1538,12 +1598,12 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.fine_tuning.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", - training_file="file-id" + response = client.models.upload( + model_name="My-Fine-Tuned-Model", + model_source="https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz", ) - print(response) + print(response.data.job_id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -1553,10 +1613,10 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.fineTuning.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", - training_file: "file-id", - }); + const response = await client.models.upload({ + model_name: "My-Fine-Tuned-Model", + model_source: "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz", + }) console.log(response); - lang: JavaScript @@ -1568,148 +1628,78 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.fineTuning.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", - training_file: "file-id", - }); + const response = await client.models.upload({ + model_name: "My-Fine-Tuned-Model", + model_source: "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz", + }) console.log(response); - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/fine-tunes" \ + curl -X POST "https://api.together.xyz/v1/models" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ -H "Content-Type: application/json" \ -d '{ - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", - "training_file": "file-id" - }' + "model_name": "My-Fine-Tuned-Model", + "model_source": "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz" + }' + operationId: uploadModel requestBody: required: true content: application/json: schema: - type: object - required: - - training_file - - model - properties: - training_file: - type: string - description: File-ID of a training file 
uploaded to the Together API - validation_file: - type: string - description: File-ID of a validation file uploaded to the Together API - model: - type: string - description: Name of the base model to run fine-tune job on - n_epochs: - type: integer - default: 1 - description: Number of complete passes through the training dataset (higher values may improve results but increase cost and risk of overfitting) - n_checkpoints: - type: integer - default: 1 - description: Number of intermediate model versions saved during training for evaluation - n_evals: - type: integer - default: 0 - description: Number of evaluations to be run on a given validation set during training - batch_size: - oneOf: - - type: integer - - type: string - enum: - - max - default: 'max' - description: Number of training examples processed together (larger batches use more memory but may train faster). Defaults to "max". We use training optimizations like packing, so the effective batch size may be different than the value you set. - learning_rate: - type: number - format: float - default: 0.00001 - description: Controls how quickly the model adapts to new information (too high may cause instability, too low may slow convergence) - lr_scheduler: - type: object - default: none - $ref: '#/components/schemas/LRScheduler' - description: The learning rate scheduler to use. It specifies how the learning rate is adjusted during training. - warmup_ratio: - type: number - format: float - default: 0.0 - description: The percent of steps at the start of training to linearly increase the learning rate. - max_grad_norm: - type: number - format: float - default: 1.0 - description: Max gradient norm to be used for gradient clipping. Set to 0 to disable. - weight_decay: - type: number - format: float - default: 0.0 - description: Weight decay. Regularization parameter for the optimizer. - suffix: - type: string - description: Suffix that will be added to your fine-tuned model name - wandb_api_key: - type: string - description: Integration key for tracking experiments and model metrics on W&B platform - wandb_base_url: - type: string - description: The base URL of a dedicated Weights & Biases instance. - wandb_project_name: - type: string - description: The Weights & Biases project for your run. If not specified, will use `together` as the project name. - wandb_name: - type: string - description: The Weights & Biases name for your run. - train_on_inputs: - oneOf: - - type: boolean - - type: string - enum: - - auto - type: boolean - default: auto - description: Whether to mask the user messages in conversational data or prompts in instruction data. - deprecated: true - training_method: - type: object - oneOf: - - $ref: '#/components/schemas/TrainingMethodSFT' - - $ref: '#/components/schemas/TrainingMethodDPO' - description: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization. - training_type: - type: object - oneOf: - - $ref: '#/components/schemas/FullTrainingType' - - $ref: '#/components/schemas/LoRATrainingType' - from_checkpoint: - type: string - description: The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the final checkpoint will be used. - from_hf_model: - type: string - description: The Hugging Face Hub repo to start training from. 
Should be as close as possible to the base model (specified by the `model` argument) in terms of architecture and size. - hf_model_revision: - type: string - description: The revision of the Hugging Face Hub model to continue training from. E.g., hf_model_revision=main (default, used if the argument is not provided) or hf_model_revision='607a30d783dfa663caf39e06633721c8d4cfcd7e' (specific commit). - hf_api_token: - type: string - description: The API token for the Hugging Face Hub. - hf_output_repo_name: - type: string - description: The name of the Hugging Face repository to upload the fine-tuned model to. + $ref: '#/components/schemas/ModelUploadRequest' responses: '200': - description: Fine-tuning job initiated successfully + description: Model / adapter upload job created successfully content: application/json: schema: - $ref: '#/components/schemas/FinetuneResponseTruncated' + $ref: '#/components/schemas/ModelUploadSuccessResponse' + + /jobs/{jobId}: get: - tags: ['Fine-tuning'] + tags: ['Jobs'] + summary: Get job status + description: Get the status of a specific job + operationId: getJob + parameters: + - name: jobId + in: path + required: true + schema: + type: string + description: The ID of the job to retrieve + example: job-a15dad11-8d8e-4007-97c5-a211304de284 + responses: + '200': + description: Job status retrieved successfully + content: + application/json: + schema: + $ref: '#/components/schemas/JobInfoSuccessResponse' + + /jobs: + get: + tags: ['Jobs'] summary: List all jobs - description: List the metadata for all fine-tuning jobs. Returns a list of FinetuneResponseTruncated objects. + description: List all jobs and their statuses + operationId: listJobs + responses: + '200': + description: Jobs retrieved successfully + content: + application/json: + schema: + $ref: '#/components/schemas/JobsInfoSuccessResponse' + + /images/generations: + post: + tags: ['Images'] + summary: Create image + description: Use an image model to generate an image for a given prompt. 
x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -1722,10 +1712,13 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.fine_tuning.list() + response = client.images.generate( + model="black-forest-labs/FLUX.1-schnell", + steps=4, + prompt="A cartoon of an astronaut riding a horse on the moon", + ) - for fine_tune in response.data: - print(f"ID: {fine_tune.id}, Status: {fine_tune.status}") + print(response.data[0].url) - lang: Python label: Together AI SDK (v2) source: | @@ -1736,10 +1729,13 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.fine_tuning.list() + response = client.images.generate( + model="black-forest-labs/FLUX.1-schnell", + steps=4, + prompt="A cartoon of an astronaut riding a horse on the moon", + ) - for fine_tune in response.data: - print(f"ID: {fine_tune.id}, Status: {fine_tune.status}") + print(response.data[0].url) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -1749,11 +1745,12 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.fineTuning.list(); + const response = await client.images.generate({ + model: "black-forest-labs/FLUX.1-schnell", + prompt: "A cartoon of an astronaut riding a horse on the moon", + }); - for (const fineTune of response.data) { - console.log(fineTune.id, fineTune.status); - } + console.log(response.data[0].url); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -1763,29 +1760,22 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.fineTuning.list(); + const response = await client.images.generate({ + model: "black-forest-labs/FLUX.1-schnell", + prompt: "A cartoon of an astronaut riding a horse on the moon", + }); - for (const fineTune of response.data) { - console.log(fineTune.id, fineTune.status); - } + console.log(response.data[0].url); - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/fine-tunes" \ + curl -X POST "https://api.together.xyz/v1/images/generations" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - responses: - '200': - description: List of fine-tune jobs - content: - application/json: - schema: - $ref: '#/components/schemas/FinetuneTruncatedList' - /fine-tunes/estimate-price: - post: - tags: ['Fine-tuning'] - summary: Estimate price - description: Estimate the price of a fine-tuning job. + -H "Content-Type: application/json" \ + -d '{ + "model": "black-forest-labs/FLUX.1-schnell", + "prompt": "A cartoon of an astronaut riding a horse on the moon" + }' requestBody: required: true content: @@ -1793,74 +1783,103 @@ paths: schema: type: object required: - - training_file + - prompt + - model properties: - training_file: - type: string - description: File-ID of a training file uploaded to the Together API - validation_file: + prompt: type: string - description: File-ID of a validation file uploaded to the Together API + description: A description of the desired images. Maximum length varies by model. + example: cat floating in space, cinematic model: type: string - description: Name of the base model to run fine-tune job on - n_epochs: + description: > + The model to use for image generation.
+
+ [See all of Together AI's image models](https://docs.together.ai/docs/serverless-models#image-models) + example: black-forest-labs/FLUX.1-schnell + anyOf: + - type: string + enum: + - black-forest-labs/FLUX.1-schnell-Free + - black-forest-labs/FLUX.1-schnell + - black-forest-labs/FLUX.1.1-pro + - type: string + steps: + type: integer + default: 20 + description: Number of generation steps. + image_url: + type: string + description: URL of an image to use for image models that support it. + seed: + type: integer + description: Seed used for generation. Can be used to reproduce image generations. + n: type: integer default: 1 - description: Number of complete passes through the training dataset (higher values may improve results but increase cost and risk of overfitting) - n_evals: + description: Number of image results to generate. + height: type: integer - default: 0 - description: Number of evaluations to be run on a given validation set during training - training_method: - type: object - oneOf: - - $ref: '#/components/schemas/TrainingMethodSFT' - - $ref: '#/components/schemas/TrainingMethodDPO' - description: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization. - training_type: - type: object - oneOf: - - $ref: '#/components/schemas/FullTrainingType' - - $ref: '#/components/schemas/LoRATrainingType' - from_checkpoint: + default: 1024 + description: Height of the image to generate in number of pixels. + width: + type: integer + default: 1024 + description: Width of the image to generate in number of pixels. + negative_prompt: type: string - description: The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the final checkpoint will be used. + description: The prompt or prompts not to guide the image generation. + response_format: + type: string + description: Format of the image response. Can be either a base64 string or a URL. + enum: + - base64 + - url + guidance_scale: + type: number + description: Adjusts the alignment of the generated image with the input prompt. Higher values (e.g., 8-10) make the output more faithful to the prompt, while lower values (e.g., 1-5) encourage more creative freedom. + default: 3.5 + output_format: + type: string + description: The format of the image response. Can be either be `jpeg` or `png`. Defaults to `jpeg`. + default: jpeg + enum: + - jpeg + - png + image_loras: + description: An array of objects that define LoRAs (Low-Rank Adaptations) to influence the generated image. + type: array + items: + type: object + required: [path, scale] + properties: + path: + type: string + description: The URL of the LoRA to apply (e.g. https://huggingface.co/strangerzonehf/Flux-Midjourney-Mix2-LoRA). + scale: + type: number + description: The strength of the LoRA's influence. Most LoRA's recommend a value of 1. + reference_images: + description: An array of image URLs that guide the overall appearance and style of the generated image. These reference images influence the visual characteristics consistently across the generation. + type: array + items: + type: string + description: URL of a reference image to guide the image generation. + disable_safety_checker: + type: boolean + description: If true, disables the safety checker for image generation. 
responses: - '500': - description: Internal Server Error - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' '200': - description: Price estimated successfully + description: Image generated successfully content: application/json: schema: - type: object - properties: - estimated_total_price: - type: number - description: The price of the fine-tuning job - allowed_to_proceed: - type: boolean - description: Whether the user is allowed to proceed with the fine-tuning job - example: true - user_limit: - type: number - description: The user's credit limit in dollars - estimated_train_token_count: - type: number - description: The estimated number of tokens to be trained - estimated_eval_token_count: - type: number - description: The estimated number of tokens for evaluation - /fine-tunes/{id}: + $ref: '#/components/schemas/ImageResponse' + /files: get: - tags: ['Fine-tuning'] - summary: List job - description: List the metadata for a single fine-tuning job. + tags: ['Files'] + summary: List all files + description: List the metadata for all uploaded data files. x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -1873,9 +1892,10 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - fine_tune = client.fine_tuning.retrieve(id="ft-id") + response = client.files.list() - print(fine_tune) + for file in response.data: + print(file.id) - lang: Python label: Together AI SDK (v2) source: | @@ -1886,9 +1906,10 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - fine_tune = client.fine_tuning.retrieve(id="ft-id") + response = client.files.list() - print(fine_tune) + for file in response.data: + print(file.id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -1898,9 +1919,11 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const fineTune = await client.fineTuning.retrieve("ft-id"); + const response = await client.files.list(); - console.log(fineTune); + for (const file of response.data) { + console.log(file.id); + } - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -1910,32 +1933,29 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const fineTune = await client.fineTuning.retrieve("ft-id"); + const response = await client.files.list(); - console.log(fineTune); + for (const file of response.data) { + console.log(file.id); + } - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/fine-tunes/ft-id" \ + curl "https://api.together.xyz/v1/files" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ -H "Content-Type: application/json" - parameters: - - name: id - in: path - required: true - schema: - type: string responses: '200': - description: Fine-tune job details retrieved successfully + description: List of files content: application/json: schema: - $ref: '#/components/schemas/FinetuneResponse' - delete: - tags: ['Fine-tuning'] - summary: Delete a fine-tune job - description: Delete a fine-tuning job. + $ref: '#/components/schemas/FileList' + /files/{id}: + get: + tags: ['Files'] + summary: List file + description: List the metadata for a single uploaded data file. 
x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -1948,9 +1968,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.fine_tuning.delete(id="ft-id") + file = client.files.retrieve(id="file-id") - print(response) + print(file) - lang: Python label: Together AI SDK (v2) source: | @@ -1961,9 +1981,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.fine_tuning.delete(id="ft-id") + file = client.files.retrieve(id="file-id") - print(response) + print(file) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -1973,9 +1993,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.fineTuning.delete("ft-id"); + const file = await client.files.retrieve("file-id"); - console.log(response); + console.log(file); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -1985,13 +2005,13 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.fineTuning.delete("ft-id"); + const file = await client.files.retrieve("file-id"); - console.log(response); + console.log(file); - lang: Shell label: cURL source: | - curl -X "DELETE" "https://api.together.xyz/v1/fine-tunes/ft-id?force=false" \ + curl "https://api.together.xyz/v1/files/ID" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ -H "Content-Type: application/json" parameters: @@ -2000,35 +2020,17 @@ paths: required: true schema: type: string - - name: force - in: query - schema: - type: boolean - default: false responses: '200': - description: Fine-tune job deleted successfully - content: - application/json: - schema: - $ref: '#/components/schemas/FinetuneDeleteResponse' - '404': - description: Fine-tune job not found - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '500': - description: Internal server error + description: File retrieved successfully content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - /fine-tunes/{id}/events: - get: - tags: ['Fine-tuning'] - summary: List job events - description: List the events for a single fine-tuning job. + $ref: '#/components/schemas/FileResponse' + delete: + tags: ['Files'] + summary: Delete a file + description: Delete a previously uploaded data file. 
x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -2041,9 +2043,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - events = client.fine_tuning.list_events(id="ft-id") + response = client.files.delete(id="file-id") - print(events) + print(response) - lang: Python label: Together AI SDK (v2) source: | @@ -2054,10 +2056,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.fine_tuning.list_events(id="ft-id") + response = client.files.delete(id="file-id") - for event in response.data: - print(event) + print(response) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -2067,9 +2068,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const events = await client.fineTuning.listEvents("ft-id"); + const response = await client.files.delete("file-id"); - console.log(events); + console.log(response); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -2079,15 +2080,14 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const events = await client.fineTuning.listEvents("ft-id"); + const response = await client.files.delete("file-id"); - console.log(events); + console.log(response); - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/fine-tunes/ft-id/events" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" + curl -X "DELETE" "https://api.together.xyz/v1/files/file-id" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" parameters: - name: id in: path @@ -2096,16 +2096,16 @@ paths: type: string responses: '200': - description: List of fine-tune events + description: File deleted successfully content: application/json: schema: - $ref: '#/components/schemas/FinetuneListEvents' - /fine-tunes/{id}/checkpoints: + $ref: '#/components/schemas/FileDeleteResponse' + /files/{id}/content: get: - tags: ['Fine-tuning'] - summary: List checkpoints - description: List the checkpoints for a single fine-tuning job. + tags: ['Files'] + summary: Get file contents + description: Get the contents of a single uploaded data file. 
x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -2118,9 +2118,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - checkpoints = client.fine_tuning.list_checkpoints(id="ft-id") + file = client.files.retrieve_content(id="file-id") - print(checkpoints) + print(file.filename) - lang: Python label: Together AI SDK (v2) source: | @@ -2131,9 +2131,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - checkpoints = client.fine_tuning.list_checkpoints(id="ft-id") - - print(checkpoints) + with client.files.with_streaming_response.content(id="file-id") as response: + for line in response.iter_lines(): + print(line) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -2143,9 +2143,10 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const checkpoints = await client.fineTuning.listCheckpoints("ft-id"); + const response = await client.files.content("file-id"); + const content = await response.text(); - console.log(checkpoints); + console.log(content); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -2155,13 +2156,14 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const checkpoints = await client.fineTuning.listCheckpoints("ft-id"); + const response = await client.files.content("file-id"); + const content = await response.text(); - console.log(checkpoints); + console.log(content); - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/fine-tunes/ft-id/checkpoints" \ + curl "https://api.together.xyz/v1/files/file-id/content" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ -H "Content-Type: application/json" parameters: @@ -2172,16 +2174,22 @@ paths: type: string responses: '200': - description: List of fine-tune checkpoints + description: File content retrieved successfully content: application/json: schema: - $ref: '#/components/schemas/FinetuneListCheckpoints' - /finetune/download: - get: - tags: ['Fine-tuning'] - summary: Download model - description: Receive a compressed fine-tuned model or checkpoint. + $ref: '#/components/schemas/FileObject' + '500': + description: Internal Server Error + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + /files/upload: + post: + tags: ['Files'] + summary: Upload a file + description: Upload a file with specified purpose, file name, and file type. x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -2194,10 +2202,11 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - # This will download the content to a location on disk - response = client.fine_tuning.download(id="ft-id") + current_dir = os.path.dirname(os.path.abspath(__file__)) + file_path = os.path.join(current_dir, "data.jsonl") + file = client.files.upload(file=file_path) - print(response) + print(file.id) - lang: Python label: Together AI SDK (v2) source: | @@ -2208,86 +2217,98 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - # Using `with_streaming_response` gives you control to do what you want with the response. 
- stream = client.fine_tuning.with_streaming_response.content(ft_id="ft-id") - - with stream as response: - for line in response.iter_lines(): - print(line) + current_dir = os.path.dirname(os.path.abspath(__file__)) + file_path = os.path.join(current_dir, "data.jsonl") + file = client.files.upload(file=file_path) + print(file.id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); + import { upload } from "together-ai/lib/upload" + import path from "path"; + import { fileURLToPath } from "url"; - const response = await client.fineTuning.content({ - ft_id: "ft-id", - }); + const __filename = fileURLToPath(import.meta.url); + const __dirname = path.dirname(__filename); + const filepath = path.join(__dirname, "data.jsonl"); + const file = await upload(filepath); - console.log(await response.blob()); + console.log(file.id); - lang: JavaScript label: Together AI SDK (JavaScript) source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); + import { upload } from "together-ai/lib/upload" + import path from "path"; + import { fileURLToPath } from "url"; - const response = await client.fineTuning.content({ - ft_id: "ft-id", - }); + const __filename = fileURLToPath(import.meta.url); + const __dirname = path.dirname(__filename); + const filepath = path.join(__dirname, "data.jsonl"); + const file = await upload(filepath); - console.log(await response.blob()); + console.log(file.id); - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/finetune/download?ft_id=ft-id&checkpoint=merged" + curl "https://api.together.xyz/v1/files/upload" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - parameters: - - in: query - name: ft_id - schema: - type: string - required: true - description: Fine-tune ID to download. A string that starts with `ft-`. - - in: query - name: checkpoint_step - schema: - type: integer - required: false - description: Specifies step number for checkpoint to download. Ignores `checkpoint` value if set. - - in: query - name: checkpoint - schema: - type: string - enum: - - merged - - adapter - - model_output_path - description: Specifies checkpoint type to download - `merged` vs `adapter`. This field is required if the checkpoint_step is not set. + -F "file=@/path/to/data.jsonl" \ + -F "file_name=data.jsonl" \ + -F "purpose=fine-tune" + requestBody: + required: true + content: + multipart/form-data: + schema: + type: object + required: + - purpose + - file_name + - file + properties: + purpose: + $ref: '#/components/schemas/FilePurpose' + file_name: + type: string + description: The name of the file being uploaded + example: 'dataset.csv' + file_type: + $ref: '#/components/schemas/FileType' + file: + type: string + format: binary + description: The content of the file being uploaded responses: '200': - description: Successfully downloaded the fine-tuned model or checkpoint. + description: File uploaded successfully content: - application/octet-stream: + application/json: schema: - type: string - format: binary + $ref: '#/components/schemas/FileResponse' + '500': + description: Internal Server Error + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' '400': - description: Invalid request parameters. - '404': - description: Fine-tune ID not found. 
- /fine-tunes/{id}/cancel: + description: Bad Request + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '401': + description: Unauthorized + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + /fine-tunes: post: tags: ['Fine-tuning'] - summary: Cancel job - description: Cancel a currently running fine-tuning job. Returns a FinetuneResponseTruncated object. + summary: Create job + description: Create a fine-tuning job with the provided model and training data. x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -2300,7 +2321,10 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.fine_tuning.cancel(id="ft-id") + response = client.fine_tuning.create( + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", + training_file="file-id" + ) print(response) - lang: Python @@ -2313,7 +2337,10 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.fine_tuning.cancel(id="ft-id") + response = client.fine_tuning.create( + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", + training_file="file-id" + ) print(response) - lang: TypeScript @@ -2325,7 +2352,10 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.fineTuning.cancel("ft-id"); + const response = await client.fineTuning.create({ + model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", + training_file: "file-id", + }); console.log(response); - lang: JavaScript @@ -2337,154 +2367,191 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.fineTuning.cancel("ft-id"); + const response = await client.fineTuning.create({ + model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", + training_file: "file-id", + }); console.log(response); - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/fine-tunes/ft-id/cancel" \ + curl -X POST "https://api.together.xyz/v1/fine-tunes" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - parameters: - - in: path - name: id - schema: - type: string - required: true - description: Fine-tune ID to cancel. A string that starts with `ft-`. - responses: - '200': - description: Successfully cancelled the fine-tuning job. - content: - application/json: - schema: - $ref: '#/components/schemas/FinetuneResponseTruncated' - '400': - description: Invalid request parameters. - '404': - description: Fine-tune ID not found. - /rerank: - post: - tags: ['Rerank'] - summary: Create a rerank request - description: Query a reranker model - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - documents = [ - { - "title": "Llama", - "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." - }, - { - "title": "Panda", - "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." - }, - { - "title": "Guanaco", - "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." 
- }, - { - "title": "Wild Bactrian camel", - "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." - } - ] - - response = client.rerank.create( - model="Salesforce/Llama-Rank-v1", - query="What animals can I find near Peru?", - documents=documents, - ) - - for result in response.results: - print(f"Rank: {result.index + 1}") - print(f"Title: {documents[result.index]['title']}") - print(f"Text: {documents[result.index]['text']}") - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - documents = [ - { - "title": "Llama", - "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." - }, - { - "title": "Panda", - "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." - }, - { - "title": "Guanaco", - "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." - }, - { - "title": "Wild Bactrian camel", - "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." - } - ] - - response = client.rerank.create( - model="Salesforce/Llama-Rank-v1", - query="What animals can I find near Peru?", - documents=documents, - ) - - for result in response.results: - print(f"Rank: {result.index + 1}") - print(f"Title: {documents[result.index]['title']}") - print(f"Text: {documents[result.index]['text']}") - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); + -H "Content-Type: application/json" \ + -d '{ + "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", + "training_file": "file-id" + }' + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - training_file + - model + properties: + training_file: + type: string + description: File-ID of a training file uploaded to the Together API + validation_file: + type: string + description: File-ID of a validation file uploaded to the Together API + model: + type: string + description: Name of the base model to run fine-tune job on + n_epochs: + type: integer + default: 1 + description: Number of complete passes through the training dataset (higher values may improve results but increase cost and risk of overfitting) + n_checkpoints: + type: integer + default: 1 + description: Number of intermediate model versions saved during training for evaluation + n_evals: + type: integer + default: 0 + description: Number of evaluations to be run on a given validation set during training + batch_size: + oneOf: + - type: integer + - type: string + enum: + - max + default: 'max' + description: Number of training examples processed together (larger batches use more memory but may train faster). Defaults to "max". We use training optimizations like packing, so the effective batch size may be different than the value you set. 
+ learning_rate: + type: number + format: float + default: 0.00001 + description: Controls how quickly the model adapts to new information (too high may cause instability, too low may slow convergence) + lr_scheduler: + type: object + default: none + $ref: '#/components/schemas/LRScheduler' + description: The learning rate scheduler to use. It specifies how the learning rate is adjusted during training. + warmup_ratio: + type: number + format: float + default: 0.0 + description: The percent of steps at the start of training to linearly increase the learning rate. + max_grad_norm: + type: number + format: float + default: 1.0 + description: Max gradient norm to be used for gradient clipping. Set to 0 to disable. + weight_decay: + type: number + format: float + default: 0.0 + description: Weight decay. Regularization parameter for the optimizer. + suffix: + type: string + description: Suffix that will be added to your fine-tuned model name + wandb_api_key: + type: string + description: Integration key for tracking experiments and model metrics on W&B platform + wandb_base_url: + type: string + description: The base URL of a dedicated Weights & Biases instance. + wandb_project_name: + type: string + description: The Weights & Biases project for your run. If not specified, will use `together` as the project name. + wandb_name: + type: string + description: The Weights & Biases name for your run. + train_on_inputs: + oneOf: + - type: boolean + - type: string + enum: + - auto + type: boolean + default: auto + description: Whether to mask the user messages in conversational data or prompts in instruction data. + deprecated: true + training_method: + type: object + oneOf: + - $ref: '#/components/schemas/TrainingMethodSFT' + - $ref: '#/components/schemas/TrainingMethodDPO' + description: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization. + training_type: + type: object + oneOf: + - $ref: '#/components/schemas/FullTrainingType' + - $ref: '#/components/schemas/LoRATrainingType' + from_checkpoint: + type: string + description: The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the final checkpoint will be used. + from_hf_model: + type: string + description: The Hugging Face Hub repo to start training from. Should be as close as possible to the base model (specified by the `model` argument) in terms of architecture and size. + hf_model_revision: + type: string + description: The revision of the Hugging Face Hub model to continue training from. E.g., hf_model_revision=main (default, used if the argument is not provided) or hf_model_revision='607a30d783dfa663caf39e06633721c8d4cfcd7e' (specific commit). + hf_api_token: + type: string + description: The API token for the Hugging Face Hub. + hf_output_repo_name: + type: string + description: The name of the Hugging Face repository to upload the fine-tuned model to. + responses: + '200': + description: Fine-tuning job initiated successfully + content: + application/json: + schema: + $ref: '#/components/schemas/FinetuneResponseTruncated' + get: + tags: ['Fine-tuning'] + summary: List all jobs + description: List the metadata for all fine-tuning jobs. Returns a list of FinetuneResponseTruncated objects. 
+ x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os - const documents = [{ - "title": "Llama", - "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." - }, - { - "title": "Panda", - "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." - }, - { - "title": "Guanaco", - "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." - }, - { - "title": "Wild Bactrian camel", - "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." - }]; + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) - const response = await client.rerank.create({ - model: "Salesforce/Llama-Rank-v1", - query: "What animals can I find near Peru?", - documents, + response = client.fine_tuning.list() + + for fine_tune in response.data: + print(f"ID: {fine_tune.id}, Status: {fine_tune.status}") + - lang: Python + label: Together AI SDK (v2) + source: | + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.fine_tuning.list() + + for fine_tune in response.data: + print(f"ID: {fine_tune.id}, Status: {fine_tune.status}") + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, }); - for (const result of response.results) { - console.log(`Rank: ${result.index + 1}`); - console.log(`Title: ${documents[result.index].title}`); - console.log(`Text: ${documents[result.index].text}`); + const response = await client.fineTuning.list(); + + for (const fineTune of response.data) { + console.log(fineTune.id, fineTune.status); } - lang: JavaScript label: Together AI SDK (JavaScript) @@ -2495,115 +2562,104 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const documents = [{ - "title": "Llama", - "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." - }, - { - "title": "Panda", - "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." - }, - { - "title": "Guanaco", - "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." - }, - { - "title": "Wild Bactrian camel", - "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." 
- }]; - - const response = await client.rerank.create({ - model: "Salesforce/Llama-Rank-v1", - query: "What animals can I find near Peru?", - documents, - }); + const response = await client.fineTuning.list(); - for (const result of response.results) { - console.log(`Rank: ${result.index + 1}`); - console.log(`Title: ${documents[result.index].title}`); - console.log(`Text: ${documents[result.index].text}`); + for (const fineTune of response.data) { + console.log(fineTune.id, fineTune.status); } - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/rerank" \ + curl "https://api.together.xyz/v1/fine-tunes" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "Salesforce/Llama-Rank-v1", - "query": "What animals can I find near Peru?", - "documents": [{ - "title": "Llama", - "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." - }, - { - "title": "Panda", - "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." - }, - { - "title": "Guanaco", - "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." - }, - { - "title": "Wild Bactrian camel", - "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." - }] - }' - operationId: rerank - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RerankRequest' + -H "Content-Type: application/json" responses: '200': - description: '200' + description: List of fine-tune jobs content: application/json: schema: - $ref: '#/components/schemas/RerankResponse' - '400': - description: 'BadRequest' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '401': - description: 'Unauthorized' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '404': - description: 'NotFound' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '429': - description: 'RateLimit' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '503': - description: 'Overloaded' + $ref: '#/components/schemas/FinetuneTruncatedList' + /fine-tunes/estimate-price: + post: + tags: ['Fine-tuning'] + summary: Estimate price + description: Estimate the price of a fine-tuning job. 
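+      x-codeSamples:
+        # Illustrative request only; the model and training_file placeholders mirror the /fine-tunes create samples,
+        # and the body fields follow the request schema defined below.
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X POST "https://api.together.xyz/v1/fine-tunes/estimate-price" \
+              -H "Authorization: Bearer $TOGETHER_API_KEY" \
+              -H "Content-Type: application/json" \
+              -d '{
+                "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
+                "training_file": "file-id"
+              }'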
+ requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - training_file + properties: + training_file: + type: string + description: File-ID of a training file uploaded to the Together API + validation_file: + type: string + description: File-ID of a validation file uploaded to the Together API + model: + type: string + description: Name of the base model to run fine-tune job on + n_epochs: + type: integer + default: 1 + description: Number of complete passes through the training dataset (higher values may improve results but increase cost and risk of overfitting) + n_evals: + type: integer + default: 0 + description: Number of evaluations to be run on a given validation set during training + training_method: + type: object + oneOf: + - $ref: '#/components/schemas/TrainingMethodSFT' + - $ref: '#/components/schemas/TrainingMethodDPO' + description: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization. + training_type: + type: object + oneOf: + - $ref: '#/components/schemas/FullTrainingType' + - $ref: '#/components/schemas/LoRATrainingType' + from_checkpoint: + type: string + description: The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the final checkpoint will be used. + responses: + '500': + description: Internal Server Error content: application/json: schema: $ref: '#/components/schemas/ErrorData' - '504': - description: 'Timeout' + '200': + description: Price estimated successfully content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - deprecated: false - /audio/speech: - post: - tags: ['Audio'] - summary: Create audio generation request - description: Generate audio from input text + type: object + properties: + estimated_total_price: + type: number + description: The price of the fine-tuning job + allowed_to_proceed: + type: boolean + description: Whether the user is allowed to proceed with the fine-tuning job + example: true + user_limit: + type: number + description: The user's credit limit in dollars + estimated_train_token_count: + type: number + description: The estimated number of tokens to be trained + estimated_eval_token_count: + type: number + description: The estimated number of tokens for evaluation + /fine-tunes/{id}: + get: + tags: ['Fine-tuning'] + summary: List job + description: List the metadata for a single fine-tuning job. 
x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -2616,13 +2672,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.audio.speech.create( - model="cartesia/sonic-2", - input="The quick brown fox jumps over the lazy dog.", - voice="laidback woman", - ) + fine_tune = client.fine_tuning.retrieve(id="ft-id") - response.stream_to_file("audio.wav") + print(fine_tune) - lang: Python label: Together AI SDK (v2) source: | @@ -2633,545 +2685,302 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.audio.speech.with_streaming_response.create( - model="cartesia/sonic-2", - input="The quick brown fox jumps over the lazy dog.", - voice="laidback woman", - ) + fine_tune = client.fine_tuning.retrieve(id="ft-id") - with response as stream: - stream.stream_to_file("audio.wav") + print(fine_tune) - lang: TypeScript label: Together AI SDK (TypeScript) source: | import Together from "together-ai"; - import { createWriteStream } from "fs"; - import { join } from "path"; - import { pipeline } from "stream/promises"; const client = new Together({ apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.audio.speech.create({ - model: "cartesia/sonic-2", - input: "The quick brown fox jumps over the lazy dog.", - voice: "laidback woman", - }); - - const filepath = join(process.cwd(), "audio.wav"); - const writeStream = createWriteStream(filepath); + const fineTune = await client.fineTuning.retrieve("ft-id"); - if (response.body) { - await pipeline(response.body, writeStream); - } + console.log(fineTune); - lang: JavaScript label: Together AI SDK (JavaScript) source: | import Together from "together-ai"; - import { createWriteStream } from "fs"; - import { join } from "path"; - import { pipeline } from "stream/promises"; const client = new Together({ apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.audio.speech.create({ - model: "cartesia/sonic-2", - input: "The quick brown fox jumps over the lazy dog.", - voice: "laidback woman", - }); - - const filepath = join(process.cwd(), "audio.wav"); - const writeStream = createWriteStream(filepath); + const fineTune = await client.fineTuning.retrieve("ft-id"); - if (response.body) { - await pipeline(response.body, writeStream); - } + console.log(fineTune); - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/audio/speech" \ + curl "https://api.together.xyz/v1/fine-tunes/ft-id" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "cartesia/sonic-2", - "input": "The quick brown fox jumps over the lazy dog.", - "voice": "laidback woman" - }' \ - --output audio.wav - operationId: audio-speech - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/AudioSpeechRequest' + -H "Content-Type: application/json" + parameters: + - name: id + in: path + required: true + schema: + type: string responses: '200': - description: 'OK' - content: - application/octet-stream: - schema: - type: string - format: binary - audio/wav: - schema: - type: string - format: binary - audio/mpeg: - schema: - type: string - format: binary - text/event-stream: - schema: - $ref: '#/components/schemas/AudioSpeechStreamResponse' - '400': - description: 'BadRequest' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '429': - description: 'RateLimit' + description: Fine-tune job details retrieved successfully content: application/json: schema: - $ref: 
'#/components/schemas/ErrorData' - /audio/speech/websocket: - get: - tags: ['Audio'] - summary: Real-time text-to-speech via WebSocket - description: | - Establishes a WebSocket connection for real-time text-to-speech generation. This endpoint uses WebSocket protocol (wss://api.together.ai/v1/audio/speech/websocket) for bidirectional streaming communication. + $ref: '#/components/schemas/FinetuneResponse' + delete: + tags: ['Fine-tuning'] + summary: Delete a fine-tune job + description: Delete a fine-tuning job. + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os - **Connection Setup:** - - Protocol: WebSocket (wss://) - - Authentication: Pass API key as Bearer token in Authorization header - - Parameters: Sent as query parameters (model, voice, max_partial_length) + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) - **Client Events:** - - `tts_session.updated`: Update session parameters like voice - ```json - { - "type": "tts_session.updated", - "session": { - "voice": "tara" - } - } - ``` - - `input_text_buffer.append`: Send text chunks for TTS generation - ```json - { - "type": "input_text_buffer.append", - "text": "Hello, this is a test." - } - ``` - - `input_text_buffer.clear`: Clear the buffered text - ```json - { - "type": "input_text_buffer.clear" - } - ``` - - `input_text_buffer.commit`: Signal end of text input and process remaining text - ```json - { - "type": "input_text_buffer.commit" - } - ``` - - **Server Events:** - - `session.created`: Initial session confirmation (sent first) - ```json - { - "event_id": "evt_123456", - "type": "session.created", - "session": { - "id": "session-id", - "object": "realtime.tts.session", - "modalities": ["text", "audio"], - "model": "hexgrad/Kokoro-82M", - "voice": "tara" - } - } - ``` - - `conversation.item.input_text.received`: Acknowledgment that text was received - ```json - { - "type": "conversation.item.input_text.received", - "text": "Hello, this is a test." 
- } - ``` - - `conversation.item.audio_output.delta`: Audio chunks as base64-encoded data - ```json - { - "type": "conversation.item.audio_output.delta", - "item_id": "tts_1", - "delta": "" - } - ``` - - `conversation.item.audio_output.done`: Audio generation complete for an item - ```json - { - "type": "conversation.item.audio_output.done", - "item_id": "tts_1" - } - ``` - - `conversation.item.tts.failed`: Error occurred - ```json - { - "type": "conversation.item.tts.failed", - "error": { - "message": "Error description", - "type": "invalid_request_error", - "param": null, - "code": "invalid_api_key" - } - } - ``` - - **Text Processing:** - - Partial text (no sentence ending) is held in buffer until: - - We believe that the text is complete enough to be processed for TTS generation - - The partial text exceeds `max_partial_length` characters (default: 250) - - The `input_text_buffer.commit` event is received - - **Audio Format:** - - Format: WAV (PCM s16le) - - Sample Rate: 24000 Hz - - Encoding: Base64 - - Delivered via `conversation.item.audio_output.delta` events - - **Error Codes:** - - `invalid_api_key`: Invalid API key provided (401) - - `missing_api_key`: Authorization header missing (401) - - `model_not_available`: Invalid or unavailable model (400) - - Invalid text format errors (400) + response = client.fine_tuning.delete(id="ft-id") - operationId: realtime-tts - x-codeSamples: + print(response) - lang: Python - label: Python WebSocket Client + label: Together AI SDK (v2) source: | - import asyncio - import websockets - import json - import base64 + from together import Together import os - async def generate_speech(): - api_key = os.environ.get("TOGETHER_API_KEY") - url = "wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara" + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) - headers = { - "Authorization": f"Bearer {api_key}" - } + response = client.fine_tuning.delete(id="ft-id") - async with websockets.connect(url, additional_headers=headers) as ws: - # Wait for session created - session_msg = await ws.recv() - session_data = json.loads(session_msg) - print(f"Session created: {session_data['session']['id']}") + print(response) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; - # Send text for TTS - text_chunks = [ - "Hello, this is a test.", - "This is the second sentence.", - "And this is the final one." 
- ] + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); - async def send_text(): - for chunk in text_chunks: - await ws.send(json.dumps({ - "type": "input_text_buffer.append", - "text": chunk - })) - await asyncio.sleep(0.5) # Simulate typing + const response = await client.fineTuning.delete("ft-id"); - # Commit to process any remaining text - await ws.send(json.dumps({ - "type": "input_text_buffer.commit" - })) + console.log(response); + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; - async def receive_audio(): - audio_data = bytearray() - async for message in ws: - data = json.loads(message) + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); - if data["type"] == "conversation.item.input_text.received": - print(f"Text received: {data['text']}") - elif data["type"] == "conversation.item.audio_output.delta": - # Decode base64 audio chunk - audio_chunk = base64.b64decode(data['delta']) - audio_data.extend(audio_chunk) - print(f"Received audio chunk for item {data['item_id']}") - elif data["type"] == "conversation.item.audio_output.done": - print(f"Audio generation complete for item {data['item_id']}") - elif data["type"] == "conversation.item.tts.failed": - error = data.get("error", {}) - print(f"Error: {error.get('message')}") - break + const response = await client.fineTuning.delete("ft-id"); - # Save the audio to a file - with open("output.wav", "wb") as f: - f.write(audio_data) - print("Audio saved to output.wav") + console.log(response); + - lang: Shell + label: cURL + source: | + curl -X "DELETE" "https://api.together.xyz/v1/fine-tunes/ft-id?force=false" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" + parameters: + - name: id + in: path + required: true + schema: + type: string + - name: force + in: query + schema: + type: boolean + default: false + responses: + '200': + description: Fine-tune job deleted successfully + content: + application/json: + schema: + $ref: '#/components/schemas/FinetuneDeleteResponse' + '404': + description: Fine-tune job not found + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '500': + description: Internal server error + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + /fine-tunes/{id}/events: + get: + tags: ['Fine-tuning'] + summary: List job events + description: List the events for a single fine-tuning job. 
+ x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os - # Run send and receive concurrently - await asyncio.gather(send_text(), receive_audio()) + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) - asyncio.run(generate_speech()) - - lang: JavaScript - label: Node.js WebSocket Client + events = client.fine_tuning.list_events(id="ft-id") + + print(events) + - lang: Python + label: Together AI SDK (v2) source: | - import WebSocket from 'ws'; - import fs from 'fs'; + from together import Together + import os - const apiKey = process.env.TOGETHER_API_KEY; - const url = 'wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara'; + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) - const ws = new WebSocket(url, { - headers: { - 'Authorization': `Bearer ${apiKey}` - } - }); + response = client.fine_tuning.list_events(id="ft-id") - const audioData = []; + for event in response.data: + print(event) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; - ws.on('open', () => { - console.log('WebSocket connection established!'); + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, }); - ws.on('message', (data) => { - const message = JSON.parse(data.toString()); + const events = await client.fineTuning.listEvents("ft-id"); - if (message.type === 'session.created') { - console.log(`Session created: ${message.session.id}`); + console.log(events); + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; - // Send text chunks - const textChunks = [ - "Hello, this is a test.", - "This is the second sentence.", - "And this is the final one." - ]; - - textChunks.forEach((text, index) => { - setTimeout(() => { - ws.send(JSON.stringify({ - type: 'input_text_buffer.append', - text: text - })); - }, index * 500); - }); - - // Commit after all chunks - setTimeout(() => { - ws.send(JSON.stringify({ - type: 'input_text_buffer.commit' - })); - }, textChunks.length * 500 + 100); - - } else if (message.type === 'conversation.item.input_text.received') { - console.log(`Text received: ${message.text}`); - } else if (message.type === 'conversation.item.audio_output.delta') { - // Decode base64 audio chunk - const audioChunk = Buffer.from(message.delta, 'base64'); - audioData.push(audioChunk); - console.log(`Received audio chunk for item ${message.item_id}`); - } else if (message.type === 'conversation.item.audio_output.done') { - console.log(`Audio generation complete for item ${message.item_id}`); - } else if (message.type === 'conversation.item.tts.failed') { - const errorMessage = message.error?.message ?? 
'Unknown error'; - console.error(`Error: ${errorMessage}`); - ws.close(); - } + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, }); - ws.on('close', () => { - // Save the audio to a file - if (audioData.length > 0) { - const completeAudio = Buffer.concat(audioData); - fs.writeFileSync('output.wav', completeAudio); - console.log('Audio saved to output.wav'); - } - }); + const events = await client.fineTuning.listEvents("ft-id"); - ws.on('error', (error) => { - console.error('WebSocket error:', error); - }); + console.log(events); + - lang: Shell + label: cURL + source: | + curl "https://api.together.xyz/v1/fine-tunes/ft-id/events" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" parameters: - - in: query - name: model - required: false - schema: - type: string - enum: - - hexgrad/Kokoro-82M - - cartesia/sonic-english - default: hexgrad/Kokoro-82M - description: The TTS model to use for speech generation. Can also be set via `tts_session.updated` event. - - in: query - name: voice - required: false + - name: id + in: path + required: true schema: type: string - default: tara - description: | - The voice to use for speech generation. Default is 'tara'. - Available voices vary by model. Can also be updated via `tts_session.updated` event. - - in: query - name: max_partial_length - required: false - schema: - type: integer - default: 250 - description: | - Maximum number of characters in partial text before forcing TTS generation - even without a sentence ending. Helps reduce latency for long text without punctuation. responses: - '101': - description: | - Switching Protocols - WebSocket connection established successfully. - - Error message format: - ```json - { - "type": "conversation.item.tts.failed", - "error": { - "message": "Error description", - "type": "invalid_request_error", - "param": null, - "code": "error_code" - } - } - ``` - /audio/transcriptions: - post: - tags: ['Audio'] - summary: Create audio transcription request - description: Transcribes audio into text + '200': + description: List of fine-tune events + content: + application/json: + schema: + $ref: '#/components/schemas/FinetuneListEvents' + /fine-tunes/{id}/checkpoints: + get: + tags: ['Fine-tuning'] + summary: List checkpoints + description: List the checkpoints for a single fine-tuning job. 
x-codeSamples: - lang: Python label: Together AI SDK (v1) source: | # Docs for v2 can be found by changing the above selector ^ from together import Together + import os client = Together( api_key=os.environ.get("TOGETHER_API_KEY"), ) - file = open("audio.wav", "rb") - - response = client.audio.transcriptions.create( - model="openai/whisper-large-v3", - file=file, - ) + checkpoints = client.fine_tuning.list_checkpoints(id="ft-id") - print(response.text) + print(checkpoints) - lang: Python label: Together AI SDK (v2) source: | from together import Together + import os client = Together( api_key=os.environ.get("TOGETHER_API_KEY"), ) - file = open("audio.wav", "rb") - - response = client.audio.transcriptions.create( - model="openai/whisper-large-v3", - file=file, - ) + checkpoints = client.fine_tuning.list_checkpoints(id="ft-id") - print(response.text) + print(checkpoints) - lang: TypeScript label: Together AI SDK (TypeScript) source: | import Together from "together-ai"; - import { readFileSync } from "fs"; - import { join } from "path"; const client = new Together({ apiKey: process.env.TOGETHER_API_KEY, }); - const audioFilePath = join(process.cwd(), "audio.wav"); - const audioBuffer = readFileSync(audioFilePath); - const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" }); - - const response = await client.audio.transcriptions.create({ - model: "openai/whisper-large-v3", - file: audioFile, - }); + const checkpoints = await client.fineTuning.listCheckpoints("ft-id"); - console.log(response.text); + console.log(checkpoints); - lang: JavaScript label: Together AI SDK (JavaScript) source: | import Together from "together-ai"; - import { readFileSync } from "fs"; - import { join } from "path"; const client = new Together({ apiKey: process.env.TOGETHER_API_KEY, }); - const audioFilePath = join(process.cwd(), "audio.wav"); - const audioBuffer = readFileSync(audioFilePath); - const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" }); - - const response = await client.audio.transcriptions.create({ - model: "openai/whisper-large-v3", - file: audioFile, - }); + const checkpoints = await client.fineTuning.listCheckpoints("ft-id"); - console.log(response.text); + console.log(checkpoints); - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/audio/transcriptions" \ + curl "https://api.together.xyz/v1/fine-tunes/ft-id/checkpoints" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -F "file=@audio.wav" \ - -F "model=openai/whisper-large-v3" - operationId: audio-transcriptions - requestBody: - required: true - content: - multipart/form-data: - schema: - $ref: '#/components/schemas/AudioTranscriptionRequest' + -H "Content-Type: application/json" + parameters: + - name: id + in: path + required: true + schema: + type: string responses: '200': - description: 'OK' - content: - application/json: - schema: - $ref: '#/components/schemas/AudioTranscriptionResponse' - '400': - description: 'BadRequest' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '401': - description: 'Unauthorized' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '429': - description: 'RateLimit' + description: List of fine-tune checkpoints content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - /audio/translations: - post: - tags: ['Audio'] - summary: Create audio translation request - description: Translates audio into English + $ref: '#/components/schemas/FinetuneListCheckpoints' + 
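A quick illustration of how the checkpoints route above and the download route defined next fit together: list a job's checkpoints, pick a step, then pass it as `checkpoint_step` to `/v1/finetune/download`. This is a minimal sketch using plain `requests` against the documented routes; the checkpoint list field names (`data`, `step`) and the output filename are assumptions for illustration, not part of this spec.

```python
import os
import requests

BASE = "https://api.together.xyz/v1"
HEADERS = {"Authorization": f"Bearer {os.environ['TOGETHER_API_KEY']}"}
FT_ID = "ft-id"  # placeholder fine-tune ID

# 1. List checkpoints for the job (GET /fine-tunes/{id}/checkpoints).
resp = requests.get(f"{BASE}/fine-tunes/{FT_ID}/checkpoints", headers=HEADERS)
resp.raise_for_status()
# Assumed response shape: {"data": [{"step": ...}, ...]} -- adjust to the real FinetuneListCheckpoints schema.
latest_step = max(cp["step"] for cp in resp.json()["data"])

# 2. Download that checkpoint as a binary archive (GET /finetune/download).
download = requests.get(
    f"{BASE}/finetune/download",
    headers=HEADERS,
    params={"ft_id": FT_ID, "checkpoint_step": latest_step},
    stream=True,
)
download.raise_for_status()
with open(f"{FT_ID}-step-{latest_step}.tar", "wb") as f:  # archive format is not specified here
    for chunk in download.iter_content(chunk_size=1 << 20):
        f.write(chunk)
```

If `checkpoint_step` is omitted, the `checkpoint` query parameter (`merged` or `adapter`) selects which artifact to download instead, as described below.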
/finetune/download:
    get:
      tags: ['Fine-tuning']
      summary: Download model
      description: Receive a compressed fine-tuned model or checkpoint.
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v1)
@@ -3184,15 +2993,10 @@ paths:
            api_key=os.environ.get("TOGETHER_API_KEY"),
          )
 
-          file = open("audio.wav", "rb")
-
-          response = client.audio.translations.create(
-              model="openai/whisper-large-v3",
-              file=file,
-              language="es",
-          )
+          # This will download the content to a location on disk
+          response = client.fine_tuning.download(id="ft-id")
 
-          print(response.text)
+          print(response)
        - lang: Python
          label: Together AI SDK (v2)
          source: |
@@ -3203,113 +3007,101 @@ paths:
            api_key=os.environ.get("TOGETHER_API_KEY"),
          )
 
-          file = open("audio.wav", "rb")
+          # Using `with_streaming_response` gives you control to do what you want with the response.
+          stream = client.fine_tuning.with_streaming_response.content(ft_id="ft-id")
 
-          response = client.audio.translations.create(
-              model="openai/whisper-large-v3",
-              file=file,
-              language="es",
-          )
+          with stream as response:
+              for line in response.iter_lines():
+                  print(line)
 
-          print(response.text)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
-          import { readFileSync } from "fs";
-          import { join } from "path";
 
            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });
 
-          const audioFilePath = join(process.cwd(), "audio.wav");
-          const audioBuffer = readFileSync(audioFilePath);
-          const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });
-
-          const response = await client.audio.translations.create({
-            model: "openai/whisper-large-v3",
-            file: audioFile,
-            language: "es"
+          const response = await client.fineTuning.content({
+            ft_id: "ft-id",
          });
 
-          console.log(response.text);
+          console.log(await response.blob());
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
-          import { readFileSync } from "fs";
-          import { join } from "path";
 
            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });
 
-          const audioFilePath = join(process.cwd(), "audio.wav");
-          const audioBuffer = readFileSync(audioFilePath);
-          const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });
-
-          const response = await client.audio.translations.create({
-            model: "openai/whisper-large-v3",
-            file: audioFile,
-            language: "es"
+          const response = await client.fineTuning.content({
+            ft_id: "ft-id",
          });
 
-          console.log(response.text);
+          console.log(await response.blob());
        - lang: Shell
          label: cURL
          source: |
-          curl -X POST "https://api.together.xyz/v1/audio/transcriptions" \
+          curl "https://api.together.xyz/v1/finetune/download?ft_id=ft-id&checkpoint=merged" \
           -H "Authorization: Bearer $TOGETHER_API_KEY" \
-            -F "file=@audio.wav" \
-            -F "model=openai/whisper-large-v3" \
-            -F "language=es"
-      operationId: audio-translations
-      requestBody:
-        required: true
-        content:
-          multipart/form-data:
-            schema:
-              $ref: '#/components/schemas/AudioTranslationRequest'
+          -H "Content-Type: application/json"
+      parameters:
+        - in: query
+          name: ft_id
+          schema:
+            type: string
+          required: true
+          description: Fine-tune ID to download. A string that starts with `ft-`.
+        - in: query
+          name: checkpoint_step
+          schema:
+            type: integer
+          required: false
+          description: Specifies step number for checkpoint to download. Ignores `checkpoint` value if set. 
+ - in: query + name: checkpoint + schema: + type: string + enum: + - merged + - adapter + - model_output_path + description: Specifies checkpoint type to download - `merged` vs `adapter`. This field is required if the checkpoint_step is not set. responses: '200': - description: 'OK' + description: Successfully downloaded the fine-tuned model or checkpoint. content: - application/json: + application/octet-stream: schema: - $ref: '#/components/schemas/AudioTranslationResponse' + type: string + format: binary '400': - description: 'BadRequest' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '401': - description: 'Unauthorized' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '429': - description: 'RateLimit' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - /clusters/availability-zones: - get: - tags: ['endpoints'] - summary: List all available availability zones. - description: List all available availability zones. - operationId: availabilityZones - responses: - '200': - description: Success - content: - application/json: - schema: - $ref: '#/components/schemas/ListAvailibilityZonesResponse' + description: Invalid request parameters. + '404': + description: Fine-tune ID not found. + /fine-tunes/{id}/cancel: + post: + tags: ['Fine-tuning'] + summary: Cancel job + description: Cancel a currently running fine-tuning job. Returns a FinetuneResponseTruncated object. x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.fine_tuning.cancel(id="ft-id") + + print(response) - lang: Python label: Together AI SDK (v2) source: | @@ -3320,9 +3112,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.endpoints.list_avzones() + response = client.fine_tuning.cancel(id="ft-id") - print(response.avzones) + print(response) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -3332,9 +3124,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.endpoints.listAvzones(); + const response = await client.fineTuning.cancel("ft-id"); - console.log(response.avzones); + console.log(response); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -3344,20 +3136,38 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.endpoints.listAvzones(); + const response = await client.fineTuning.cancel("ft-id"); - console.log(response.avzones); + console.log(response); - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/clusters/availability-zones" \ + curl -X POST "https://api.together.xyz/v1/fine-tunes/ft-id/cancel" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ -H "Content-Type: application/json" - /endpoints: - get: - tags: ['Endpoints'] - summary: List all endpoints, can be filtered by type - description: Returns a list of all endpoints associated with your account. You can filter the results by type (dedicated or serverless). + parameters: + - in: path + name: id + schema: + type: string + required: true + description: Fine-tune ID to cancel. A string that starts with `ft-`. + responses: + '200': + description: Successfully cancelled the fine-tuning job. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/FinetuneResponseTruncated' + '400': + description: Invalid request parameters. + '404': + description: Fine-tune ID not found. + /rerank: + post: + tags: ['Rerank'] + summary: Create a rerank request + description: Query a reranker model x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -3370,10 +3180,35 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - endpoints = client.endpoints.list() + documents = [ + { + "title": "Llama", + "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." + }, + { + "title": "Panda", + "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." + }, + { + "title": "Guanaco", + "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." + }, + { + "title": "Wild Bactrian camel", + "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." + } + ] - for endpoint in endpoints: - print(endpoint.id) + response = client.rerank.create( + model="Salesforce/Llama-Rank-v1", + query="What animals can I find near Peru?", + documents=documents, + ) + + for result in response.results: + print(f"Rank: {result.index + 1}") + print(f"Title: {documents[result.index]['title']}") + print(f"Text: {documents[result.index]['text']}") - lang: Python label: Together AI SDK (v2) source: | @@ -3384,10 +3219,35 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.endpoints.list() + documents = [ + { + "title": "Llama", + "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." + }, + { + "title": "Panda", + "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." + }, + { + "title": "Guanaco", + "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." + }, + { + "title": "Wild Bactrian camel", + "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." + } + ] - for endpoint in response.data: - print(endpoint.id) + response = client.rerank.create( + model="Salesforce/Llama-Rank-v1", + query="What animals can I find near Peru?", + documents=documents, + ) + + for result in response.results: + print(f"Rank: {result.index + 1}") + print(f"Title: {documents[result.index]['title']}") + print(f"Text: {documents[result.index]['text']}") - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -3397,10 +3257,33 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const endpoints = await client.endpoints.list(); + const documents = [{ + "title": "Llama", + "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." + }, + { + "title": "Panda", + "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." 
+ }, + { + "title": "Guanaco", + "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." + }, + { + "title": "Wild Bactrian camel", + "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." + }]; - for (const endpoint of endpoints.data) { - console.log(endpoint); + const response = await client.rerank.create({ + model: "Salesforce/Llama-Rank-v1", + query: "What animals can I find near Peru?", + documents, + }); + + for (const result of response.results) { + console.log(`Rank: ${result.index + 1}`); + console.log(`Title: ${documents[result.index].title}`); + console.log(`Text: ${documents[result.index].text}`); } - lang: JavaScript label: Together AI SDK (JavaScript) @@ -3411,91 +3294,115 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const endpoints = await client.endpoints.list(); + const documents = [{ + "title": "Llama", + "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." + }, + { + "title": "Panda", + "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." + }, + { + "title": "Guanaco", + "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." + }, + { + "title": "Wild Bactrian camel", + "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." + }]; - for (const endpoint of endpoints.data) { - console.log(endpoint); - } - - lang: Shell - label: cURL + const response = await client.rerank.create({ + model: "Salesforce/Llama-Rank-v1", + query: "What animals can I find near Peru?", + documents, + }); + + for (const result of response.results) { + console.log(`Rank: ${result.index + 1}`); + console.log(`Title: ${documents[result.index].title}`); + console.log(`Text: ${documents[result.index].text}`); + } + - lang: Shell + label: cURL source: | - curl "https://api.together.xyz/v1/endpoints" \ + curl -X POST "https://api.together.xyz/v1/rerank" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - operationId: listEndpoints - parameters: - - name: type - in: query - required: false - schema: - type: string - enum: - - dedicated - - serverless - description: Filter endpoints by type - example: dedicated - - name: usage_type - in: query - required: false - schema: - type: string - enum: - - on-demand - - reserved - description: Filter endpoints by usage type - example: on-demand - - name: mine - in: query - required: false - schema: - type: boolean - description: If true, return only endpoints owned by the caller + -H "Content-Type: application/json" \ + -d '{ + "model": "Salesforce/Llama-Rank-v1", + "query": "What animals can I find near Peru?", + "documents": [{ + "title": "Llama", + "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." + }, + { + "title": "Panda", + "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." 
+ }, + { + "title": "Guanaco", + "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." + }, + { + "title": "Wild Bactrian camel", + "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." + }] + }' + operationId: rerank + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RerankRequest' responses: '200': description: '200' content: application/json: schema: - type: object - required: - - object - - data - properties: - object: - type: string - enum: - - list - data: - type: array - items: - $ref: '#/components/schemas/ListEndpoint' - example: - object: 'list' - data: - - object: 'endpoint' - id: 'endpoint-5c0c20db-62fe-4f41-8ffc-d9e4ea1a264e' - name: 'allenai/OLMo-7B' - model: 'allenai/OLMo-7B' - type: 'serverless' - owner: 'together' - state: 'STARTED' - created_at: '2024-02-28T21:34:35.444Z' - '403': + $ref: '#/components/schemas/RerankResponse' + '400': + description: 'BadRequest' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '401': description: 'Unauthorized' content: application/json: schema: $ref: '#/components/schemas/ErrorData' - '500': - description: 'Internal error' + '404': + description: 'NotFound' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '429': + description: 'RateLimit' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '503': + description: 'Overloaded' content: application/json: schema: $ref: '#/components/schemas/ErrorData' + '504': + description: 'Timeout' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + deprecated: false + /audio/speech: post: - tags: ['Endpoints'] - summary: Create a dedicated endpoint, it will start automatically - description: Creates a new dedicated endpoint for serving models. The endpoint will automatically start after creation. You can deploy any supported model on hardware configurations that meet the model's requirements. 
+ tags: ['Audio'] + summary: Create audio generation request + description: Generate audio from input text x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -3508,14 +3415,13 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - endpoint = client.endpoints.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - hardware="1x_nvidia_a100_80gb_sxm", - min_replicas=2, - max_replicas=5, + response = client.audio.speech.create( + model="cartesia/sonic-2", + input="The quick brown fox jumps over the lazy dog.", + voice="laidback woman", ) - print(endpoint.id) + response.stream_to_file("audio.wav") - lang: Python label: Together AI SDK (v2) source: | @@ -3526,402 +3432,545 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - endpoint = client.endpoints.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - hardware="1x_nvidia_a100_80gb_sxm", - autoscaling={ - "min_replicas": 2, - "max_replicas": 5, - } + response = client.audio.speech.with_streaming_response.create( + model="cartesia/sonic-2", + input="The quick brown fox jumps over the lazy dog.", + voice="laidback woman", ) - print(endpoint.id) + with response as stream: + stream.stream_to_file("audio.wav") - lang: TypeScript label: Together AI SDK (TypeScript) source: | import Together from "together-ai"; + import { createWriteStream } from "fs"; + import { join } from "path"; + import { pipeline } from "stream/promises"; const client = new Together({ apiKey: process.env.TOGETHER_API_KEY, }); - const endpoint = await client.endpoints.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - hardware: "1x_nvidia_a100_80gb_sxm", - autoscaling: { - max_replicas: 5, - min_replicas: 2, - } + const response = await client.audio.speech.create({ + model: "cartesia/sonic-2", + input: "The quick brown fox jumps over the lazy dog.", + voice: "laidback woman", }); - console.log(endpoint.id); + const filepath = join(process.cwd(), "audio.wav"); + const writeStream = createWriteStream(filepath); + + if (response.body) { + await pipeline(response.body, writeStream); + } - lang: JavaScript label: Together AI SDK (JavaScript) source: | import Together from "together-ai"; + import { createWriteStream } from "fs"; + import { join } from "path"; + import { pipeline } from "stream/promises"; const client = new Together({ apiKey: process.env.TOGETHER_API_KEY, }); - const endpoint = await client.endpoints.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - hardware: "1x_nvidia_a100_80gb_sxm", - autoscaling: { - max_replicas: 5, - min_replicas: 2, - } + const response = await client.audio.speech.create({ + model: "cartesia/sonic-2", + input: "The quick brown fox jumps over the lazy dog.", + voice: "laidback woman", }); - console.log(endpoint.id); + const filepath = join(process.cwd(), "audio.wav"); + const writeStream = createWriteStream(filepath); + + if (response.body) { + await pipeline(response.body, writeStream); + } - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/endpoints" \ + curl -X POST "https://api.together.xyz/v1/audio/speech" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ -H "Content-Type: application/json" \ -d '{ - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - "hardware": "1x_nvidia_a100_80gb_sxm", - "autoscaling": { - "max_replicas": 5, - "min_replicas": 2 - } - }' - operationId: createEndpoint + "model": "cartesia/sonic-2", + "input": "The quick brown fox jumps over the lazy dog.", + "voice": "laidback woman" + }' \ + --output audio.wav + 
operationId: audio-speech requestBody: - required: true content: application/json: schema: - $ref: '#/components/schemas/CreateEndpointRequest' + $ref: '#/components/schemas/AudioSpeechRequest' responses: '200': - description: '200' + description: 'OK' content: - application/json: + application/octet-stream: schema: - $ref: '#/components/schemas/DedicatedEndpoint' - '403': - description: 'Unauthorized' + type: string + format: binary + audio/wav: + schema: + type: string + format: binary + audio/mpeg: + schema: + type: string + format: binary + text/event-stream: + schema: + $ref: '#/components/schemas/AudioSpeechStreamResponse' + '400': + description: 'BadRequest' content: application/json: schema: $ref: '#/components/schemas/ErrorData' - '500': - description: 'Internal error' + '429': + description: 'RateLimit' content: application/json: schema: $ref: '#/components/schemas/ErrorData' - - /endpoints/{endpointId}: + /audio/speech/websocket: get: - tags: ['Endpoints'] - summary: Get endpoint by ID - description: Retrieves details about a specific endpoint, including its current state, configuration, and scaling settings. - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - endpoint = client.endpoints.get("endpoint-id") - - print(endpoint.id) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - endpoint = client.endpoints.retrieve("endpoint-id") - - print(endpoint.id) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; + tags: ['Audio'] + summary: Real-time text-to-speech via WebSocket + description: | + Establishes a WebSocket connection for real-time text-to-speech generation. This endpoint uses WebSocket protocol (wss://api.together.ai/v1/audio/speech/websocket) for bidirectional streaming communication. - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); + **Connection Setup:** + - Protocol: WebSocket (wss://) + - Authentication: Pass API key as Bearer token in Authorization header + - Parameters: Sent as query parameters (model, voice, max_partial_length) - const endpoint = await client.endpoints.retrieve("endpoint-id"); + **Client Events:** + - `tts_session.updated`: Update session parameters like voice + ```json + { + "type": "tts_session.updated", + "session": { + "voice": "tara" + } + } + ``` + - `input_text_buffer.append`: Send text chunks for TTS generation + ```json + { + "type": "input_text_buffer.append", + "text": "Hello, this is a test." 
+ } + ``` + - `input_text_buffer.clear`: Clear the buffered text + ```json + { + "type": "input_text_buffer.clear" + } + ``` + - `input_text_buffer.commit`: Signal end of text input and process remaining text + ```json + { + "type": "input_text_buffer.commit" + } + ``` - console.log(endpoint); - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; + **Server Events:** + - `session.created`: Initial session confirmation (sent first) + ```json + { + "event_id": "evt_123456", + "type": "session.created", + "session": { + "id": "session-id", + "object": "realtime.tts.session", + "modalities": ["text", "audio"], + "model": "hexgrad/Kokoro-82M", + "voice": "tara" + } + } + ``` + - `conversation.item.input_text.received`: Acknowledgment that text was received + ```json + { + "type": "conversation.item.input_text.received", + "text": "Hello, this is a test." + } + ``` + - `conversation.item.audio_output.delta`: Audio chunks as base64-encoded data + ```json + { + "type": "conversation.item.audio_output.delta", + "item_id": "tts_1", + "delta": "" + } + ``` + - `conversation.item.audio_output.done`: Audio generation complete for an item + ```json + { + "type": "conversation.item.audio_output.done", + "item_id": "tts_1" + } + ``` + - `conversation.item.tts.failed`: Error occurred + ```json + { + "type": "conversation.item.tts.failed", + "error": { + "message": "Error description", + "type": "invalid_request_error", + "param": null, + "code": "invalid_api_key" + } + } + ``` - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); + **Text Processing:** + - Partial text (no sentence ending) is held in buffer until: + - We believe that the text is complete enough to be processed for TTS generation + - The partial text exceeds `max_partial_length` characters (default: 250) + - The `input_text_buffer.commit` event is received - const endpoint = await client.endpoints.retrieve("endpoint-id"); + **Audio Format:** + - Format: WAV (PCM s16le) + - Sample Rate: 24000 Hz + - Encoding: Base64 + - Delivered via `conversation.item.audio_output.delta` events - console.log(endpoint); - - lang: Shell - label: cURL - source: | - curl "https://api.together.xyz/v1/endpoints/endpoint-id" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - operationId: getEndpoint - parameters: - - name: endpointId - in: path - required: true - schema: - type: string - description: The ID of the endpoint to retrieve - example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 - responses: - '200': - description: '200' - content: - application/json: - schema: - $ref: '#/components/schemas/DedicatedEndpoint' - '403': - description: 'Unauthorized' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '404': - description: 'Not Found' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '500': - description: 'Internal error' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' + **Error Codes:** + - `invalid_api_key`: Invalid API key provided (401) + - `missing_api_key`: Authorization header missing (401) + - `model_not_available`: Invalid or unavailable model (400) + - Invalid text format errors (400) - patch: - tags: ['Endpoints'] - summary: Update endpoint, this can also be used to start or stop a dedicated endpoint - description: Updates an existing endpoint's configuration. 
You can modify the display name, autoscaling settings, or change the endpoint's state (start/stop). + operationId: realtime-tts x-codeSamples: - lang: Python - label: Together AI SDK (v1) + label: Python WebSocket Client source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together + import asyncio + import websockets + import json + import base64 import os - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) + async def generate_speech(): + api_key = os.environ.get("TOGETHER_API_KEY") + url = "wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara" - endpoint = client.endpoints.update( - endpoint_id="endpoint-id", - state="STOPPED" - ) + headers = { + "Authorization": f"Bearer {api_key}" + } - print(endpoint) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; + async with websockets.connect(url, additional_headers=headers) as ws: + # Wait for session created + session_msg = await ws.recv() + session_data = json.loads(session_msg) + print(f"Session created: {session_data['session']['id']}") - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); + # Send text for TTS + text_chunks = [ + "Hello, this is a test.", + "This is the second sentence.", + "And this is the final one." + ] - const endpoint = await client.endpoints.update("endpoint-id", { - state: "STOPPED" - }); + async def send_text(): + for chunk in text_chunks: + await ws.send(json.dumps({ + "type": "input_text_buffer.append", + "text": chunk + })) + await asyncio.sleep(0.5) # Simulate typing - console.log(endpoint); - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; + # Commit to process any remaining text + await ws.send(json.dumps({ + "type": "input_text_buffer.commit" + })) - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); + async def receive_audio(): + audio_data = bytearray() + async for message in ws: + data = json.loads(message) - const endpoint = await client.endpoints.update("endpoint-id", { - state: "STOPPED" - }); + if data["type"] == "conversation.item.input_text.received": + print(f"Text received: {data['text']}") + elif data["type"] == "conversation.item.audio_output.delta": + # Decode base64 audio chunk + audio_chunk = base64.b64decode(data['delta']) + audio_data.extend(audio_chunk) + print(f"Received audio chunk for item {data['item_id']}") + elif data["type"] == "conversation.item.audio_output.done": + print(f"Audio generation complete for item {data['item_id']}") + elif data["type"] == "conversation.item.tts.failed": + error = data.get("error", {}) + print(f"Error: {error.get('message')}") + break - console.log(endpoint); - - lang: Shell - label: cURL + # Save the audio to a file + with open("output.wav", "wb") as f: + f.write(audio_data) + print("Audio saved to output.wav") + + # Run send and receive concurrently + await asyncio.gather(send_text(), receive_audio()) + + asyncio.run(generate_speech()) + - lang: JavaScript + label: Node.js WebSocket Client source: | - curl -X PATCH "https://api.together.xyz/v1/endpoints/endpoint-id" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "state": "STOPPED" - }' - operationId: updateEndpoint + import WebSocket from 'ws'; + import fs from 'fs'; + + const apiKey = process.env.TOGETHER_API_KEY; + const url = 
'wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara'; + + const ws = new WebSocket(url, { + headers: { + 'Authorization': `Bearer ${apiKey}` + } + }); + + const audioData = []; + + ws.on('open', () => { + console.log('WebSocket connection established!'); + }); + + ws.on('message', (data) => { + const message = JSON.parse(data.toString()); + + if (message.type === 'session.created') { + console.log(`Session created: ${message.session.id}`); + + // Send text chunks + const textChunks = [ + "Hello, this is a test.", + "This is the second sentence.", + "And this is the final one." + ]; + + textChunks.forEach((text, index) => { + setTimeout(() => { + ws.send(JSON.stringify({ + type: 'input_text_buffer.append', + text: text + })); + }, index * 500); + }); + + // Commit after all chunks + setTimeout(() => { + ws.send(JSON.stringify({ + type: 'input_text_buffer.commit' + })); + }, textChunks.length * 500 + 100); + + } else if (message.type === 'conversation.item.input_text.received') { + console.log(`Text received: ${message.text}`); + } else if (message.type === 'conversation.item.audio_output.delta') { + // Decode base64 audio chunk + const audioChunk = Buffer.from(message.delta, 'base64'); + audioData.push(audioChunk); + console.log(`Received audio chunk for item ${message.item_id}`); + } else if (message.type === 'conversation.item.audio_output.done') { + console.log(`Audio generation complete for item ${message.item_id}`); + } else if (message.type === 'conversation.item.tts.failed') { + const errorMessage = message.error?.message ?? 'Unknown error'; + console.error(`Error: ${errorMessage}`); + ws.close(); + } + }); + + ws.on('close', () => { + // Save the audio to a file + if (audioData.length > 0) { + const completeAudio = Buffer.concat(audioData); + fs.writeFileSync('output.wav', completeAudio); + console.log('Audio saved to output.wav'); + } + }); + + ws.on('error', (error) => { + console.error('WebSocket error:', error); + }); parameters: - - name: endpointId - in: path - required: true + - in: query + name: model + required: false schema: type: string - description: The ID of the endpoint to update - example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - display_name: - type: string - description: A human-readable name for the endpoint - example: My Llama3 70b endpoint - state: - type: string - description: The desired state of the endpoint - enum: - - STARTED - - STOPPED - example: STARTED - autoscaling: - $ref: '#/components/schemas/Autoscaling' - description: New autoscaling configuration for the endpoint - inactive_timeout: - type: integer - description: The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout. - nullable: true - example: 60 + enum: + - hexgrad/Kokoro-82M + - cartesia/sonic-english + default: hexgrad/Kokoro-82M + description: The TTS model to use for speech generation. Can also be set via `tts_session.updated` event. + - in: query + name: voice + required: false + schema: + type: string + default: tara + description: | + The voice to use for speech generation. Default is 'tara'. + Available voices vary by model. Can also be updated via `tts_session.updated` event. 
+ - in: query + name: max_partial_length + required: false + schema: + type: integer + default: 250 + description: | + Maximum number of characters in partial text before forcing TTS generation + even without a sentence ending. Helps reduce latency for long text without punctuation. responses: - '200': - description: '200' - content: - application/json: - schema: - $ref: '#/components/schemas/DedicatedEndpoint' - '403': - description: 'Unauthorized' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '404': - description: 'Not Found' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '500': - description: 'Internal error' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' + '101': + description: | + Switching Protocols - WebSocket connection established successfully. - delete: - tags: ['Endpoints'] - summary: Delete endpoint - description: Permanently deletes an endpoint. This action cannot be undone. + Error message format: + ```json + { + "type": "conversation.item.tts.failed", + "error": { + "message": "Error description", + "type": "invalid_request_error", + "param": null, + "code": "error_code" + } + } + ``` + /audio/transcriptions: + post: + tags: ['Audio'] + summary: Create audio transcription request + description: Transcribes audio into text x-codeSamples: - lang: Python label: Together AI SDK (v1) source: | # Docs for v2 can be found by changing the above selector ^ from together import Together - import os client = Together( api_key=os.environ.get("TOGETHER_API_KEY"), ) - endpoint = client.endpoints.delete( - endpoint_id="endpoint-id", + file = open("audio.wav", "rb") + + response = client.audio.transcriptions.create( + model="openai/whisper-large-v3", + file=file, ) - print(endpoint) + print(response.text) + - lang: Python + label: Together AI SDK (v2) + source: | + from together import Together + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + file = open("audio.wav", "rb") + + response = client.audio.transcriptions.create( + model="openai/whisper-large-v3", + file=file, + ) + + print(response.text) - lang: TypeScript label: Together AI SDK (TypeScript) source: | import Together from "together-ai"; + import { readFileSync } from "fs"; + import { join } from "path"; const client = new Together({ apiKey: process.env.TOGETHER_API_KEY, }); - const endpoint = await client.endpoints.delete("endpoint-id"); + const audioFilePath = join(process.cwd(), "audio.wav"); + const audioBuffer = readFileSync(audioFilePath); + const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" }); - console.log(endpoint); + const response = await client.audio.transcriptions.create({ + model: "openai/whisper-large-v3", + file: audioFile, + }); + + console.log(response.text); - lang: JavaScript label: Together AI SDK (JavaScript) source: | import Together from "together-ai"; - + import { readFileSync } from "fs"; + import { join } from "path"; + const client = new Together({ apiKey: process.env.TOGETHER_API_KEY, }); - const endpoint = await client.endpoints.delete("endpoint-id"); + const audioFilePath = join(process.cwd(), "audio.wav"); + const audioBuffer = readFileSync(audioFilePath); + const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" }); - console.log(endpoint); + const response = await client.audio.transcriptions.create({ + model: "openai/whisper-large-v3", + file: audioFile, + }); + + console.log(response.text); - lang: Shell label: cURL 
source: | - curl -X "DELETE" "https://api.together.xyz/v1/endpoints/endpoint-id" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" - operationId: deleteEndpoint - parameters: - - name: endpointId - in: path - required: true - schema: - type: string - description: The ID of the endpoint to delete - example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 + curl -X POST "https://api.together.xyz/v1/audio/transcriptions" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -F "file=@audio.wav" \ + -F "model=openai/whisper-large-v3" + operationId: audio-transcriptions + requestBody: + required: true + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/AudioTranscriptionRequest' responses: - '204': - description: 'No Content - Endpoint successfully deleted' - '403': - description: 'Unauthorized' + '200': + description: 'OK' + content: + application/json: + schema: + $ref: '#/components/schemas/AudioTranscriptionResponse' + '400': + description: 'BadRequest' content: application/json: schema: $ref: '#/components/schemas/ErrorData' - '404': - description: 'Not Found' + '401': + description: 'Unauthorized' content: application/json: schema: $ref: '#/components/schemas/ErrorData' - '500': - description: 'Internal error' + '429': + description: 'RateLimit' content: application/json: schema: $ref: '#/components/schemas/ErrorData' - - /hardware: - get: - tags: ['Hardware'] - summary: List available hardware configurations - description: > - Returns a list of available hardware configurations for deploying models. - When a model parameter is provided, it returns only hardware configurations compatible - with that model, including their current availability status. + /audio/translations: + post: + tags: ['Audio'] + summary: Create audio translation request + description: Translates audio into English x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -3934,10 +3983,15 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.endpoints.list_hardware() + file = open("audio.wav", "rb") - for hardware in response: - print(hardware.id) + response = client.audio.translations.create( + model="openai/whisper-large-v3", + file=file, + language="es", + ) + + print(response.text) - lang: Python label: Together AI SDK (v2) source: | @@ -3948,107 +4002,113 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.hardware.list() + file = open("audio.wav", "rb") - for hardware in response.data: - print(hardware.id) + response = client.audio.translations.create( + model="openai/whisper-large-v3", + file=file, + language="es", + ) + + print(response.text) - lang: TypeScript label: Together AI SDK (TypeScript) source: | import Together from "together-ai"; + import { readFileSync } from "fs"; + import { join } from "path"; const client = new Together({ apiKey: process.env.TOGETHER_API_KEY, }); - const hardware = await client.hardware.list(); + const audioFilePath = join(process.cwd(), "audio.wav"); + const audioBuffer = readFileSync(audioFilePath); + const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" }); - console.log(hardware); + const response = await client.audio.translations.create({ + model: "openai/whisper-large-v3", + file: audioFile, + language: "es" + }); + + console.log(response.text); - lang: JavaScript label: Together AI SDK (JavaScript) source: | import Together from "together-ai"; + import { readFileSync } from "fs"; + import { join } from "path"; const client = new Together({ apiKey: process.env.TOGETHER_API_KEY, }); - 
const hardware = await client.hardware.list();
+          const audioFilePath = join(process.cwd(), "audio.wav");
+          const audioBuffer = readFileSync(audioFilePath);
+          const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });
 
-          console.log(hardware);
+          const response = await client.audio.translations.create({
+            model: "openai/whisper-large-v3",
+            file: audioFile,
+            language: "es"
+          });
+
+          console.log(response.text);
       - lang: Shell
         label: cURL
         source: |
-          curl "https://api.together.xyz/v1/hardware" \
+        curl -X POST "https://api.together.xyz/v1/audio/translations" \
           -H "Authorization: Bearer $TOGETHER_API_KEY" \
-          -H "Content-Type: application/json"
-      operationId: listHardware
-      parameters:
-        - name: model
-          in: query
-          required: false
-          schema:
-            type: string
-          description: >
-            Filter hardware configurations by model compatibility. When provided,
-            the response includes availability status for each compatible configuration.
-          example: meta-llama/Llama-3-70b-chat-hf
+          -F "file=@audio.wav" \
+          -F "model=openai/whisper-large-v3" \
+          -F "language=es"
+      operationId: audio-translations
+      requestBody:
+        required: true
+        content:
+          multipart/form-data:
+            schema:
+              $ref: '#/components/schemas/AudioTranslationRequest'
      responses:
        '200':
-          description: 'List of available hardware configurations'
+          description: 'OK'
          content:
            application/json:
              schema:
-                type: object
-                required:
-                  - object
-                  - data
-                properties:
-                  object:
-                    type: string
-                    enum:
-                      - list
-                  data:
-                    type: array
-                    items:
-                      $ref: '#/components/schemas/HardwareWithStatus'
-        '403':
+                $ref: '#/components/schemas/AudioTranslationResponse'
+        '400':
+          description: 'BadRequest'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '401':
          description: 'Unauthorized'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
-        '500':
-          description: 'Internal error'
+        '429':
+          description: 'RateLimit'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorData'
-    /tci/execute:
-      post:
-        tags: ['Code Interpreter']
-        callbacks: {}
-        description: |
-          Executes the given code snippet and returns the output. Without a session_id, a new session will be created to run the code. If you do pass in a valid session_id, the code will be run in that session. This is useful for running multiple code snippets in the same environment, because dependencies and similar things are persisted
-          between calls to the same session.
+    /clusters/availability-zones:
+      get:
+        tags: ['endpoints']
+        summary: List all available availability zones.
+        description: List all available availability zones. 
+ operationId: availabilityZones + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/ListAvailibilityZonesResponse' x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.code_interpreter.run( - code="print('Hello world!')", - language="python", - ) - - print(response.data.outputs[0].data); - lang: Python label: Together AI SDK (v2) source: | @@ -4059,12 +4119,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.code_interpreter.execute( - code="print('Hello world!')", - language="python", - ) + response = client.endpoints.list_avzones() - print(response.data.outputs[0].data); + print(response.avzones) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -4074,12 +4131,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.codeInterpreter.execute({ - code: "print('Hello world!')", - language: "python" - }); + const response = await client.endpoints.listAvzones(); - console.log(response.data?.outputs?.[0]?.data); + console.log(response.avzones); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -4089,50 +4143,36 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.codeInterpreter.execute({ - code: "print('Hello world!')", - language: "python" - }); + const response = await client.endpoints.listAvzones(); - console.log(response.data?.outputs?.[0]?.data); + console.log(response.avzones); - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/tci/execute" \ + curl "https://api.together.xyz/v1/clusters/availability-zones" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "code": "print(\'Hello world!\')", - "language": "python" - }' - operationId: tci/execute - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/ExecuteRequest' - description: Execute Request - required: false - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/ExecuteResponse' - description: Execute Response - /tci/sessions: + -H "Content-Type: application/json" + /endpoints: get: - tags: ['Code Interpreter'] - callbacks: {} - description: | - Lists all your currently active sessions. + tags: ['Endpoints'] + summary: List all endpoints, can be filtered by type + description: Returns a list of all endpoints associated with your account. You can filter the results by type (dedicated or serverless). 
x-codeSamples: - lang: Python label: Together AI SDK (v1) source: | # Docs for v2 can be found by changing the above selector ^ - # together v1 does not support this method + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + endpoints = client.endpoints.list() + + for endpoint in endpoints: + print(endpoint.id) - lang: Python label: Together AI SDK (v2) source: | @@ -4143,10 +4183,10 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.code_interpreter.sessions.list() + response = client.endpoints.list() - for session in response.data.sessions: - print(session.id) + for endpoint in response.data: + print(endpoint.id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -4156,10 +4196,10 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.codeInterpreter.sessions.list(); + const endpoints = await client.endpoints.list(); - for (const session of response.data?.sessions) { - console.log(session.id); + for (const endpoint of endpoints.data) { + console.log(endpoint); } - lang: JavaScript label: Together AI SDK (JavaScript) @@ -4170,31 +4210,91 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.codeInterpreter.sessions.list(); + const endpoints = await client.endpoints.list(); - for (const session of response.data?.sessions) { - console.log(session.id); + for (const endpoint of endpoints.data) { + console.log(endpoint); } - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/tci/sessions" \ + curl "https://api.together.xyz/v1/endpoints" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ -H "Content-Type: application/json" - operationId: sessions/list - parameters: [] + operationId: listEndpoints + parameters: + - name: type + in: query + required: false + schema: + type: string + enum: + - dedicated + - serverless + description: Filter endpoints by type + example: dedicated + - name: usage_type + in: query + required: false + schema: + type: string + enum: + - on-demand + - reserved + description: Filter endpoints by usage type + example: on-demand + - name: mine + in: query + required: false + schema: + type: boolean + description: If true, return only endpoints owned by the caller responses: '200': + description: '200' content: application/json: schema: - $ref: '#/components/schemas/SessionListResponse' - description: List Response - /batches: - get: - tags: ['Batches'] - summary: List batch jobs - description: List all batch jobs for the authenticated user + type: object + required: + - object + - data + properties: + object: + type: string + enum: + - list + data: + type: array + items: + $ref: '#/components/schemas/ListEndpoint' + example: + object: 'list' + data: + - object: 'endpoint' + id: 'endpoint-5c0c20db-62fe-4f41-8ffc-d9e4ea1a264e' + name: 'allenai/OLMo-7B' + model: 'allenai/OLMo-7B' + type: 'serverless' + owner: 'together' + state: 'STARTED' + created_at: '2024-02-28T21:34:35.444Z' + '403': + description: 'Unauthorized' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '500': + description: 'Internal error' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + post: + tags: ['Endpoints'] + summary: Create a dedicated endpoint, it will start automatically + description: Creates a new dedicated endpoint for serving models. The endpoint will automatically start after creation. 
You can deploy any supported model on hardware configurations that meet the model's requirements. x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -4207,10 +4307,14 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - batches = client.batches.list_batches() + endpoint = client.endpoints.create( + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + hardware="1x_nvidia_a100_80gb_sxm", + min_replicas=2, + max_replicas=5, + ) - for batch in batches: - print(batch.id) + print(endpoint.id) - lang: Python label: Together AI SDK (v2) source: | @@ -4221,10 +4325,16 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - batches = client.batches.list() + endpoint = client.endpoints.create( + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + hardware="1x_nvidia_a100_80gb_sxm", + autoscaling={ + "min_replicas": 2, + "max_replicas": 5, + } + ) - for batch in batches: - print(batch.id) + print(endpoint.id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -4234,9 +4344,16 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const batches = await client.batches.list(); + const endpoint = await client.endpoints.create({ + model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + hardware: "1x_nvidia_a100_80gb_sxm", + autoscaling: { + max_replicas: 5, + min_replicas: 2, + } + }); - console.log(batches); + console.log(endpoint.id); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -4246,42 +4363,62 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const batches = await client.batches.list(); + const endpoint = await client.endpoints.create({ + model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + hardware: "1x_nvidia_a100_80gb_sxm", + autoscaling: { + max_replicas: 5, + min_replicas: 2, + } + }); - console.log(batches); + console.log(endpoint.id); - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/batches" \ + curl -X POST "https://api.together.xyz/v1/endpoints" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - security: - - bearerAuth: [] + -H "Content-Type: application/json" \ + -d '{ + "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + "hardware": "1x_nvidia_a100_80gb_sxm", + "autoscaling": { + "max_replicas": 5, + "min_replicas": 2 + } + }' + operationId: createEndpoint + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateEndpointRequest' responses: '200': - description: OK + description: '200' content: application/json: schema: - type: array - items: - $ref: '#/components/schemas/BatchJob' - '401': - description: Unauthorized + $ref: '#/components/schemas/DedicatedEndpoint' + '403': + description: 'Unauthorized' content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' + $ref: '#/components/schemas/ErrorData' '500': - description: Internal Server Error + description: 'Internal error' content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - post: - tags: ['Batches'] - summary: Create a batch job - description: Create a new batch job with the given input file and endpoint + $ref: '#/components/schemas/ErrorData' + + /endpoints/{endpointId}: + get: + tags: ['Endpoints'] + summary: Get endpoint by ID + description: Retrieves details about a specific endpoint, including its current state, configuration, and scaling settings. 
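Because a newly created dedicated endpoint starts automatically but not instantly, callers often poll this operation until the reported state settles. A minimal sketch built on the `client.endpoints.retrieve` call from the samples below; the `state` attribute and the polling interval/attempt limit are assumptions for illustration:

```python
# Minimal sketch: poll an endpoint until it reports state == "STARTED".
# Uses client.endpoints.retrieve() as in the samples below; the 10-second
# interval and 30-attempt cap are arbitrary illustration values.
import os
import time

from together import Together

client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))

endpoint_id = "endpoint-id"  # placeholder, e.g. the id returned by endpoints.create()

for _ in range(30):
    endpoint = client.endpoints.retrieve(endpoint_id)
    print(endpoint.state)
    if endpoint.state == "STARTED":
        break
    time.sleep(10)
```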
x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -4294,9 +4431,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - batch = client.batches.create_batch("file_id", endpoint="/v1/chat/completions") + endpoint = client.endpoints.get("endpoint-id") - print(batch.id) + print(endpoint.id) - lang: Python label: Together AI SDK (v2) source: | @@ -4307,9 +4444,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - batch = client.batches.create(input_file_id="file_id", endpoint="/v1/chat/completions") + endpoint = client.endpoints.retrieve("endpoint-id") - print(batch.job) + print(endpoint.id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -4319,12 +4456,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const batch = await client.batches.create({ - endpoint: "/v1/chat/completions", - input_file_id: "file-id", - }); + const endpoint = await client.endpoints.retrieve("endpoint-id"); - console.log(batch); + console.log(endpoint); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -4334,67 +4468,54 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const batch = await client.batches.create({ - endpoint: "/v1/chat/completions", - input_file_id: "file-id", - }); + const endpoint = await client.endpoints.retrieve("endpoint-id"); - console.log(batch); + console.log(endpoint); - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/batches" \ + curl "https://api.together.xyz/v1/endpoints/endpoint-id" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "endpoint": "/v1/chat/completions", - "input_file_id": "file-id" - }' - security: - - bearerAuth: [] - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/CreateBatchRequest' + -H "Content-Type: application/json" + operationId: getEndpoint + parameters: + - name: endpointId + in: path + required: true + schema: + type: string + description: The ID of the endpoint to retrieve + example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 responses: - '201': - description: Job created (potentially with warnings) - content: - application/json: - schema: - $ref: '#/components/schemas/BatchJobWithWarning' - '400': - description: Bad Request + '200': + description: '200' content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - '401': - description: Unauthorized + $ref: '#/components/schemas/DedicatedEndpoint' + '403': + description: 'Unauthorized' content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - '429': - description: Too Many Requests + $ref: '#/components/schemas/ErrorData' + '404': + description: 'Not Found' content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' + $ref: '#/components/schemas/ErrorData' '500': - description: Internal Server Error + description: 'Internal error' content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' + $ref: '#/components/schemas/ErrorData' - /batches/{id}: - get: - tags: ['Batches'] - summary: Get a batch job - description: Get details of a batch job by ID + patch: + tags: ['Endpoints'] + summary: Update endpoint, this can also be used to start or stop a dedicated endpoint + description: Updates an existing endpoint's configuration. You can modify the display name, autoscaling settings, or change the endpoint's state (start/stop). 
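The samples below only flip the endpoint to STOPPED; the request body defined later in this operation also accepts `display_name` and an `autoscaling` object. A minimal sketch of adjusting both with the SDK; passing them as keyword arguments to `endpoints.update` mirrors the create sample's `autoscaling` dict and is an assumption about the SDK signature:

```python
# Minimal sketch: rename an endpoint and widen its autoscaling range.
# Field names (display_name, autoscaling.min_replicas/max_replicas) come from
# the request body schema in this operation; passing them as keyword arguments
# to endpoints.update() is an assumption about the SDK.
import os

from together import Together

client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))

endpoint = client.endpoints.update(
    endpoint_id="endpoint-id",
    display_name="My Llama3 70b endpoint",
    autoscaling={"min_replicas": 1, "max_replicas": 8},
)

print(endpoint)
```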
x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -4407,22 +4528,12 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - batch = client.batches.get_batch("batch_id") - - print(batch) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), + endpoint = client.endpoints.update( + endpoint_id="endpoint-id", + state="STOPPED" ) - batch = client.batches.retrieve("batch_id") - - print(batch) + print(endpoint) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -4432,9 +4543,11 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const batch = await client.batches.retrieve("batch-id"); + const endpoint = await client.endpoints.update("endpoint-id", { + state: "STOPPED" + }); - console.log(batch); + console.log(endpoint); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -4444,67 +4557,85 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const batch = await client.batches.retrieve("batch-id"); + const endpoint = await client.endpoints.update("endpoint-id", { + state: "STOPPED" + }); - console.log(batch); + console.log(endpoint); - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/batches/ID" \ + curl -X PATCH "https://api.together.xyz/v1/endpoints/endpoint-id" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - security: - - bearerAuth: [] + -H "Content-Type: application/json" \ + -d '{ + "state": "STOPPED" + }' + operationId: updateEndpoint parameters: - - name: id + - name: endpointId in: path required: true - description: Job ID schema: type: string - example: 'batch_job_abc123def456' + description: The ID of the endpoint to update + example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + display_name: + type: string + description: A human-readable name for the endpoint + example: My Llama3 70b endpoint + state: + type: string + description: The desired state of the endpoint + enum: + - STARTED + - STOPPED + example: STARTED + autoscaling: + $ref: '#/components/schemas/Autoscaling' + description: New autoscaling configuration for the endpoint + inactive_timeout: + type: integer + description: The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout. 
+ nullable: true + example: 60 responses: '200': - description: OK - content: - application/json: - schema: - $ref: '#/components/schemas/BatchJob' - '400': - description: Bad Request - content: - application/json: - schema: - $ref: '#/components/schemas/BatchErrorResponse' - '401': - description: Unauthorized + description: '200' content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' + $ref: '#/components/schemas/DedicatedEndpoint' '403': - description: Forbidden + description: 'Unauthorized' content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' + $ref: '#/components/schemas/ErrorData' '404': - description: Not Found + description: 'Not Found' content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' + $ref: '#/components/schemas/ErrorData' '500': - description: Internal Server Error + description: 'Internal error' content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - /batches/{id}/cancel: - post: - tags: ['Batches'] - summary: Cancel a batch job - description: Cancel a batch job by ID + $ref: '#/components/schemas/ErrorData' + + delete: + tags: ['Endpoints'] + summary: Delete endpoint + description: Permanently deletes an endpoint. This action cannot be undone. x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -4517,22 +4648,11 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - batch = client.batches.cancel("batch_id") - - print(batch) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), + endpoint = client.endpoints.delete( + endpoint_id="endpoint-id", ) - batch = client.batches.cancel("batch_id") - - print(batch) + print(endpoint) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -4542,9 +4662,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const batch = await client.batches.cancel("batch-id"); + const endpoint = await client.endpoints.delete("endpoint-id"); - console.log(batch); + console.log(endpoint); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -4554,68 +4674,53 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const batch = await client.batches.cancel("batch-id"); + const endpoint = await client.endpoints.delete("endpoint-id"); - console.log(batch); + console.log(endpoint); - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/batches/ID/cancel" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - security: - - bearerAuth: [] + curl -X "DELETE" "https://api.together.xyz/v1/endpoints/endpoint-id" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" + operationId: deleteEndpoint parameters: - - name: id + - name: endpointId in: path required: true - description: Job ID schema: type: string - example: 'batch_job_abc123def456' + description: The ID of the endpoint to delete + example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 responses: - '200': - description: OK - content: - application/json: - schema: - $ref: '#/components/schemas/BatchJob' - '400': - description: Bad Request - content: - application/json: - schema: - $ref: '#/components/schemas/BatchErrorResponse' - '401': - description: Unauthorized - content: - application/json: - schema: - $ref: '#/components/schemas/BatchErrorResponse' + '204': + description: 'No Content - Endpoint successfully deleted' '403': - description: Forbidden + description: 
'Unauthorized' content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' + $ref: '#/components/schemas/ErrorData' '404': - description: Not Found + description: 'Not Found' content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' + $ref: '#/components/schemas/ErrorData' '500': - description: Internal Server Error + description: 'Internal error' content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - /evaluation: - post: - tags: - - evaluation - summary: Create an evaluation job - operationId: createEvaluationJob + $ref: '#/components/schemas/ErrorData' + + /hardware: + get: + tags: ['Hardware'] + summary: List available hardware configurations + description: > + Returns a list of available hardware configurations for deploying models. + When a model parameter is provided, it returns only hardware configurations compatible + with that model, including their current availability status. x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -4628,17 +4733,10 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.evaluation.create( - type="classify", - judge_model_name="meta-llama/Llama-3.1-70B-Instruct-Turbo", - judge_system_template="You are an expert evaluator...", - input_data_file_path="file-abc123", - labels=["good", "bad"], - pass_labels=["good"], - model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo" - ) + response = client.endpoints.list_hardware() - print(response.workflow_id) + for hardware in response: + print(hardware.id) - lang: Python label: Together AI SDK (v2) source: | @@ -4649,22 +4747,10 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.evals.create( - type="classify", - parameters=ParametersEvaluationClassifyParameters( - judge=ParametersEvaluationClassifyParametersJudge( - model="meta-llama/Llama-3.1-70B-Instruct-Turbo", - model_source="serverless", - system_template="You are an expert evaluator...", - ), - input_data_file_path="file-abc123", - labels=["good", "bad"], - pass_labels=["good"], - model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo" - ) - ) + response = client.hardware.list() - print(response.workflow_id) + for hardware in response.data: + print(hardware.id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -4674,22 +4760,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.evals.create({ - type: 'classify', - parameters: { - judge: { - model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo', - model_source: 'serverless', - system_template: 'You are an expert evaluator...', - }, - input_data_file_path: 'file-abc123', - labels: ['good', 'bad'], - pass_labels: ['good'], - model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo', - }, - }); + const hardware = await client.hardware.list(); - console.log(response.workflow_id); + console.log(hardware); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -4699,54 +4772,64 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.evals.create({ - type: 'classify', - parameters: { - judge: { - model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo', - model_source: 'serverless', - system_template: 'You are an expert evaluator...', - }, - input_data_file_path: 'file-abc123', - labels: ['good', 'bad'], - pass_labels: ['good'], - model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo', - }, - }); - - console.log(response.workflow_id); - + const hardware = await client.hardware.list(); - 
requestBody: - required: true - content: - application/json: - schema: - $ref: "#/components/schemas/EvaluationTypedRequest" + console.log(hardware); + - lang: Shell + label: cURL + source: | + curl "https://api.together.xyz/v1/hardware" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" + operationId: listHardware + parameters: + - name: model + in: query + required: false + schema: + type: string + description: > + Filter hardware configurations by model compatibility. When provided, + the response includes availability status for each compatible configuration. + example: meta-llama/Llama-3-70b-chat-hf responses: - "200": - description: "Evaluation job created successfully" + '200': + description: 'List of available hardware configurations' content: application/json: schema: - $ref: "#/components/schemas/EvaluationResponse" - "400": - description: "Invalid request format" + type: object + required: + - object + - data + properties: + object: + type: string + enum: + - list + data: + type: array + items: + $ref: '#/components/schemas/HardwareWithStatus' + '403': + description: 'Unauthorized' content: application/json: schema: - $ref: "#/components/schemas/ErrorData" - "500": - description: "Failed to create evaluation job" + $ref: '#/components/schemas/ErrorData' + '500': + description: 'Internal error' content: application/json: schema: - $ref: "#/components/schemas/ErrorData" - get: - tags: - - evaluation - summary: Get all evaluation jobs - operationId: getAllEvaluationJobs + $ref: '#/components/schemas/ErrorData' + /tci/execute: + post: + tags: ['Code Interpreter'] + callbacks: {} + description: | + Executes the given code snippet and returns the output. Without a session_id, a new session will be created to run the code. If you do pass in a valid session_id, the code will be run in that session. This is useful for running multiple code snippets in the same environment, because dependencies and similar things are persisted + between calls to the same session. 
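The note above about state persisting within a session is easiest to see with two calls that share a session. A minimal sketch using the v2 `code_interpreter.execute` call from the samples below; the `session_id` keyword argument and the location of the session id on the first response are assumptions:

```python
# Minimal sketch: run two snippets in the same interpreter session so that
# state (here, the variable `x`) persists between calls.
# Assumptions: execute() accepts a session_id argument and the first response
# exposes the created session's id as response.data.session_id.
import os

from together import Together

client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))

first = client.code_interpreter.execute(
    code="x = 21",
    language="python",
)

second = client.code_interpreter.execute(
    code="print(x * 2)",  # uses `x` defined in the first call
    language="python",
    session_id=first.data.session_id,  # assumed response field
)

print(second.data.outputs[0].data)
```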
x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -4759,10 +4842,12 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - jobs = client.evaluation.list() + response = client.code_interpreter.run( + code="print('Hello world!')", + language="python", + ) - for job in jobs: - print(job.workflow_id) + print(response.data.outputs[0].data); - lang: Python label: Together AI SDK (v2) source: | @@ -4773,10 +4858,12 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.evals.list() + response = client.code_interpreter.execute( + code="print('Hello world!')", + language="python", + ) - for job in response: - print(job.workflow_id) + print(response.data.outputs[0].data); - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -4786,11 +4873,12 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.evals.list(); + const response = await client.codeInterpreter.execute({ + code: "print('Hello world!')", + language: "python" + }); - for (const job of response) { - console.log(job.workflow_id); - } + console.log(response.data?.outputs?.[0]?.data); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -4800,100 +4888,53 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.evals.list(); + const response = await client.codeInterpreter.execute({ + code: "print('Hello world!')", + language: "python" + }); - for (const job of response) { - console.log(job.workflow_id); - } - parameters: - - name: status - in: query - required: false - schema: - type: string - default: "pending" - - name: limit - in: query - required: false - schema: - type: integer - default: 10 - - name: userId - in: query - required: false - description: "Admin users can specify a user ID to filter jobs. Pass empty string to get all jobs." 
- schema: - type: string - responses: - "200": - description: "evaluation jobs retrieved successfully" - content: - application/json: - schema: - type: array - items: - $ref: "#/components/schemas/EvaluationJob" - "400": - description: "Invalid request format" - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorData" - "500": - description: "Error retrieving jobs from manager" - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorData" - /evaluation/model-list: - get: - tags: - - evaluation - summary: Get model list - operationId: getModelList - parameters: - - name: model_source - in: query - required: false - schema: - type: string - default: "all" + console.log(response.data?.outputs?.[0]?.data); + - lang: Shell + label: cURL + source: | + curl -X POST "https://api.together.xyz/v1/tci/execute" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "code": "print(\'Hello world!\')", + "language": "python" + }' + operationId: tci/execute + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/ExecuteRequest' + description: Execute Request + required: false responses: - "200": - description: "Model list retrieved successfully" - content: - application/json: - schema: - type: object - properties: - model_list: - type: array - items: - type: string - description: "The name of the model" - "400": - description: "Invalid request format" - content: - application/json: - schema: - $ref: "#/components/schemas/ErrorData" - "500": - description: "Error retrieving model list" + '200': content: application/json: schema: - $ref: "#/components/schemas/ErrorData" - /evaluation/{id}: + $ref: '#/components/schemas/ExecuteResponse' + description: Execute Response + /tci/sessions: get: - tags: - - evaluation - summary: Get evaluation job details - operationId: getEvaluationJobDetails + tags: ['Code Interpreter'] + callbacks: {} + description: | + Lists all your currently active sessions. 
x-codeSamples: - lang: Python label: Together AI SDK (v1) source: | # Docs for v2 can be found by changing the above selector ^ + # together v1 does not support this method + - lang: Python + label: Together AI SDK (v2) + source: | from together import Together import os @@ -4901,10 +4942,75 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.evaluation.retrieve('eval_id') + response = client.code_interpreter.sessions.list() - print(response) - - lang: Python + for session in response.data.sessions: + print(session.id) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.codeInterpreter.sessions.list(); + + for (const session of response.data?.sessions) { + console.log(session.id); + } + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.codeInterpreter.sessions.list(); + + for (const session of response.data?.sessions) { + console.log(session.id); + } + - lang: Shell + label: cURL + source: | + curl "https://api.together.xyz/v1/tci/sessions" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" + operationId: sessions/list + parameters: [] + responses: + '200': + content: + application/json: + schema: + $ref: '#/components/schemas/SessionListResponse' + description: List Response + /batches: + get: + tags: ['Batches'] + summary: List batch jobs + description: List all batch jobs for the authenticated user + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + batches = client.batches.list_batches() + + for batch in batches: + print(batch.id) + - lang: Python label: Together AI SDK (v2) source: | from together import Together @@ -4914,9 +5020,10 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.evals.retrieve('eval_id') + batches = client.batches.list() - print(response) + for batch in batches: + print(batch.id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -4926,9 +5033,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.evals.retrieve('eval_id'); + const batches = await client.batches.list(); - console.log(response); + console.log(batches); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -4938,41 +5045,42 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.evals.retrieve('eval_id'); + const batches = await client.batches.list(); - console.log(response); - parameters: - - name: id - in: path - required: true - schema: - type: string + console.log(batches); + - lang: Shell + label: cURL + source: | + curl "https://api.together.xyz/v1/batches" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" + security: + - bearerAuth: [] responses: - "200": - description: "Evaluation job details retrieved successfully" + '200': + description: OK content: application/json: schema: - $ref: "#/components/schemas/EvaluationJob" - "404": - description: "Evaluation job not found" + type: array + items: + $ref: '#/components/schemas/BatchJob' + '401': 
+ description: Unauthorized content: application/json: schema: - $ref: "#/components/schemas/ErrorData" - "500": - description: "Failed to get evaluation job" + $ref: '#/components/schemas/BatchErrorResponse' + '500': + description: Internal Server Error content: application/json: schema: - $ref: "#/components/schemas/ErrorData" - - /evaluation/{id}/status: - get: - tags: - - evaluation - summary: Get evaluation job status and results - operationId: getEvaluationJobStatusAndResults + $ref: '#/components/schemas/BatchErrorResponse' + post: + tags: ['Batches'] + summary: Create a batch job + description: Create a new batch job with the given input file and endpoint x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -4985,10 +5093,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.evaluation.status('eval_id') + batch = client.batches.create_batch("file_id", endpoint="/v1/chat/completions") - print(response.status) - print(response.results) + print(batch.id) - lang: Python label: Together AI SDK (v2) source: | @@ -4999,10 +5106,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.evals.status('eval_id') + batch = client.batches.create(input_file_id="file_id", endpoint="/v1/chat/completions") - print(response.status) - print(response.results) + print(batch.job) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -5012,10 +5118,12 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.evals.status('eval_id'); + const batch = await client.batches.create({ + endpoint: "/v1/chat/completions", + input_file_id: "file-id", + }); - console.log(response.status); - console.log(response.results); + console.log(batch); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -5025,289 +5133,1841 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.evals.status('eval_id'); + const batch = await client.batches.create({ + endpoint: "/v1/chat/completions", + input_file_id: "file-id", + }); - console.log(response.status); - console.log(response.results); - parameters: - - name: id - in: path - required: true - schema: - type: string + console.log(batch); + - lang: Shell + label: cURL + source: | + curl -X POST "https://api.together.xyz/v1/batches" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "endpoint": "/v1/chat/completions", + "input_file_id": "file-id" + }' + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateBatchRequest' responses: - "200": - description: "Evaluation job status and results retrieved successfully" + '201': + description: Job created (potentially with warnings) content: application/json: schema: - type: object - properties: - status: - type: string - description: "The status of the evaluation job" - enum: ["completed", "error", "user_error", "running", "queued", "pending"] - results: - description: "The results of the evaluation job" - oneOf: - - $ref: "#/components/schemas/EvaluationClassifyResults" - - $ref: "#/components/schemas/EvaluationScoreResults" - - $ref: "#/components/schemas/EvaluationCompareResults" - "404": - description: "Evaluation job not found" + $ref: '#/components/schemas/BatchJobWithWarning' + '400': + description: Bad Request content: application/json: schema: - $ref: "#/components/schemas/ErrorData" - "500": - description: "Failed to get evaluation job" + $ref: 
'#/components/schemas/BatchErrorResponse' + '401': + description: Unauthorized content: application/json: schema: - $ref: "#/components/schemas/ErrorData" + $ref: '#/components/schemas/BatchErrorResponse' + '429': + description: Too Many Requests + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' + '500': + description: Internal Server Error + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' - /realtime: + /batches/{id}: get: - tags: ['Audio'] - summary: Real-time audio transcription via WebSocket - description: | - Establishes a WebSocket connection for real-time audio transcription. This endpoint uses WebSocket protocol (wss://api.together.ai/v1/realtime) for bidirectional streaming communication. + tags: ['Batches'] + summary: Get a batch job + description: Get details of a batch job by ID + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os - **Connection Setup:** - - Protocol: WebSocket (wss://) - - Authentication: Pass API key as Bearer token in Authorization header - - Parameters: Sent as query parameters (model, input_audio_format) + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) - **Client Events:** - - `input_audio_buffer.append`: Send audio chunks as base64-encoded data - ```json - { - "type": "input_audio_buffer.append", - "audio": "" - } - ``` - - `input_audio_buffer.commit`: Signal end of audio stream - ```json - { - "type": "input_audio_buffer.commit" - } - ``` + batch = client.batches.get_batch("batch_id") - **Server Events:** - - `session.created`: Initial session confirmation (sent first) - ```json - { - "type": "session.created", - "session": { - "id": "session-id", - "object": "realtime.session", - "modalities": ["audio"], - "model": "openai/whisper-large-v3" - } - } - ``` - - `conversation.item.input_audio_transcription.delta`: Partial transcription results - ```json - { - "type": "conversation.item.input_audio_transcription.delta", - "delta": "The quick brown" - } - ``` - - `conversation.item.input_audio_transcription.completed`: Final transcription - ```json - { - "type": "conversation.item.input_audio_transcription.completed", - "transcript": "The quick brown fox jumps over the lazy dog" - } - ``` - - `conversation.item.input_audio_transcription.failed`: Error occurred - ```json - { - "type": "conversation.item.input_audio_transcription.failed", - "error": { - "message": "Error description", - "type": "invalid_request_error", - "param": null, - "code": "invalid_api_key" - } - } - ``` - - **Error Codes:** - - `invalid_api_key`: Invalid API key provided (401) - - `missing_api_key`: Authorization header missing (401) - - `model_not_available`: Invalid or unavailable model (400) - - Unsupported audio format errors (400) - - operationId: realtime-transcription - x-codeSamples: + print(batch) - lang: Python - label: Python WebSocket Client + label: Together AI SDK (v2) source: | - import asyncio - import websockets - import json - import base64 + from together import Together import os - async def transcribe_audio(): - api_key = os.environ.get("TOGETHER_API_KEY") - url = "wss://api.together.ai/v1/realtime?model=openai/whisper-large-v3&input_audio_format=pcm_s16le_16000" - - headers = { - "Authorization": f"Bearer {api_key}" - } - - async with websockets.connect(url, additional_headers=headers) as ws: - # Read audio file - with open("audio.wav", 
"rb") as f: - audio_data = f.read() + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) - # Send audio in chunks with delay to simulate real-time - chunk_size = 8192 - bytes_per_second = 16000 * 2 # 16kHz * 2 bytes (16-bit) - delay_per_chunk = chunk_size / bytes_per_second + batch = client.batches.retrieve("batch_id") - for i in range(0, len(audio_data), chunk_size): - chunk = audio_data[i:i+chunk_size] - base64_chunk = base64.b64encode(chunk).decode('utf-8') - await ws.send(json.dumps({ - "type": "input_audio_buffer.append", - "audio": base64_chunk - })) - # Simulate real-time streaming - if i + chunk_size < len(audio_data): - await asyncio.sleep(delay_per_chunk) + print(batch) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; - # Commit the audio buffer - await ws.send(json.dumps({ - "type": "input_audio_buffer.commit" - })) + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); - # Receive transcription results - async for message in ws: - data = json.loads(message) - if data["type"] == "conversation.item.input_audio_transcription.delta": - print(f"Partial: {data['delta']}") - elif data["type"] == "conversation.item.input_audio_transcription.completed": - print(f"Final: {data['transcript']}") - break - elif data["type"] == "conversation.item.input_audio_transcription.failed": - error = data.get("error", {}) - print(f"Error: {error.get('message')}") - break + const batch = await client.batches.retrieve("batch-id"); - asyncio.run(transcribe_audio()) + console.log(batch); - lang: JavaScript - label: Node.js WebSocket Client + label: Together AI SDK (JavaScript) source: | - import WebSocket from 'ws'; - import fs from 'fs'; - - const apiKey = process.env.TOGETHER_API_KEY; - const url = 'wss://api.together.ai/v1/realtime?model=openai/whisper-large-v3&input_audio_format=pcm_s16le_16000'; + import Together from "together-ai"; - const ws = new WebSocket(url, { - headers: { - 'Authorization': `Bearer ${apiKey}` - } + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, }); - ws.on('open', async () => { - console.log('WebSocket connection established!'); + const batch = await client.batches.retrieve("batch-id"); - // Read audio file - const audioData = fs.readFileSync('audio.wav'); + console.log(batch); + - lang: Shell + label: cURL + source: | + curl "https://api.together.xyz/v1/batches/ID" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + description: Job ID + schema: + type: string + example: 'batch_job_abc123def456' + responses: + '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/BatchJob' + '400': + description: Bad Request + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' + '401': + description: Unauthorized + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' + '403': + description: Forbidden + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' + '404': + description: Not Found + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' + '500': + description: Internal Server Error + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' + /batches/{id}/cancel: + post: + tags: ['Batches'] + summary: Cancel a batch job + 
description: Cancel a batch job by ID + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os - // Send audio in chunks with delay to simulate real-time - const chunkSize = 8192; - const bytesPerSecond = 16000 * 2; // 16kHz * 2 bytes (16-bit) - const delayPerChunk = (chunkSize / bytesPerSecond) * 1000; // Convert to ms + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) - for (let i = 0; i < audioData.length; i += chunkSize) { - const chunk = audioData.slice(i, i + chunkSize); - const base64Chunk = chunk.toString('base64'); - ws.send(JSON.stringify({ - type: 'input_audio_buffer.append', - audio: base64Chunk - })); + batch = client.batches.cancel("batch_id") - // Simulate real-time streaming - if (i + chunkSize < audioData.length) { - await new Promise(resolve => setTimeout(resolve, delayPerChunk)); - } - } + print(batch) + - lang: Python + label: Together AI SDK (v2) + source: | + from together import Together + import os - // Commit audio buffer - ws.send(JSON.stringify({ - type: 'input_audio_buffer.commit' - })); - }); + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) - ws.on('message', (data) => { - const message = JSON.parse(data.toString()); + batch = client.batches.cancel("batch_id") - if (message.type === 'conversation.item.input_audio_transcription.delta') { - console.log(`Partial: ${message.delta}`); - } else if (message.type === 'conversation.item.input_audio_transcription.completed') { - console.log(`Final: ${message.transcript}`); - ws.close(); - } else if (message.type === 'conversation.item.input_audio_transcription.failed') { - const errorMessage = message.error?.message ?? message.message ?? 'Unknown error'; - console.error(`Error: ${errorMessage}`); - ws.close(); - } - }); + print(batch) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; - ws.on('error', (error) => { - console.error('WebSocket error:', error); + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, }); - parameters: - - in: query - name: model - required: true - schema: - type: string - enum: - - openai/whisper-large-v3 - default: openai/whisper-large-v3 - description: The Whisper model to use for transcription - - in: query - name: input_audio_format + + const batch = await client.batches.cancel("batch-id"); + + console.log(batch); + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const batch = await client.batches.cancel("batch-id"); + + console.log(batch); + - lang: Shell + label: cURL + source: | + curl -X POST "https://api.together.xyz/v1/batches/ID/cancel" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" + security: + - bearerAuth: [] + parameters: + - name: id + in: path required: true + description: Job ID schema: type: string - enum: - - pcm_s16le_16000 - default: pcm_s16le_16000 - description: Audio format specification. Currently supports 16-bit PCM at 16kHz sample rate. + example: 'batch_job_abc123def456' responses: - '101': - description: | - Switching Protocols - WebSocket connection established successfully. 
+ '200': + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/BatchJob' + '400': + description: Bad Request + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' + '401': + description: Unauthorized + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' + '403': + description: Forbidden + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' + '404': + description: Not Found + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' + '500': + description: Internal Server Error + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' + /evaluation: + post: + tags: + - evaluation + summary: Create an evaluation job + operationId: createEvaluationJob + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os - Error message format: - ```json - { - "type": "conversation.item.input_audio_transcription.failed", - "error": { - "message": "Error description", - "type": "invalid_request_error", - "param": null, - "code": "error_code" - } - } - ``` + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) -components: - securitySchemes: - bearerAuth: - type: http - scheme: bearer - x-bearer-format: bearer - x-default: default + response = client.evaluation.create( + type="classify", + judge_model_name="meta-llama/Llama-3.1-70B-Instruct-Turbo", + judge_system_template="You are an expert evaluator...", + input_data_file_path="file-abc123", + labels=["good", "bad"], + pass_labels=["good"], + model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo" + ) - schemas: + print(response.workflow_id) + - lang: Python + label: Together AI SDK (v2) + source: | + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.evals.create( + type="classify", + parameters=ParametersEvaluationClassifyParameters( + judge=ParametersEvaluationClassifyParametersJudge( + model="meta-llama/Llama-3.1-70B-Instruct-Turbo", + model_source="serverless", + system_template="You are an expert evaluator...", + ), + input_data_file_path="file-abc123", + labels=["good", "bad"], + pass_labels=["good"], + model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo" + ) + ) + + print(response.workflow_id) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.evals.create({ + type: 'classify', + parameters: { + judge: { + model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo', + model_source: 'serverless', + system_template: 'You are an expert evaluator...', + }, + input_data_file_path: 'file-abc123', + labels: ['good', 'bad'], + pass_labels: ['good'], + model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo', + }, + }); + + console.log(response.workflow_id); + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.evals.create({ + type: 'classify', + parameters: { + judge: { + model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo', + model_source: 'serverless', + system_template: 'You are an expert 
evaluator...', + }, + input_data_file_path: 'file-abc123', + labels: ['good', 'bad'], + pass_labels: ['good'], + model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo', + }, + }); + + console.log(response.workflow_id); + + + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/EvaluationTypedRequest" + responses: + "200": + description: "Evaluation job created successfully" + content: + application/json: + schema: + $ref: "#/components/schemas/EvaluationResponse" + "400": + description: "Invalid request format" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorData" + "500": + description: "Failed to create evaluation job" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorData" + get: + tags: + - evaluation + summary: Get all evaluation jobs + operationId: getAllEvaluationJobs + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + jobs = client.evaluation.list() + + for job in jobs: + print(job.workflow_id) + - lang: Python + label: Together AI SDK (v2) + source: | + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.evals.list() + + for job in response: + print(job.workflow_id) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.evals.list(); + + for (const job of response) { + console.log(job.workflow_id); + } + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.evals.list(); + + for (const job of response) { + console.log(job.workflow_id); + } + parameters: + - name: status + in: query + required: false + schema: + type: string + default: "pending" + - name: limit + in: query + required: false + schema: + type: integer + default: 10 + - name: userId + in: query + required: false + description: "Admin users can specify a user ID to filter jobs. Pass empty string to get all jobs." 
+ schema: + type: string + responses: + "200": + description: "evaluation jobs retrieved successfully" + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/EvaluationJob" + "400": + description: "Invalid request format" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorData" + "500": + description: "Error retrieving jobs from manager" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorData" + /evaluation/model-list: + get: + tags: + - evaluation + summary: Get model list + operationId: getModelList + parameters: + - name: model_source + in: query + required: false + schema: + type: string + default: "all" + responses: + "200": + description: "Model list retrieved successfully" + content: + application/json: + schema: + type: object + properties: + model_list: + type: array + items: + type: string + description: "The name of the model" + "400": + description: "Invalid request format" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorData" + "500": + description: "Error retrieving model list" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorData" + /evaluation/{id}: + get: + tags: + - evaluation + summary: Get evaluation job details + operationId: getEvaluationJobDetails + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.evaluation.retrieve('eval_id') + + print(response) + - lang: Python + label: Together AI SDK (v2) + source: | + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.evals.retrieve('eval_id') + + print(response) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.evals.retrieve('eval_id'); + + console.log(response); + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.evals.retrieve('eval_id'); + + console.log(response); + parameters: + - name: id + in: path + required: true + schema: + type: string + responses: + "200": + description: "Evaluation job details retrieved successfully" + content: + application/json: + schema: + $ref: "#/components/schemas/EvaluationJob" + "404": + description: "Evaluation job not found" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorData" + "500": + description: "Failed to get evaluation job" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorData" + + /evaluation/{id}/status: + get: + tags: + - evaluation + summary: Get evaluation job status and results + operationId: getEvaluationJobStatusAndResults + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.evaluation.status('eval_id') + + print(response.status) + print(response.results) + - lang: Python + label: Together AI SDK (v2) + source: | 
+ from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.evals.status('eval_id') + + print(response.status) + print(response.results) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.evals.status('eval_id'); + + console.log(response.status); + console.log(response.results); + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.evals.status('eval_id'); + + console.log(response.status); + console.log(response.results); + parameters: + - name: id + in: path + required: true + schema: + type: string + responses: + "200": + description: "Evaluation job status and results retrieved successfully" + content: + application/json: + schema: + type: object + properties: + status: + type: string + description: "The status of the evaluation job" + enum: ["completed", "error", "user_error", "running", "queued", "pending"] + results: + description: "The results of the evaluation job" + oneOf: + - $ref: "#/components/schemas/EvaluationClassifyResults" + - $ref: "#/components/schemas/EvaluationScoreResults" + - $ref: "#/components/schemas/EvaluationCompareResults" + "404": + description: "Evaluation job not found" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorData" + "500": + description: "Failed to get evaluation job" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorData" + + /realtime: + get: + tags: ['Audio'] + summary: Real-time audio transcription via WebSocket + description: | + Establishes a WebSocket connection for real-time audio transcription. This endpoint uses WebSocket protocol (wss://api.together.ai/v1/realtime) for bidirectional streaming communication. 
+ + **Connection Setup:** + - Protocol: WebSocket (wss://) + - Authentication: Pass API key as Bearer token in Authorization header + - Parameters: Sent as query parameters (model, input_audio_format) + + **Client Events:** + - `input_audio_buffer.append`: Send audio chunks as base64-encoded data + ```json + { + "type": "input_audio_buffer.append", + "audio": "" + } + ``` + - `input_audio_buffer.commit`: Signal end of audio stream + ```json + { + "type": "input_audio_buffer.commit" + } + ``` + + **Server Events:** + - `session.created`: Initial session confirmation (sent first) + ```json + { + "type": "session.created", + "session": { + "id": "session-id", + "object": "realtime.session", + "modalities": ["audio"], + "model": "openai/whisper-large-v3" + } + } + ``` + - `conversation.item.input_audio_transcription.delta`: Partial transcription results + ```json + { + "type": "conversation.item.input_audio_transcription.delta", + "delta": "The quick brown" + } + ``` + - `conversation.item.input_audio_transcription.completed`: Final transcription + ```json + { + "type": "conversation.item.input_audio_transcription.completed", + "transcript": "The quick brown fox jumps over the lazy dog" + } + ``` + - `conversation.item.input_audio_transcription.failed`: Error occurred + ```json + { + "type": "conversation.item.input_audio_transcription.failed", + "error": { + "message": "Error description", + "type": "invalid_request_error", + "param": null, + "code": "invalid_api_key" + } + } + ``` + + **Error Codes:** + - `invalid_api_key`: Invalid API key provided (401) + - `missing_api_key`: Authorization header missing (401) + - `model_not_available`: Invalid or unavailable model (400) + - Unsupported audio format errors (400) + + operationId: realtime-transcription + x-codeSamples: + - lang: Python + label: Python WebSocket Client + source: | + import asyncio + import websockets + import json + import base64 + import os + + async def transcribe_audio(): + api_key = os.environ.get("TOGETHER_API_KEY") + url = "wss://api.together.ai/v1/realtime?model=openai/whisper-large-v3&input_audio_format=pcm_s16le_16000" + + headers = { + "Authorization": f"Bearer {api_key}" + } + + async with websockets.connect(url, additional_headers=headers) as ws: + # Read audio file + with open("audio.wav", "rb") as f: + audio_data = f.read() + + # Send audio in chunks with delay to simulate real-time + chunk_size = 8192 + bytes_per_second = 16000 * 2 # 16kHz * 2 bytes (16-bit) + delay_per_chunk = chunk_size / bytes_per_second + + for i in range(0, len(audio_data), chunk_size): + chunk = audio_data[i:i+chunk_size] + base64_chunk = base64.b64encode(chunk).decode('utf-8') + await ws.send(json.dumps({ + "type": "input_audio_buffer.append", + "audio": base64_chunk + })) + # Simulate real-time streaming + if i + chunk_size < len(audio_data): + await asyncio.sleep(delay_per_chunk) + + # Commit the audio buffer + await ws.send(json.dumps({ + "type": "input_audio_buffer.commit" + })) + + # Receive transcription results + async for message in ws: + data = json.loads(message) + if data["type"] == "conversation.item.input_audio_transcription.delta": + print(f"Partial: {data['delta']}") + elif data["type"] == "conversation.item.input_audio_transcription.completed": + print(f"Final: {data['transcript']}") + break + elif data["type"] == "conversation.item.input_audio_transcription.failed": + error = data.get("error", {}) + print(f"Error: {error.get('message')}") + break + + asyncio.run(transcribe_audio()) + - lang: JavaScript + label: Node.js 
WebSocket Client + source: | + import WebSocket from 'ws'; + import fs from 'fs'; + + const apiKey = process.env.TOGETHER_API_KEY; + const url = 'wss://api.together.ai/v1/realtime?model=openai/whisper-large-v3&input_audio_format=pcm_s16le_16000'; + + const ws = new WebSocket(url, { + headers: { + 'Authorization': `Bearer ${apiKey}` + } + }); + + ws.on('open', async () => { + console.log('WebSocket connection established!'); + + // Read audio file + const audioData = fs.readFileSync('audio.wav'); + + // Send audio in chunks with delay to simulate real-time + const chunkSize = 8192; + const bytesPerSecond = 16000 * 2; // 16kHz * 2 bytes (16-bit) + const delayPerChunk = (chunkSize / bytesPerSecond) * 1000; // Convert to ms + + for (let i = 0; i < audioData.length; i += chunkSize) { + const chunk = audioData.slice(i, i + chunkSize); + const base64Chunk = chunk.toString('base64'); + ws.send(JSON.stringify({ + type: 'input_audio_buffer.append', + audio: base64Chunk + })); + + // Simulate real-time streaming + if (i + chunkSize < audioData.length) { + await new Promise(resolve => setTimeout(resolve, delayPerChunk)); + } + } + + // Commit audio buffer + ws.send(JSON.stringify({ + type: 'input_audio_buffer.commit' + })); + }); + + ws.on('message', (data) => { + const message = JSON.parse(data.toString()); + + if (message.type === 'conversation.item.input_audio_transcription.delta') { + console.log(`Partial: ${message.delta}`); + } else if (message.type === 'conversation.item.input_audio_transcription.completed') { + console.log(`Final: ${message.transcript}`); + ws.close(); + } else if (message.type === 'conversation.item.input_audio_transcription.failed') { + const errorMessage = message.error?.message ?? message.message ?? 'Unknown error'; + console.error(`Error: ${errorMessage}`); + ws.close(); + } + }); + + ws.on('error', (error) => { + console.error('WebSocket error:', error); + }); + parameters: + - in: query + name: model + required: true + schema: + type: string + enum: + - openai/whisper-large-v3 + default: openai/whisper-large-v3 + description: The Whisper model to use for transcription + - in: query + name: input_audio_format + required: true + schema: + type: string + enum: + - pcm_s16le_16000 + default: pcm_s16le_16000 + description: Audio format specification. Currently supports 16-bit PCM at 16kHz sample rate. + responses: + '101': + description: | + Switching Protocols - WebSocket connection established successfully. + + Error message format: + ```json + { + "type": "conversation.item.input_audio_transcription.failed", + "error": { + "message": "Error description", + "type": "invalid_request_error", + "param": null, + "code": "error_code" + } + } + ``` + +components: + securitySchemes: + bearerAuth: + type: http + scheme: bearer + x-bearer-format: bearer + x-default: default + + schemas: + api_v1.SignedURLResponse: + properties: + url: + type: string + type: object + files.AbortMultiPartRequest: + properties: + filename: + description: Filename is the name of the file to upload. Can contain + alphanumeric characters, underscores, hyphens, spaces, periods, and + forward slashes with an optional file extension (e.g., + "model_weights.bin", "data-2024.tar.gz", "path/to/file.bin") + type: string + upload_id: + description: UploadID is the unique identifier returned from the multi-part + upload initialization. 
Aborting will discard all uploaded parts + type: string + required: + - filename + - upload_id + type: object + files.CompleteMultiPartRequest: + properties: + filename: + description: Filename is the name of the file to upload. Can contain + alphanumeric characters, underscores, hyphens, spaces, periods, and + forward slashes with an optional file extension (e.g., + "model_weights.bin", "data-2024.tar.gz", "path/to/file.bin") + type: string + parts: + description: Parts is the list of successfully uploaded parts with their ETags. + Must include all parts in order + items: + $ref: "#/components/schemas/files.CompletedPart" + minItems: 1 + type: array + upload_id: + description: UploadID is the unique identifier returned from the multi-part + upload initialization + type: string + required: + - filename + - parts + - upload_id + type: object + files.CompleteUploadResponse: + properties: + completed_at: + description: CompletedAt is the timestamp when the upload was completed + type: string + path: + description: Path is the storage path where the uploaded file can be accessed + type: string + size: + description: Size is the total size of the uploaded file in bytes + type: integer + type: object + files.CompletedPart: + properties: + etag: + description: ETag is the entity tag returned by the storage service after + uploading this part. This is used to verify the part's integrity + type: string + part_number: + description: PartNumber is the sequential number of this part (starting from 1) + type: integer + type: object + files.FileRequest: + properties: + filename: + description: Filename is the name of the file to upload. Can contain + alphanumeric characters, underscores, hyphens, spaces, periods, and + forward slashes with an optional file extension (e.g., + "model_weights.bin", "data-2024.tar.gz", "path/to/file.bin") + type: string + required: + - filename + type: object + files.InitiateMultiPartRequest: + properties: + filename: + description: Filename is the name of the file to upload. Can contain + alphanumeric characters, underscores, hyphens, spaces, periods, and + forward slashes with an optional file extension (e.g., + "model_weights.bin", "data-2024.tar.gz", "path/to/file.bin") + type: string + parts_count: + description: PartsCount is the number of parts to split the file into for + parallel upload. Must be between 1 and 10,000. Use multi-part upload + for files larger than 100MB for better performance and reliability + maximum: 10000 + minimum: 1 + type: integer + required: + - filename + - parts_count + type: object + files.MultiPartInitResponse: + properties: + part_upload_urls: + description: PartUploadURLs is the list of pre-signed URLs for uploading each + part in parallel + items: + $ref: "#/components/schemas/files.PartUploadURL" + type: array + path: + description: Path is the storage path where the file will be accessible after + upload completion + type: string + upload_id: + description: UploadID is the unique identifier for this multi-part upload + session. 
Use this when completing or aborting the upload + type: string + type: object + files.PartUploadURL: + properties: + headers: + additionalProperties: + type: string + description: Headers are the required HTTP headers to include when uploading + this part + type: object + method: + description: Method is the HTTP method to use for uploading this part (typically + "PUT") + type: string + part_number: + description: PartNumber is the sequential number identifying this part (starting + from 1) + type: integer + url: + description: URL is the pre-signed URL for uploading this specific part + type: string + type: object + files.UploadResponse: + properties: + path: + description: Path is the storage path where the file will be accessible after + upload (e.g., "user-files/model_weights.bin") + type: string + upload_url: + allOf: + - $ref: "#/components/schemas/files.UploadURL" + description: UploadURL contains the signed URL and metadata needed to upload the + file + type: object + files.UploadURL: + properties: + form_data: + additionalProperties: + type: string + description: FormData contains form fields required for multipart/form-data + uploads (if applicable) + type: object + headers: + additionalProperties: + type: string + description: Headers are the required HTTP headers to include in the upload + request + type: object + method: + description: Method is the HTTP method to use (typically "PUT" or "POST") + type: string + url: + description: URL is the pre-signed URL endpoint for uploading the file + type: string + type: object + ContainerStatus: + properties: + finishedAt: + description: FinishedAt is the timestamp when the container finished execution + (if terminated) + type: string + message: + description: Message provides a human-readable message with details about the + container's status + type: string + name: + description: Name is the name of the container + type: string + reason: + description: Reason provides a brief machine-readable reason for the container's + current status + type: string + startedAt: + description: StartedAt is the timestamp when the container started execution + type: string + status: + description: Status is the current state of the container (e.g., "Running", + "Terminated", "Waiting") + type: string + type: object + CreateDeploymentRequest: + properties: + args: + description: Args overrides the container's CMD. Provide as an array of + arguments (e.g., ["python", "app.py"]) + items: + type: string + type: array + autoscaling: + additionalProperties: + type: string + description: 'Autoscaling configuration as key-value pairs. Example: {"metric": + "QueueBacklogPerWorker", "target": "10"} to scale based on queue + backlog' + type: object + command: + description: Command overrides the container's ENTRYPOINT. Provide as an array + (e.g., ["/bin/sh", "-c"]) + items: + type: string + type: array + cpu: + description: CPU is the number of CPU cores to allocate per container instance + (e.g., 0.1 = 100 milli cores) + minimum: 0.1 + type: number + description: + description: Description is an optional human-readable description of your + deployment + type: string + environment_variables: + description: EnvironmentVariables is a list of environment variables to set in + the container. Each must have a name and either a value or + value_from_secret + items: + $ref: "#/components/schemas/v1.EnvironmentVariable" + type: array + gpu_count: + description: GPUCount is the number of GPUs to allocate per container instance. 
            Defaults to 0 if not specified
          type: integer
        gpu_type:
          description: GPUType specifies the GPU hardware to use (e.g., "h100-80gb").
          enum:
            - h100-80gb
            - a100-80gb
          type: string
        health_check_path:
          description: HealthCheckPath is the HTTP path for health checks (e.g.,
            "/health"). If set, the platform will check this endpoint to
            determine container health
          type: string
        image:
          description: Image is the container image to deploy from registry.together.ai.
          type: string
        max_replicas:
          description: MaxReplicas is the maximum number of container instances that can
            be scaled up to. If not set, defaults to MinReplicas
          type: integer
        memory:
          description: Memory is the amount of RAM to allocate per container instance in
            GiB (e.g., 0.5 = 512 MiB)
          minimum: 0.1
          type: number
        min_replicas:
          description: MinReplicas is the minimum number of container instances to run.
            Defaults to 1 if not specified
          type: integer
        name:
          description: Name is the unique identifier for your deployment. Must contain
            only alphanumeric characters, underscores, or hyphens (1-100
            characters)
          maxLength: 100
          minLength: 1
          type: string
        port:
          description: Port is the container port your application listens on (e.g., 8080
            for web servers). Required if your application serves traffic
          type: integer
        storage:
          description: Storage is the amount of ephemeral disk storage to allocate per
            container instance (e.g., 10 = 10 GiB)
          type: integer
        termination_grace_period_seconds:
          description: TerminationGracePeriodSeconds is the time in seconds to wait for
            graceful shutdown before forcefully terminating the replica
          type: integer
        volumes:
          description: Volumes is a list of volume mounts to attach to the container. Each
            mount must reference an existing volume by name
          items:
            $ref: "#/components/schemas/v1.VolumeMount"
          type: array
      required:
        - gpu_type
        - image
        - name
      type: object
    CreateSecretRequest:
      properties:
        description:
          description: Description is an optional human-readable description of the
            secret's purpose (max 500 characters)
          maxLength: 500
          type: string
        name:
          description: Name is the unique identifier for the secret. Can contain
            alphanumeric characters, underscores, hyphens, forward slashes, and
            periods (1-100 characters)
          maxLength: 100
          minLength: 1
          type: string
        project_id:
          description: ProjectID is ignored - the project is automatically determined from
            your authentication
          type: string
        value:
          description: Value is the sensitive data to store securely (e.g., API keys,
            passwords, tokens). This value will be encrypted at rest
          minLength: 1
          type: string
      required:
        - name
        - value
      type: object
    CreateVolumeRequest:
      properties:
        content:
          allOf:
            - $ref: "#/components/schemas/volumes.VolumeContent"
          description: Content specifies the content configuration for this volume
        name:
          description: Name is the unique identifier for the volume within the project
          type: string
        type:
          allOf:
            - $ref: "#/components/schemas/volumes.VolumeType"
          description: Type is the volume type (currently only "readOnly" is supported)
      required:
        - content
        - name
        - type
      type: object
    DeploymentListResponse:
      properties:
        data:
          description: Data is the array of deployment items
          items:
            $ref: "#/components/schemas/v1.DeploymentResponseItem"
          type: array
        object:
          description: Object is the type identifier for this response (always "list")
          type: string
      type: object
    DeploymentLogs:
      properties:
        lines:
          items:
            type: string
          type: array
      type: object
    DeploymentResponseItem:
      properties:
        args:
          description: Args are the arguments passed to the container's command
          items:
            type: string
          type: array
        autoscaling:
          additionalProperties:
            type: string
          description: Autoscaling contains autoscaling configuration parameters for this
            deployment
          type: object
        command:
          description: Command is the entrypoint command run in the container
          items:
            type: string
          type: array
        cpu:
          description: CPU is the amount of CPU resource allocated to each replica in
            cores (fractional values are allowed)
          type: number
        created_at:
          description: CreatedAt is the ISO8601 timestamp when this deployment was created
          type: string
        description:
          description: Description provides a human-readable explanation of the
            deployment's purpose or content
          type: string
        desired_replicas:
          description: DesiredReplicas is the number of replicas that the orchestrator is
            targeting
          type: integer
        environment_variables:
          description: EnvironmentVariables is a list of environment variables set in the
            container
          items:
            $ref: "#/components/schemas/v1.EnvironmentVariable"
          type: array
        gpu_count:
          description: GPUCount is the number of GPUs allocated to each replica in this
            deployment
          type: integer
        gpu_type:
          description: GPUType specifies the type of GPU requested (if any) for this
            deployment
          enum:
            - h100-80gb
            - a100-80gb
          type: string
        health_check_path:
          description: HealthCheckPath is the HTTP path used for health checks of the
            application
          type: string
        id:
          description: ID is the unique identifier of the deployment
          type: string
        image:
          description: Image specifies the container image used for this deployment
          type: string
        max_replicas:
          description: MaxReplicas is the maximum number of replicas to run for this
            deployment
          type: integer
        memory:
          description: Memory is the amount of memory allocated to each replica in GiB
            (fractional values are allowed)
          type: number
        min_replicas:
          description: MinReplicas is the minimum number of replicas to run for this
            deployment
          type: integer
        name:
          description: Name is the name of the deployment
          type: string
        object:
          description: Object is the type identifier for this response (always "deployment")
          type: string
        port:
          description: Port is the container port that the deployment exposes
          type: integer
        ready_replicas:
          description: ReadyReplicas is the current number of replicas that are in the
            Ready state
          type: integer
        replica_events:
additionalProperties: + $ref: "#/components/schemas/v1.ReplicaEvent" + description: ReplicaEvents is a mapping of replica names or IDs to their status + events + type: object + status: + allOf: + - $ref: "#/components/schemas/v1.DeploymentStatus" + description: Status represents the overall status of the deployment (e.g., + Updating, Scaling, Ready, Failed) + enum: + - Updating + - Scaling + - Ready + - Failed + storage: + description: Storage is the amount of storage (in MB or units as defined by the + platform) allocated to each replica + type: integer + updated_at: + description: UpdatedAt is the ISO8601 timestamp when this deployment was last + updated + type: string + volumes: + description: Volumes is a list of volume mounts for this deployment + items: + $ref: "#/components/schemas/v1.VolumeMount" + type: array + type: object + DeploymentStatus: + enum: + - Updating + - Scaling + - Ready + - Failed + type: string + x-enum-varnames: + - DeploymentStatusUpdating + - DeploymentStatusScaling + - DeploymentStatusReady + - DeploymentStatusFailed + EnvironmentVariable: + properties: + name: + description: Name is the environment variable name (e.g., "DATABASE_URL"). Must + start with a letter or underscore, followed by letters, numbers, or + underscores + type: string + value: + description: Value is the plain text value for the environment variable. Use + this for non-sensitive values. Either Value or ValueFromSecret must + be set, but not both + type: string + value_from_secret: + description: ValueFromSecret references a secret by name or ID to use as the + value. Use this for sensitive values like API keys or passwords. + Either Value or ValueFromSecret must be set, but not both + type: string + required: + - name + type: object + ImageListResponse: + properties: + data: + description: Data is the array of image items + items: + $ref: "#/components/schemas/v1.ImageResponseItem" + type: array + object: + description: Object is the type identifier for this response (always "list") + type: string + type: object + ImageResponseItem: + properties: + object: + description: Object is the type identifier for this response (always "image") + type: string + tag: + description: Tag is the image tag/version identifier (e.g., "latest", "v1.0.0") + type: string + url: + description: URL is the full registry URL for this image including tag (e.g., + "registry.together.ai/project-id/repository:tag") + type: string + type: object + KubernetesEvent: + properties: + action: + description: Action is the action taken or reported by this event + type: string + count: + description: Count is the number of times this event has occurred + type: integer + first_seen: + description: FirstSeen is the timestamp when this event was first observed + type: string + last_seen: + description: LastSeen is the timestamp when this event was last observed + type: string + message: + description: Message is a human-readable description of the event + type: string + reason: + description: Reason is a brief machine-readable reason for this event (e.g., + "Pulling", "Started", "Failed") + type: string + type: object + ListSecretsResponse: + properties: + data: + description: Data is the array of secret items + items: + $ref: "#/components/schemas/v1.SecretResponseItem" + type: array + object: + description: Object is the type identifier for this response (always "list") + type: string + type: object + ListVolumesResponse: + properties: + data: + description: Data is the array of volume items + items: + $ref: 
"#/components/schemas/v1.VolumeResponseItem" + type: array + object: + description: Object is the type identifier for this response (always "list") + type: string + type: object + ReplicaEvent: + properties: + container_status: + allOf: + - $ref: "#/components/schemas/v1.ContainerStatus" + description: ContainerStatus provides detailed status information about the + container within this replica + events: + description: Events is a list of Kubernetes events related to this replica for + troubleshooting + items: + $ref: "#/components/schemas/v1.KubernetesEvent" + type: array + replica_completed_at: + description: ReplicaCompletedAt is the timestamp when the replica finished + execution + type: string + replica_marked_for_termination_at: + description: ReplicaMarkedForTerminationAt is the timestamp when the replica was + marked for termination + type: string + replica_ready_since: + description: ReplicaReadySince is the timestamp when the replica became ready to + serve traffic + type: string + replica_running_since: + description: ReplicaRunningSince is the timestamp when the replica entered the + running state + type: string + replica_started_at: + description: ReplicaStartedAt is the timestamp when the replica was created + type: string + replica_status: + description: ReplicaStatus is the current status of the replica (e.g., + "Running", "Pending", "Failed") + type: string + replica_status_message: + description: ReplicaStatusMessage provides a human-readable message explaining + the replica's status + type: string + replica_status_reason: + description: ReplicaStatusReason provides a brief machine-readable reason for + the replica's status + type: string + scheduled_on_cluster: + description: ScheduledOnCluster identifies which cluster this replica is + scheduled on + type: string + type: object + RepositoryListResponse: + properties: + data: + description: Data is the array of repository items + items: + $ref: "#/components/schemas/v1.RepositoryResponseItem" + type: array + object: + description: Object is the type identifier for this response (always "list") + type: string + type: object + RepositoryResponseItem: + properties: + id: + description: ID is the unique identifier for this repository (repository name + with slashes replaced by "___") + type: string + object: + description: Object is the type identifier for this response (always + "image-repository") + type: string + url: + description: URL is the full registry URL for this repository (e.g., + "registry.together.ai/project-id/repository-name") + type: string + type: object + SecretResponseItem: + properties: + created_at: + description: CreatedAt is the ISO8601 timestamp when this secret was created + type: string + created_by: + description: CreatedBy is the identifier of the user who created this secret + type: string + description: + description: Description is a human-readable description of the secret's purpose + type: string + id: + description: ID is the unique identifier for this secret + type: string + last_updated_by: + description: LastUpdatedBy is the identifier of the user who last updated this + secret + type: string + name: + description: Name is the name/key of the secret + type: string + object: + description: Object is the type identifier for this response (always "secret") + type: string + updated_at: + description: UpdatedAt is the ISO8601 timestamp when this secret was last updated + type: string + type: object + UpdateDeploymentRequest: + properties: + args: + description: Args overrides the container's 
            CMD. Provide as an array of arguments (e.g., ["python", "app.py"])
          items:
            type: string
          type: array
        autoscaling:
          additionalProperties:
            type: string
          description: 'Autoscaling configuration as key-value pairs. Example: {"metric":
            "QueueBacklogPerWorker", "target": "10"} to scale based on queue
            backlog'
          type: object
        command:
          description: Command overrides the container's ENTRYPOINT. Provide as an array
            (e.g., ["/bin/sh", "-c"])
          items:
            type: string
          type: array
        cpu:
          description: CPU is the number of CPU cores to allocate per container instance
            (e.g., 0.1 = 100 millicores)
          minimum: 0.1
          type: number
        description:
          description: Description is an optional human-readable description of your
            deployment
          type: string
        environment_variables:
          description: EnvironmentVariables is a list of environment variables to set in
            the container. This will replace all existing environment variables
          items:
            $ref: "#/components/schemas/v1.EnvironmentVariable"
          type: array
        gpu_count:
          description: GPUCount is the number of GPUs to allocate per container instance
          type: integer
        gpu_type:
          description: GPUType specifies the GPU hardware to use (e.g., "h100-80gb")
          enum:
            - h100-80gb
            - a100-80gb
          type: string
        health_check_path:
          description: HealthCheckPath is the HTTP path for health checks (e.g.,
            "/health"). Set to an empty string to disable health checks
          type: string
        image:
          description: Image is the container image to deploy from registry.together.ai.
          type: string
        max_replicas:
          description: MaxReplicas is the maximum number of replicas that can be scaled
            up to.
          type: integer
        memory:
          description: Memory is the amount of RAM to allocate per container instance in
            GiB (e.g., 0.5 = 512 MiB)
          minimum: 0.1
          type: number
        min_replicas:
          description: MinReplicas is the minimum number of replicas to run
          type: integer
        name:
          description: Name is the new unique identifier for your deployment. Must contain
            only alphanumeric characters, underscores, or hyphens (1-100
            characters)
          maxLength: 100
          minLength: 1
          type: string
        port:
          description: Port is the container port your application listens on (e.g., 8080
            for web servers)
          type: integer
        storage:
          description: Storage is the amount of ephemeral disk storage to allocate per
            container instance (e.g., 10 = 10 GiB)
          type: integer
        termination_grace_period_seconds:
          description: TerminationGracePeriodSeconds is the time in seconds to wait for
            graceful shutdown before forcefully terminating the replica
          type: integer
        volumes:
          description: Volumes is a list of volume mounts to attach to the container. This
            will replace all existing volumes
          items:
            $ref: "#/components/schemas/v1.VolumeMount"
          type: array
      type: object
    UpdateSecretRequest:
      properties:
        description:
          description: Description is an optional human-readable description of the
            secret's purpose (max 500 characters)
          maxLength: 500
          type: string
        name:
          description: Name is the new unique identifier for the secret. Can contain
            alphanumeric characters, underscores, hyphens, forward slashes, and
            periods (1-100 characters)
          maxLength: 100
          minLength: 1
          type: string
        project_id:
          description: ProjectID is ignored - the project is automatically determined from
            your authentication
          type: string
        value:
          description: Value is the new sensitive data to store securely.
Updating this + will replace the existing secret value + minLength: 1 + type: string + type: object + UpdateVolumeRequest: + properties: + content: + allOf: + - $ref: "#/components/schemas/volumes.VolumeContent" + description: Content specifies the new content that will be preloaded to this + volume + name: + description: Name is the new unique identifier for the volume within the project + type: string + type: + allOf: + - $ref: "#/components/schemas/volumes.VolumeType" + description: Type is the new volume type (currently only "readOnly" is supported) + type: object + VolumeMount: + properties: + mount_path: + description: MountPath is the path in the container where the volume will be + mounted (e.g., "/data") + type: string + name: + description: Name is the name of the volume to mount. Must reference an existing + volume by name or ID + type: string + required: + - mount_path + - name + type: object + VolumeResponseItem: + properties: + content: + allOf: + - $ref: "#/components/schemas/volumes.VolumeContent" + description: Content specifies the content that will be preloaded to this volume + created_at: + description: CreatedAt is the ISO8601 timestamp when this volume was created + type: string + id: + description: ID is the unique identifier for this volume + type: string + name: + description: Name is the name of the volume + type: string + object: + description: Object is the type identifier for this response (always "volume") + type: string + type: + allOf: + - $ref: "#/components/schemas/volumes.VolumeType" + description: Type is the volume type (e.g., "readOnly") + updated_at: + description: UpdatedAt is the ISO8601 timestamp when this volume was last updated + type: string + type: object + volumes.VolumeContent: + properties: + source_prefix: + description: SourcePrefix is the file path prefix for the content to be + preloaded into the volume + example: models/ + type: string + type: + description: Type is the content type (currently only "files" is supported which + allows preloading files uploaded via Files API into the volume) + enum: + - files + example: files + type: string + type: object + volumes.VolumeType: + enum: + - readOnly + type: string + x-enum-varnames: + - VolumeTypeReadOnly ListVoicesResponse: description: Response containing a list of models and their available voices. 
type: object From 309fa0996c7a4bc7aff0ac61390017b8fc1c9571 Mon Sep 17 00:00:00 2001 From: Blaine Kasten Date: Wed, 17 Dec 2025 10:39:08 -0600 Subject: [PATCH 2/4] simplify diff --- openapi.yaml | 7759 ++++++++++++++++++++++++-------------------------- 1 file changed, 3785 insertions(+), 3974 deletions(-) diff --git a/openapi.yaml b/openapi.yaml index 0fa9121..ee16ea7 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -15,956 +15,1072 @@ servers: security: - bearerAuth: [] paths: - /deployments: + /voices: get: - description: Get a list of all deployments in your project + tags: ['Voices'] + summary: Fetch available voices for each model + description: Fetch available voices for each model + operationId: fetchVoices responses: - "200": - description: List of deployments + '200': + description: Success content: application/json: schema: - $ref: "#/components/schemas/DeploymentListResponse" - "500": - description: Internal server error + $ref: '#/components/schemas/ListVoicesResponse' + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.audio.voices.list() + + print(response.data) + - lang: Python + label: Together AI SDK (v2) + source: | + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.audio.voices.list() + + print(response.data) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.audio.voices.list() + + console.log(response.data); + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.audio.voices.list() + + console.log(response.data); + /videos/{id}: + get: + tags: ['Video'] + summary: Fetch video metadata + description: Fetch video metadata + servers: + - url: https://api.together.xyz/v2 + operationId: retrieveVideo + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.videos.retrieve(video_id) + + print(response.id) + - lang: Python + label: Together AI SDK (v2) + source: | + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.videos.retrieve(video_id) + + print(response.id) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.videos.retrieve(videoId); + + console.log(response.status); + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.videos.retrieve(videoId); + + console.log(response.status); + parameters: + - in: path + name: id + schema: + type: string + required: true + description: Identifier of video from 
create response. + responses: + '200': + description: Success content: application/json: schema: - type: object - summary: Get the list of deployments - tags: - - Deployments + $ref: '#/components/schemas/VideoJob' + '400': + description: Invalid request parameters. + '404': + description: Video ID not found. + /videos: post: - description: Create a new deployment with specified configuration + tags: ['Video'] + summary: Create video + description: Create a video + operationId: createVideo + servers: + - url: https://api.together.xyz/v2 + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.videos.create( + model="together/video-model", + prompt="A cartoon of an astronaut riding a horse on the moon" + ) + + print(response.id) + - lang: Python + label: Together AI SDK (v2) + source: | + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.videos.create( + model="together/video-model", + prompt="A cartoon of an astronaut riding a horse on the moon" + ) + + print(response.id) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.videos.create({ + model: "together/video-model", + prompt: "A cartoon of an astronaut riding a horse on the moon", + }); + + console.log(response.id); + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.videos.create({ + model: "together/video-model", + prompt: "A cartoon of an astronaut riding a horse on the moon", + }); + + console.log(response.id); requestBody: content: application/json: schema: - $ref: "#/components/schemas/CreateDeploymentRequest" - description: Deployment configuration - required: true + $ref: '#/components/schemas/CreateVideoBody' responses: - "200": - description: Deployment created successfully - content: - application/json: - schema: - $ref: "#/components/schemas/DeploymentResponseItem" - "400": - description: Invalid request - content: - application/json: - schema: - type: object - "500": - description: Internal server error + '200': + description: Success content: application/json: schema: - type: object - summary: Create a new deployment - tags: - - Deployments - "/v1/deployments/{id}": - delete: - description: Delete an existing deployment - parameters: - - description: Deployment ID or name - in: path - name: id - required: true - schema: - type: string - responses: - "200": - description: Deployment deleted successfully - content: - application/json: - schema: - type: object - "404": - description: Deployment not found - content: - application/json: - schema: - type: object - "500": - description: Internal server error - content: - application/json: - schema: - type: object - summary: Delete a deployment - tags: - - Deployments - get: - description: Retrieve details of a specific deployment by its ID or name - parameters: - - description: Deployment ID or name - in: path - name: id - required: true - schema: - type: string - responses: - "200": - description: Deployment details - content: - 
application/json: - schema: - $ref: "#/components/schemas/DeploymentResponseItem" - "404": - description: Deployment not found - content: - application/json: - schema: - type: object - "500": - description: Internal server error - content: - application/json: - schema: - type: object - summary: Get a deployment by ID or name - tags: - - Deployments - patch: - description: Update an existing deployment configuration - parameters: - - description: Deployment ID or name - in: path - name: id - required: true - schema: - type: string + $ref: '#/components/schemas/VideoJob' + /chat/completions: + post: + tags: ['Chat'] + summary: Create chat completion + description: Query a chat model. + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.chat.completions.create( + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What are some fun things to do in New York?"}, + ] + ) + + print(response.choices[0].message.content) + - lang: Python + label: Together AI SDK (v2) + source: | + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.chat.completions.create( + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What are some fun things to do in New York?"}, + ] + ) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.chat.completions.create({ + model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + messages: [ + { role: "system", content: "You are a helpful assistant." }, + { role: "user", "content": "What are some fun things to do in New York?" }, + ], + }); + + console.log(response.choices[0].message?.content); + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.chat.completions.create({ + model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + messages: [ + { role: "system", content: "You are a helpful assistant." }, + { role: "user", "content": "What are some fun things to do in New York?" 
}, + ], + }); + + console.log(response.choices[0].message?.content); + - lang: Shell + label: cURL + source: | + curl -X POST "https://api.together.xyz/v1/chat/completions" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What are some fun things to do in New York?"} + ] + }' + operationId: chat-completions requestBody: content: application/json: schema: - $ref: "#/components/schemas/UpdateDeploymentRequest" - description: Updated deployment configuration - required: true + $ref: '#/components/schemas/ChatCompletionRequest' responses: - "200": - description: Deployment updated successfully - content: - application/json: - schema: - $ref: "#/components/schemas/DeploymentResponseItem" - "400": - description: Invalid request + '200': + description: '200' content: application/json: schema: - type: object - "404": - description: Deployment not found - content: - application/json: + $ref: '#/components/schemas/ChatCompletionResponse' + text/event-stream: schema: - type: object - "500": - description: Internal server error + $ref: '#/components/schemas/ChatCompletionStream' + '400': + description: 'BadRequest' content: application/json: schema: - type: object - summary: Update a deployment - tags: - - Deployments - "/v1/deployments/{id}/logs": - get: - description: Retrieve logs from a deployment, optionally filtered by replica ID. - Use follow=true to stream logs in real-time. - parameters: - - description: Deployment ID or name - in: path - name: id - required: true - schema: - type: string - - description: Replica ID to filter logs - in: query - name: replica_id - schema: - type: string - - description: Stream logs in real-time (ndjson format) - in: query - name: follow - schema: - type: boolean - responses: - "200": - description: Deployment logs + $ref: '#/components/schemas/ErrorData' + '401': + description: 'Unauthorized' content: application/json: schema: - $ref: "#/components/schemas/DeploymentLogs" - "404": - description: Deployment not found + $ref: '#/components/schemas/ErrorData' + '404': + description: 'NotFound' content: application/json: schema: - type: object - "500": - description: Internal server error + $ref: '#/components/schemas/ErrorData' + '429': + description: 'RateLimit' content: application/json: schema: - type: object - summary: Get logs for a deployment - tags: - - Deployments - /image-repositories: - get: - description: Retrieve all container image repositories available in your project - responses: - "200": - description: List of repositories + $ref: '#/components/schemas/ErrorData' + '503': + description: 'Overloaded' content: application/json: schema: - $ref: "#/components/schemas/RepositoryListResponse" - "500": - description: Internal server error + $ref: '#/components/schemas/ErrorData' + '504': + description: 'Timeout' content: application/json: schema: - type: object - summary: Get the list of image repositories in your project - tags: - - Images - "/v1/image-repositories/{id}/images": - get: - description: Retrieve all container images (tags) available in a specific repository - parameters: - - description: Repository ID - in: path - name: id - required: true - schema: - type: string - responses: - "200": - description: List of images - content: + $ref: '#/components/schemas/ErrorData' + deprecated: false + /completions: + post: + tags: 
['Completion'] + summary: Create completion + description: Query a language, code, or image model. + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.completions.create( + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + prompt="The largest city in France is", + max_tokens=1 + ) + + print(response.choices[0].text) + - lang: Python + label: Together AI SDK (v2) + source: | + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.completions.create( + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + prompt="The largest city in France is", + max_tokens=1 + ) + + print(response.choices[0].text) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.completions.create({ + model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + prompt: "The largest city in France is", + max_tokens: 1, + }); + + console.log(response.choices[0].text); + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.completions.create({ + model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + prompt: "The largest city in France is", + max_tokens: 1 + }); + + console.log(response.choices[0].text); + - lang: Shell + label: cURL + source: | + curl -X POST "https://api.together.xyz/v1/completions" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + "prompt": "The largest city in France is", + "max_tokens": 1 + }' + operationId: completions + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CompletionRequest' + responses: + '200': + description: '200' + content: application/json: schema: - $ref: "#/components/schemas/ImageListResponse" - "404": - description: Repository not found + $ref: '#/components/schemas/CompletionResponse' + text/event-stream: + schema: + $ref: '#/components/schemas/CompletionStream' + '400': + description: 'BadRequest' content: application/json: schema: - type: object - "500": - description: Internal server error + $ref: '#/components/schemas/ErrorData' + '401': + description: 'Unauthorized' content: application/json: schema: - type: object - summary: Get the list of images available under a repository - tags: - - Images - /secrets: - get: - description: Retrieve all secrets in your project - responses: - "200": - description: List of secrets + $ref: '#/components/schemas/ErrorData' + '404': + description: 'NotFound' content: application/json: schema: - $ref: "#/components/schemas/ListSecretsResponse" - "500": - description: Internal server error + $ref: '#/components/schemas/ErrorData' + '429': + description: 'RateLimit' content: application/json: schema: - type: object - summary: Get the list of project secrets - tags: - - Secrets + $ref: '#/components/schemas/ErrorData' + '503': + description: 'Overloaded' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '504': + description: 'Timeout' 
+ content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + deprecated: false + /embeddings: post: - description: Create a new secret to store sensitive configuration values + tags: ['Embeddings'] + summary: Create embedding + description: Query an embedding model for a given string of text. + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.embeddings.create( + model="BAAI/bge-large-en-v1.5", + input="New York City", + ) + + print(response.data[0].embedding) + - lang: Python + label: Together AI SDK (v2) + source: | + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.embeddings.create( + model="BAAI/bge-large-en-v1.5", + input="New York City", + ) + + print(response.data[0].embedding) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.embeddings.create({ + model: "BAAI/bge-large-en-v1.5", + input: "New York City", + }); + + console.log(response.data[0].embedding); + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.embeddings.create({ + model: "BAAI/bge-large-en-v1.5", + input: "New York City", + }); + + console.log(response.data[0].embedding); + - lang: Shell + label: cURL + source: | + curl -X POST "https://api.together.xyz/v1/embeddings" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "BAAI/bge-large-en-v1.5", + "input": "New York City" + }' + operationId: embeddings requestBody: content: application/json: schema: - $ref: "#/components/schemas/CreateSecretRequest" - description: Secret configuration - required: true + $ref: '#/components/schemas/EmbeddingsRequest' responses: - "200": - description: Secret created successfully + '200': + description: '200' content: application/json: schema: - $ref: "#/components/schemas/SecretResponseItem" - "400": - description: Invalid request + $ref: '#/components/schemas/EmbeddingsResponse' + '400': + description: 'BadRequest' content: application/json: schema: - type: object - "500": - description: Internal server error + $ref: '#/components/schemas/ErrorData' + '401': + description: 'Unauthorized' content: application/json: schema: - type: object - summary: Create a new secret - tags: - - Secrets - "/v1/secrets/{id}": - delete: - description: Delete an existing secret - parameters: - - description: Secret ID or name - in: path - name: id - required: true - schema: - type: string - responses: - "200": - description: Secret deleted successfully - content: - application/json: - schema: - type: object - "404": - description: Secret not found - content: - application/json: - schema: - type: object - "500": - description: Internal server error - content: - application/json: - schema: - type: object - summary: Delete a secret - tags: - - Secrets - get: - description: Retrieve details of a specific secret by its ID or name - parameters: - - description: Secret ID or name - in: path - name: id - required: true - schema: - type: 
string - responses: - "200": - description: Secret details - content: - application/json: - schema: - $ref: "#/components/schemas/SecretResponseItem" - "404": - description: Secret not found - content: - application/json: - schema: - type: object - "500": - description: Internal server error - content: - application/json: - schema: - type: object - summary: Get a secret by ID or name - tags: - - Secrets - patch: - description: Update an existing secret's value or metadata - parameters: - - description: Secret ID or name - in: path - name: id - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: "#/components/schemas/UpdateSecretRequest" - description: Updated secret configuration - required: true - responses: - "200": - description: Secret updated successfully + $ref: '#/components/schemas/ErrorData' + '404': + description: 'NotFound' content: application/json: schema: - $ref: "#/components/schemas/SecretResponseItem" - "400": - description: Invalid request + $ref: '#/components/schemas/ErrorData' + '429': + description: 'RateLimit' content: application/json: schema: - type: object - "404": - description: Secret not found + $ref: '#/components/schemas/ErrorData' + '503': + description: 'Overloaded' content: application/json: schema: - type: object - "500": - description: Internal server error + $ref: '#/components/schemas/ErrorData' + '504': + description: 'Timeout' content: application/json: schema: - type: object - summary: Update a secret - tags: - - Secrets - "/v1/storage/{filename}": + $ref: '#/components/schemas/ErrorData' + deprecated: false + /models: get: - description: Download a file by redirecting to a signed URL + tags: ['Models'] + summary: List all models + description: Lists all of Together's open-source models + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + models = client.models.list() + + for model in models: + print(model.id) + - lang: Python + label: Together AI SDK (v2) + source: | + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + models = client.models.list() + + for model in models: + print(model.id) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const models = await client.models.list(); + + for (const model of models) { + console.log(model.id); + } + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const models = await client.models.list(); + + for (const model of models) { + console.log(model.id); + } + - lang: Shell + label: cURL + source: | + curl "https://api.together.xyz/v1/models" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" + operationId: models parameters: - - description: Filename - in: path - name: filename - required: true + - name: dedicated + in: query + description: Filter models to only return dedicated models schema: - type: string + type: boolean responses: - "307": - description: Redirect to signed download URL - content: - application/json: - schema: - type: string - "400": - description: 
Invalid request - content: - application/json: - schema: - additionalProperties: - type: string - type: object - "404": - description: File not found + '200': + description: '200' content: application/json: schema: - additionalProperties: - type: string - type: object - "500": - description: Internal error + $ref: '#/components/schemas/ModelInfoList' + '400': + description: 'BadRequest' content: application/json: schema: - additionalProperties: - type: string - type: object - summary: Download a file - tags: - - files - "/v1/storage/{filename}/url": - get: - description: Get a presigned download URL for a file - parameters: - - description: Filename - in: path - name: filename - required: true - schema: - type: string - responses: - "200": - description: Signed URL + $ref: '#/components/schemas/ErrorData' + '401': + description: 'Unauthorized' content: application/json: schema: - $ref: "#/components/schemas/api_v1.SignedURLResponse" - "400": - description: Invalid request + $ref: '#/components/schemas/ErrorData' + '404': + description: 'NotFound' content: application/json: schema: - additionalProperties: - type: string - type: object - "404": - description: File not found + $ref: '#/components/schemas/ErrorData' + '429': + description: 'RateLimit' content: application/json: schema: - additionalProperties: - type: string - type: object - "500": - description: Internal error + $ref: '#/components/schemas/ErrorData' + '504': + description: 'Timeout' content: application/json: schema: - additionalProperties: - type: string - type: object - summary: Get a signed URL for a file - tags: - - files - /storage/multipart/abort: + $ref: '#/components/schemas/ErrorData' + deprecated: false post: - description: Abort a multi-part upload and discard all uploaded parts + tags: ['Models'] + summary: Upload a custom model or adapter + description: Upload a custom model or adapter from Hugging Face or S3 + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.models.upload( + "My-Fine-Tuned-Model", + "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz", + ) + + print(response.job_id) + - lang: Python + label: Together AI SDK (v2) + source: | + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.models.upload( + model_name="My-Fine-Tuned-Model", + model_source="https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz", + ) + + print(response.data.job_id) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.models.upload({ + model_name: "My-Fine-Tuned-Model", + model_source: "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz", + }) + + console.log(response); + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.models.upload({ + model_name: "My-Fine-Tuned-Model", + model_source: "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz", + }) + + console.log(response); + - lang: 
Shell + label: cURL + source: | + curl -X POST "https://api.together.xyz/v1/models" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model_name": "My-Fine-Tuned-Model", + "model_source": "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz" + }' + operationId: uploadModel requestBody: + required: true content: application/json: schema: - $ref: "#/components/schemas/files.AbortMultiPartRequest" - description: Abort multi-part upload request - required: true + $ref: '#/components/schemas/ModelUploadRequest' responses: - "200": - description: Multi-part upload aborted successfully + '200': + description: Model / adapter upload job created successfully content: application/json: schema: - additionalProperties: - type: string - type: object - "400": - description: Invalid request + $ref: '#/components/schemas/ModelUploadSuccessResponse' + + /jobs/{jobId}: + get: + tags: ['Jobs'] + summary: Get job status + description: Get the status of a specific job + operationId: getJob + parameters: + - name: jobId + in: path + required: true + schema: + type: string + description: The ID of the job to retrieve + example: job-a15dad11-8d8e-4007-97c5-a211304de284 + responses: + '200': + description: Job status retrieved successfully content: application/json: schema: - additionalProperties: - type: string - type: object - "500": - description: Internal error + $ref: '#/components/schemas/JobInfoSuccessResponse' + + /jobs: + get: + tags: ['Jobs'] + summary: List all jobs + description: List all jobs and their statuses + operationId: listJobs + responses: + '200': + description: Jobs retrieved successfully content: application/json: schema: - additionalProperties: - type: string - type: object - summary: Abort multi-part upload - tags: - - files - /storage/multipart/complete: + $ref: '#/components/schemas/JobsInfoSuccessResponse' + + /images/generations: post: - description: Complete a multi-part upload by providing all part ETags + tags: ['Images'] + summary: Create image + description: Use an image model to generate an image for a given prompt. 
+ x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.images.generate( + model="black-forest-labs/FLUX.1-schnell", + steps=4, + prompt="A cartoon of an astronaut riding a horse on the moon", + ) + + print(response.data[0].url) + - lang: Python + label: Together AI SDK (v2) + source: | + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.images.generate( + model="black-forest-labs/FLUX.1-schnell", + steps=4, + prompt="A cartoon of an astronaut riding a horse on the moon", + ) + + print(response.data[0].url) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.images.generate({ + model: "black-forest-labs/FLUX.1-schnell", + prompt: "A cartoon of an astronaut riding a horse on the moon", + }); + + console.log(response.data[0].url); + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.images.generate({ + model: "black-forest-labs/FLUX.1-schnell", + prompt: "A cartoon of an astronaut riding a horse on the moon", + }); + + console.log(response.data[0].url); + - lang: Shell + label: cURL + source: | + curl -X POST "https://api.together.xyz/v1/images/generations" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "black-forest-labs/FLUX.1-schnell", + "prompt": "A cartoon of an astronaut riding a horse on the moon" + }' requestBody: + required: true content: application/json: schema: - $ref: "#/components/schemas/files.CompleteMultiPartRequest" - description: Complete multi-part upload request - required: true - responses: - "200": - description: Multi-part upload completed - content: - application/json: - schema: - $ref: "#/components/schemas/files.CompleteUploadResponse" - "400": - description: Invalid request - content: - application/json: - schema: - additionalProperties: + type: object + required: + - prompt + - model + properties: + prompt: type: string - type: object - "500": - description: Internal error - content: - application/json: - schema: - additionalProperties: + description: A description of the desired images. Maximum length varies by model. 
+ example: cat floating in space, cinematic + model: type: string - type: object - summary: Complete multi-part upload - tags: - - files - /storage/multipart/init: - post: - description: Initiate a multi-part upload and get presigned URLs for each part - requestBody: - content: - application/json: - schema: - $ref: "#/components/schemas/files.InitiateMultiPartRequest" - description: Multi-part upload init request - required: true - responses: - "200": - description: Multi-part upload info - content: - application/json: - schema: - $ref: "#/components/schemas/files.MultiPartInitResponse" - "400": - description: Invalid request - content: - application/json: - schema: - additionalProperties: - type: string - type: object - "500": - description: Internal error - content: - application/json: - schema: - additionalProperties: - type: string - type: object - summary: Initiate multi-part upload - tags: - - files - /storage/upload-request: - post: - description: Request a presigned upload URL for a file - requestBody: - content: - application/json: - schema: - $ref: "#/components/schemas/files.FileRequest" - description: Upload request - required: true - responses: - "200": - description: Upload URL and path - content: - application/json: - schema: - $ref: "#/components/schemas/files.UploadResponse" - "400": - description: Invalid request - content: - application/json: - schema: - additionalProperties: - type: string - type: object - "500": - description: Internal error - content: - application/json: - schema: - additionalProperties: - type: string - type: object - summary: Request an upload URL for a file - tags: - - files - /storage/volumes: - get: - description: Retrieve all volumes in your project - responses: - "200": - description: List of volumes - content: - application/json: - schema: - $ref: "#/components/schemas/ListVolumesResponse" - "500": - description: Internal server error - content: - application/json: - schema: - type: object - summary: Get the list of project volumes - tags: - - Volumes - post: - description: Create a new volume to preload files in deployments - requestBody: - content: - application/json: - schema: - $ref: "#/components/schemas/CreateVolumeRequest" - description: Volume configuration - required: true - responses: - "200": - description: Volume created successfully - content: - application/json: - schema: - $ref: "#/components/schemas/VolumeResponseItem" - "400": - description: Invalid request - content: - application/json: - schema: - type: object - "500": - description: Internal server error - content: - application/json: - schema: - type: object - summary: Create a new volume - tags: - - Volumes - "/v1/storage/volumes/{id}": - delete: - description: Delete an existing volume - parameters: - - description: Volume ID or name - in: path - name: id - required: true - schema: - type: string - responses: - "200": - description: Volume deleted successfully - content: - application/json: - schema: - type: object - "404": - description: Volume not found - content: - application/json: - schema: - type: object - "500": - description: Internal server error - content: - application/json: - schema: - type: object - summary: Delete a volume - tags: - - Volumes - get: - description: Retrieve details of a specific volume by its ID or name - parameters: - - description: Volume ID or name - in: path - name: id - required: true - schema: - type: string - responses: - "200": - description: Volume details - content: - application/json: - schema: - $ref: 
"#/components/schemas/VolumeResponseItem" - "404": - description: Volume not found - content: - application/json: - schema: - type: object - "500": - description: Internal server error - content: - application/json: - schema: - type: object - summary: Get a volume by ID or name - tags: - - Volumes - patch: - description: Update an existing volume's configuration or contents - parameters: - - description: Volume ID or name - in: path - name: id - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: "#/components/schemas/UpdateVolumeRequest" - description: Updated volume configuration - required: true - responses: - "200": - description: Volume updated successfully - content: - application/json: - schema: - $ref: "#/components/schemas/VolumeResponseItem" - "400": - description: Invalid request - content: - application/json: - schema: - type: object - "404": - description: Volume not found - content: - application/json: - schema: - type: object - "500": - description: Internal server error - content: - application/json: - schema: - type: object - summary: Update a volume - tags: - - Volumes - /voices: - get: - tags: ['Voices'] - summary: Fetch available voices for each model - description: Fetch available voices for each model - operationId: fetchVoices - responses: - '200': - description: Success - content: - application/json: - schema: - $ref: '#/components/schemas/ListVoicesResponse' - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.audio.voices.list() - - print(response.data) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.audio.voices.list() - - print(response.data) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.audio.voices.list() - - console.log(response.data); - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.audio.voices.list() - - console.log(response.data); - /videos/{id}: - get: - tags: ['Video'] - summary: Fetch video metadata - description: Fetch video metadata - servers: - - url: https://api.together.xyz/v2 - operationId: retrieveVideo - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.videos.retrieve(video_id) - - print(response.id) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.videos.retrieve(video_id) - - print(response.id) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await 
client.videos.retrieve(videoId); - - console.log(response.status); - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.videos.retrieve(videoId); - - console.log(response.status); - parameters: - - in: path - name: id - schema: - type: string - required: true - description: Identifier of video from create response. + description: > + The model to use for image generation.
+
+ [See all of Together AI's image models](https://docs.together.ai/docs/serverless-models#image-models) + example: black-forest-labs/FLUX.1-schnell + anyOf: + - type: string + enum: + - black-forest-labs/FLUX.1-schnell-Free + - black-forest-labs/FLUX.1-schnell + - black-forest-labs/FLUX.1.1-pro + - type: string + steps: + type: integer + default: 20 + description: Number of generation steps. + image_url: + type: string + description: URL of an image to use for image models that support it. + seed: + type: integer + description: Seed used for generation. Can be used to reproduce image generations. + n: + type: integer + default: 1 + description: Number of image results to generate. + height: + type: integer + default: 1024 + description: Height of the image to generate in number of pixels. + width: + type: integer + default: 1024 + description: Width of the image to generate in number of pixels. + negative_prompt: + type: string + description: The prompt or prompts not to guide the image generation. + response_format: + type: string + description: Format of the image response. Can be either a base64 string or a URL. + enum: + - base64 + - url + guidance_scale: + type: number + description: Adjusts the alignment of the generated image with the input prompt. Higher values (e.g., 8-10) make the output more faithful to the prompt, while lower values (e.g., 1-5) encourage more creative freedom. + default: 3.5 + output_format: + type: string + description: The format of the image response. Can be either be `jpeg` or `png`. Defaults to `jpeg`. + default: jpeg + enum: + - jpeg + - png + image_loras: + description: An array of objects that define LoRAs (Low-Rank Adaptations) to influence the generated image. + type: array + items: + type: object + required: [path, scale] + properties: + path: + type: string + description: The URL of the LoRA to apply (e.g. https://huggingface.co/strangerzonehf/Flux-Midjourney-Mix2-LoRA). + scale: + type: number + description: The strength of the LoRA's influence. Most LoRA's recommend a value of 1. + reference_images: + description: An array of image URLs that guide the overall appearance and style of the generated image. These reference images influence the visual characteristics consistently across the generation. + type: array + items: + type: string + description: URL of a reference image to guide the image generation. + disable_safety_checker: + type: boolean + description: If true, disables the safety checker for image generation. responses: '200': - description: Success + description: Image generated successfully content: application/json: schema: - $ref: '#/components/schemas/VideoJob' - '400': - description: Invalid request parameters. - '404': - description: Video ID not found. - /videos: - post: - tags: ['Video'] - summary: Create video - description: Create a video - operationId: createVideo - servers: - - url: https://api.together.xyz/v2 + $ref: '#/components/schemas/ImageResponse' + /files: + get: + tags: ['Files'] + summary: List all files + description: List the metadata for all uploaded data files. 
x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -977,12 +1093,10 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.videos.create( - model="together/video-model", - prompt="A cartoon of an astronaut riding a horse on the moon" - ) + response = client.files.list() - print(response.id) + for file in response.data: + print(file.id) - lang: Python label: Together AI SDK (v2) source: | @@ -993,12 +1107,10 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.videos.create( - model="together/video-model", - prompt="A cartoon of an astronaut riding a horse on the moon" - ) + response = client.files.list() - print(response.id) + for file in response.data: + print(file.id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -1008,12 +1120,11 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.videos.create({ - model: "together/video-model", - prompt: "A cartoon of an astronaut riding a horse on the moon", - }); + const response = await client.files.list(); - console.log(response.id); + for (const file of response.data) { + console.log(file.id); + } - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -1023,29 +1134,29 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.videos.create({ - model: "together/video-model", - prompt: "A cartoon of an astronaut riding a horse on the moon", - }); + const response = await client.files.list(); - console.log(response.id); - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CreateVideoBody' + for (const file of response.data) { + console.log(file.id); + } + - lang: Shell + label: cURL + source: | + curl "https://api.together.xyz/v1/files" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" responses: '200': - description: Success + description: List of files content: application/json: schema: - $ref: '#/components/schemas/VideoJob' - /chat/completions: - post: - tags: ['Chat'] - summary: Create chat completion - description: Query a chat model. + $ref: '#/components/schemas/FileList' + /files/{id}: + get: + tags: ['Files'] + summary: List file + description: List the metadata for a single uploaded data file. 
x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -1058,15 +1169,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.chat.completions.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - messages=[ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "What are some fun things to do in New York?"}, - ] - ) + file = client.files.retrieve(id="file-id") - print(response.choices[0].message.content) + print(file) - lang: Python label: Together AI SDK (v2) source: | @@ -1077,13 +1182,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.chat.completions.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - messages=[ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "What are some fun things to do in New York?"}, - ] - ) + file = client.files.retrieve(id="file-id") + + print(file) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -1093,15 +1194,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.chat.completions.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - messages: [ - { role: "system", content: "You are a helpful assistant." }, - { role: "user", "content": "What are some fun things to do in New York?" }, - ], - }); + const file = await client.files.retrieve("file-id"); - console.log(response.choices[0].message?.content); + console.log(file); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -1111,86 +1206,32 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.chat.completions.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - messages: [ - { role: "system", content: "You are a helpful assistant." }, - { role: "user", "content": "What are some fun things to do in New York?" 
}, - ], - }); + const file = await client.files.retrieve("file-id"); - console.log(response.choices[0].message?.content); + console.log(file); - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/chat/completions" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - "messages": [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "What are some fun things to do in New York?"} - ] - }' - operationId: chat-completions - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/ChatCompletionRequest' + curl "https://api.together.xyz/v1/files/ID" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" + parameters: + - name: id + in: path + required: true + schema: + type: string responses: '200': - description: '200' - content: - application/json: - schema: - $ref: '#/components/schemas/ChatCompletionResponse' - text/event-stream: - schema: - $ref: '#/components/schemas/ChatCompletionStream' - '400': - description: 'BadRequest' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '401': - description: 'Unauthorized' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '404': - description: 'NotFound' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '429': - description: 'RateLimit' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '503': - description: 'Overloaded' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '504': - description: 'Timeout' + description: File retrieved successfully content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - deprecated: false - /completions: - post: - tags: ['Completion'] - summary: Create completion - description: Query a language, code, or image model. + $ref: '#/components/schemas/FileResponse' + delete: + tags: ['Files'] + summary: Delete a file + description: Delete a previously uploaded data file. 
x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -1203,13 +1244,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.completions.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - prompt="The largest city in France is", - max_tokens=1 - ) + response = client.files.delete(id="file-id") - print(response.choices[0].text) + print(response) - lang: Python label: Together AI SDK (v2) source: | @@ -1220,13 +1257,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.completions.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - prompt="The largest city in France is", - max_tokens=1 - ) + response = client.files.delete(id="file-id") - print(response.choices[0].text) + print(response) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -1236,13 +1269,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.completions.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - prompt: "The largest city in France is", - max_tokens: 1, - }); + const response = await client.files.delete("file-id"); - console.log(response.choices[0].text); + console.log(response); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -1252,82 +1281,32 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.completions.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - prompt: "The largest city in France is", - max_tokens: 1 - }); + const response = await client.files.delete("file-id"); - console.log(response.choices[0].text); + console.log(response); - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/completions" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - "prompt": "The largest city in France is", - "max_tokens": 1 - }' - operationId: completions - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CompletionRequest' + curl -X "DELETE" "https://api.together.xyz/v1/files/file-id" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" + parameters: + - name: id + in: path + required: true + schema: + type: string responses: '200': - description: '200' - content: - application/json: - schema: - $ref: '#/components/schemas/CompletionResponse' - text/event-stream: - schema: - $ref: '#/components/schemas/CompletionStream' - '400': - description: 'BadRequest' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '401': - description: 'Unauthorized' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '404': - description: 'NotFound' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '429': - description: 'RateLimit' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '503': - description: 'Overloaded' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '504': - description: 'Timeout' + description: File deleted successfully content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - deprecated: false - /embeddings: - post: - tags: ['Embeddings'] - summary: Create embedding - description: Query an embedding model for a given string of text. 
+ $ref: '#/components/schemas/FileDeleteResponse' + /files/{id}/content: + get: + tags: ['Files'] + summary: Get file contents + description: Get the contents of a single uploaded data file. x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -1340,12 +1319,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.embeddings.create( - model="BAAI/bge-large-en-v1.5", - input="New York City", - ) + file = client.files.retrieve_content(id="file-id") - print(response.data[0].embedding) + print(file.filename) - lang: Python label: Together AI SDK (v2) source: | @@ -1356,12 +1332,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.embeddings.create( - model="BAAI/bge-large-en-v1.5", - input="New York City", - ) - - print(response.data[0].embedding) + with client.files.with_streaming_response.content(id="file-id") as response: + for line in response.iter_lines(): + print(line) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -1371,12 +1344,10 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.embeddings.create({ - model: "BAAI/bge-large-en-v1.5", - input: "New York City", - }); + const response = await client.files.content("file-id"); + const content = await response.text(); - console.log(response.data[0].embedding); + console.log(content); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -1386,77 +1357,40 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.embeddings.create({ - model: "BAAI/bge-large-en-v1.5", - input: "New York City", - }); + const response = await client.files.content("file-id"); + const content = await response.text(); - console.log(response.data[0].embedding); + console.log(content); - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/embeddings" \ + curl "https://api.together.xyz/v1/files/file-id/content" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "BAAI/bge-large-en-v1.5", - "input": "New York City" - }' - operationId: embeddings - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/EmbeddingsRequest' + -H "Content-Type: application/json" + parameters: + - name: id + in: path + required: true + schema: + type: string responses: '200': - description: '200' - content: - application/json: - schema: - $ref: '#/components/schemas/EmbeddingsResponse' - '400': - description: 'BadRequest' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '401': - description: 'Unauthorized' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '404': - description: 'NotFound' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '429': - description: 'RateLimit' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '503': - description: 'Overloaded' + description: File content retrieved successfully content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '504': - description: 'Timeout' + $ref: '#/components/schemas/FileObject' + '500': + description: Internal Server Error content: application/json: schema: $ref: '#/components/schemas/ErrorData' - deprecated: false - /models: - get: - tags: ['Models'] - summary: List all models - description: Lists all of Together's open-source models + /files/upload: + post: + tags: ['Files'] + summary: Upload a file + description: Upload a 
file with specified purpose, file name, and file type. x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -1469,10 +1403,11 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - models = client.models.list() + current_dir = os.path.dirname(os.path.abspath(__file__)) + file_path = os.path.join(current_dir, "data.jsonl") + file = client.files.upload(file=file_path) - for model in models: - print(model.id) + print(file.id) - lang: Python label: Together AI SDK (v2) source: | @@ -1483,93 +1418,98 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - models = client.models.list() + current_dir = os.path.dirname(os.path.abspath(__file__)) + file_path = os.path.join(current_dir, "data.jsonl") + file = client.files.upload(file=file_path) - for model in models: - print(model.id) + print(file.id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); + import { upload } from "together-ai/lib/upload" + import path from "path"; + import { fileURLToPath } from "url"; - const models = await client.models.list(); + const __filename = fileURLToPath(import.meta.url); + const __dirname = path.dirname(__filename); + const filepath = path.join(__dirname, "data.jsonl"); + const file = await upload(filepath); - for (const model of models) { - console.log(model.id); - } + console.log(file.id); - lang: JavaScript label: Together AI SDK (JavaScript) source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); + import { upload } from "together-ai/lib/upload" + import path from "path"; + import { fileURLToPath } from "url"; - const models = await client.models.list(); + const __filename = fileURLToPath(import.meta.url); + const __dirname = path.dirname(__filename); + const filepath = path.join(__dirname, "data.jsonl"); + const file = await upload(filepath); - for (const model of models) { - console.log(model.id); - } + console.log(file.id); - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/models" \ + curl "https://api.together.xyz/v1/files/upload" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - operationId: models - parameters: - - name: dedicated - in: query - description: Filter models to only return dedicated models - schema: - type: boolean + -F "file=@/path/to/data.jsonl" \ + -F "file_name=data.jsonl" \ + -F "purpose=fine-tune" + requestBody: + required: true + content: + multipart/form-data: + schema: + type: object + required: + - purpose + - file_name + - file + properties: + purpose: + $ref: '#/components/schemas/FilePurpose' + file_name: + type: string + description: The name of the file being uploaded + example: 'dataset.csv' + file_type: + $ref: '#/components/schemas/FileType' + file: + type: string + format: binary + description: The content of the file being uploaded responses: '200': - description: '200' - content: - application/json: - schema: - $ref: '#/components/schemas/ModelInfoList' - '400': - description: 'BadRequest' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '401': - description: 'Unauthorized' + description: File uploaded successfully content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '404': - description: 'NotFound' + $ref: '#/components/schemas/FileResponse' + '500': + description: Internal Server Error content: application/json: schema: $ref: 
'#/components/schemas/ErrorData' - '429': - description: 'RateLimit' + '400': + description: Bad Request content: application/json: schema: $ref: '#/components/schemas/ErrorData' - '504': - description: 'Timeout' + '401': + description: Unauthorized content: application/json: schema: $ref: '#/components/schemas/ErrorData' - deprecated: false + /fine-tunes: post: - tags: ['Models'] - summary: Upload a custom model or adapter - description: Upload a custom model or adapter from Hugging Face or S3 + tags: ['Fine-tuning'] + summary: Create job + description: Create a fine-tuning job with the provided model and training data. x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -1582,12 +1522,12 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.models.upload( - "My-Fine-Tuned-Model", - "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz", + response = client.fine_tuning.create( + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", + training_file="file-id" ) - print(response.job_id) + print(response) - lang: Python label: Together AI SDK (v2) source: | @@ -1598,12 +1538,12 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.models.upload( - model_name="My-Fine-Tuned-Model", - model_source="https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz", + response = client.fine_tuning.create( + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", + training_file="file-id" ) - print(response.data.job_id) + print(response) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -1613,10 +1553,10 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.models.upload({ - model_name: "My-Fine-Tuned-Model", - model_source: "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz", - }) + const response = await client.fineTuning.create({ + model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", + training_file: "file-id", + }); console.log(response); - lang: JavaScript @@ -1628,78 +1568,148 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.models.upload({ - model_name: "My-Fine-Tuned-Model", - model_source: "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz", - }) + const response = await client.fineTuning.create({ + model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", + training_file: "file-id", + }); console.log(response); - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/models" \ + curl -X POST "https://api.together.xyz/v1/fine-tunes" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ -H "Content-Type: application/json" \ -d '{ - "model_name": "My-Fine-Tuned-Model", - "model_source": "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz" - }' - operationId: uploadModel + "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", + "training_file": "file-id" + }' requestBody: required: true content: application/json: schema: - $ref: '#/components/schemas/ModelUploadRequest' - responses: - '200': - description: Model / adapter upload job created successfully - content: - application/json: - schema: - $ref: '#/components/schemas/ModelUploadSuccessResponse' - - /jobs/{jobId}: - get: - tags: ['Jobs'] - summary: Get job status - description: Get the status of a specific job - operationId: getJob - parameters: - - name: jobId - in: path - required: true - schema: - type: string - description: The ID of the job 
to retrieve - example: job-a15dad11-8d8e-4007-97c5-a211304de284 + type: object + required: + - training_file + - model + properties: + training_file: + type: string + description: File-ID of a training file uploaded to the Together API + validation_file: + type: string + description: File-ID of a validation file uploaded to the Together API + model: + type: string + description: Name of the base model to run fine-tune job on + n_epochs: + type: integer + default: 1 + description: Number of complete passes through the training dataset (higher values may improve results but increase cost and risk of overfitting) + n_checkpoints: + type: integer + default: 1 + description: Number of intermediate model versions saved during training for evaluation + n_evals: + type: integer + default: 0 + description: Number of evaluations to be run on a given validation set during training + batch_size: + oneOf: + - type: integer + - type: string + enum: + - max + default: 'max' + description: Number of training examples processed together (larger batches use more memory but may train faster). Defaults to "max". We use training optimizations like packing, so the effective batch size may be different than the value you set. + learning_rate: + type: number + format: float + default: 0.00001 + description: Controls how quickly the model adapts to new information (too high may cause instability, too low may slow convergence) + lr_scheduler: + type: object + default: none + $ref: '#/components/schemas/LRScheduler' + description: The learning rate scheduler to use. It specifies how the learning rate is adjusted during training. + warmup_ratio: + type: number + format: float + default: 0.0 + description: The percent of steps at the start of training to linearly increase the learning rate. + max_grad_norm: + type: number + format: float + default: 1.0 + description: Max gradient norm to be used for gradient clipping. Set to 0 to disable. + weight_decay: + type: number + format: float + default: 0.0 + description: Weight decay. Regularization parameter for the optimizer. + suffix: + type: string + description: Suffix that will be added to your fine-tuned model name + wandb_api_key: + type: string + description: Integration key for tracking experiments and model metrics on W&B platform + wandb_base_url: + type: string + description: The base URL of a dedicated Weights & Biases instance. + wandb_project_name: + type: string + description: The Weights & Biases project for your run. If not specified, will use `together` as the project name. + wandb_name: + type: string + description: The Weights & Biases name for your run. + train_on_inputs: + oneOf: + - type: boolean + - type: string + enum: + - auto + type: boolean + default: auto + description: Whether to mask the user messages in conversational data or prompts in instruction data. + deprecated: true + training_method: + type: object + oneOf: + - $ref: '#/components/schemas/TrainingMethodSFT' + - $ref: '#/components/schemas/TrainingMethodDPO' + description: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization. + training_type: + type: object + oneOf: + - $ref: '#/components/schemas/FullTrainingType' + - $ref: '#/components/schemas/LoRATrainingType' + from_checkpoint: + type: string + description: The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or `{$OUTPUT_MODEL_NAME}:{$STEP}`. 
The step value is optional; without it, the final checkpoint will be used. + from_hf_model: + type: string + description: The Hugging Face Hub repo to start training from. Should be as close as possible to the base model (specified by the `model` argument) in terms of architecture and size. + hf_model_revision: + type: string + description: The revision of the Hugging Face Hub model to continue training from. E.g., hf_model_revision=main (default, used if the argument is not provided) or hf_model_revision='607a30d783dfa663caf39e06633721c8d4cfcd7e' (specific commit). + hf_api_token: + type: string + description: The API token for the Hugging Face Hub. + hf_output_repo_name: + type: string + description: The name of the Hugging Face repository to upload the fine-tuned model to. responses: '200': - description: Job status retrieved successfully + description: Fine-tuning job initiated successfully content: application/json: schema: - $ref: '#/components/schemas/JobInfoSuccessResponse' - - /jobs: + $ref: '#/components/schemas/FinetuneResponseTruncated' get: - tags: ['Jobs'] + tags: ['Fine-tuning'] summary: List all jobs - description: List all jobs and their statuses - operationId: listJobs - responses: - '200': - description: Jobs retrieved successfully - content: - application/json: - schema: - $ref: '#/components/schemas/JobsInfoSuccessResponse' - - /images/generations: - post: - tags: ['Images'] - summary: Create image - description: Use an image model to generate an image for a given prompt. + description: List the metadata for all fine-tuning jobs. Returns a list of FinetuneResponseTruncated objects. x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -1712,13 +1722,10 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.images.generate( - model="black-forest-labs/FLUX.1-schnell", - steps=4, - prompt="A cartoon of an astronaut riding a horse on the moon", - ) + response = client.fine_tuning.list() - print(response.data[0].url) + for fine_tune in response.data: + print(f"ID: {fine_tune.id}, Status: {fine_tune.status}") - lang: Python label: Together AI SDK (v2) source: | @@ -1729,13 +1736,10 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.images.generate( - model="black-forest-labs/FLUX.1-schnell", - steps=4, - prompt="A cartoon of an astronaut riding a horse on the moon", - ) + response = client.fine_tuning.list() - print(response.data[0].url) + for fine_tune in response.data: + print(f"ID: {fine_tune.id}, Status: {fine_tune.status}") - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -1745,12 +1749,11 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.images.generate({ - model: "black-forest-labs/FLUX.1-schnell", - prompt: "A cartoon of an astronaut riding a horse on the moon", - }); + const response = await client.fineTuning.list(); - console.log(response.data[0].url); + for (const fineTune of response.data) { + console.log(fineTune.id, fineTune.status); + } - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -1760,22 +1763,29 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.images.generate({ - model: "black-forest-labs/FLUX.1-schnell", - prompt: "A cartoon of an astronaut riding a horse on the moon", - }); + const response = await client.fineTuning.list(); - console.log(response.data[0].url); + for (const fineTune of response.data) { + console.log(fineTune.id, fineTune.status); + } - lang: Shell label: cURL source: | - curl -X 
POST "https://api.together.xyz/v1/images/generations" \ + curl "https://api.together.xyz/v1/fine-tunes" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "black-forest-labs/FLUX.1-schnell", - "prompt": "A cartoon of an astronaut riding a horse on the moon" - }' + -H "Content-Type: application/json" + responses: + '200': + description: List of fine-tune jobs + content: + application/json: + schema: + $ref: '#/components/schemas/FinetuneTruncatedList' + /fine-tunes/estimate-price: + post: + tags: ['Fine-tuning'] + summary: Estimate price + description: Estimate the price of a fine-tuning job. requestBody: required: true content: @@ -1783,103 +1793,74 @@ paths: schema: type: object required: - - prompt - - model + - training_file properties: - prompt: + training_file: type: string - description: A description of the desired images. Maximum length varies by model. - example: cat floating in space, cinematic - model: + description: File-ID of a training file uploaded to the Together API + validation_file: type: string - description: > - The model to use for image generation.
-
- [See all of Together AI's image models](https://docs.together.ai/docs/serverless-models#image-models) - example: black-forest-labs/FLUX.1-schnell - anyOf: - - type: string - enum: - - black-forest-labs/FLUX.1-schnell-Free - - black-forest-labs/FLUX.1-schnell - - black-forest-labs/FLUX.1.1-pro - - type: string - steps: - type: integer - default: 20 - description: Number of generation steps. - image_url: + description: File-ID of a validation file uploaded to the Together API + model: type: string - description: URL of an image to use for image models that support it. - seed: - type: integer - description: Seed used for generation. Can be used to reproduce image generations. - n: + description: Name of the base model to run fine-tune job on + n_epochs: type: integer default: 1 - description: Number of image results to generate. - height: - type: integer - default: 1024 - description: Height of the image to generate in number of pixels. - width: + description: Number of complete passes through the training dataset (higher values may improve results but increase cost and risk of overfitting) + n_evals: type: integer - default: 1024 - description: Width of the image to generate in number of pixels. - negative_prompt: - type: string - description: The prompt or prompts not to guide the image generation. - response_format: - type: string - description: Format of the image response. Can be either a base64 string or a URL. - enum: - - base64 - - url - guidance_scale: - type: number - description: Adjusts the alignment of the generated image with the input prompt. Higher values (e.g., 8-10) make the output more faithful to the prompt, while lower values (e.g., 1-5) encourage more creative freedom. - default: 3.5 - output_format: + default: 0 + description: Number of evaluations to be run on a given validation set during training + training_method: + type: object + oneOf: + - $ref: '#/components/schemas/TrainingMethodSFT' + - $ref: '#/components/schemas/TrainingMethodDPO' + description: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization. + training_type: + type: object + oneOf: + - $ref: '#/components/schemas/FullTrainingType' + - $ref: '#/components/schemas/LoRATrainingType' + from_checkpoint: type: string - description: The format of the image response. Can be either be `jpeg` or `png`. Defaults to `jpeg`. - default: jpeg - enum: - - jpeg - - png - image_loras: - description: An array of objects that define LoRAs (Low-Rank Adaptations) to influence the generated image. - type: array - items: - type: object - required: [path, scale] - properties: - path: - type: string - description: The URL of the LoRA to apply (e.g. https://huggingface.co/strangerzonehf/Flux-Midjourney-Mix2-LoRA). - scale: - type: number - description: The strength of the LoRA's influence. Most LoRA's recommend a value of 1. - reference_images: - description: An array of image URLs that guide the overall appearance and style of the generated image. These reference images influence the visual characteristics consistently across the generation. - type: array - items: - type: string - description: URL of a reference image to guide the image generation. - disable_safety_checker: - type: boolean - description: If true, disables the safety checker for image generation. + description: The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or `{$OUTPUT_MODEL_NAME}:{$STEP}`. 
The step value is optional; without it, the final checkpoint will be used. responses: + '500': + description: Internal Server Error + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' '200': - description: Image generated successfully + description: Price estimated successfully content: application/json: schema: - $ref: '#/components/schemas/ImageResponse' - /files: + type: object + properties: + estimated_total_price: + type: number + description: The price of the fine-tuning job + allowed_to_proceed: + type: boolean + description: Whether the user is allowed to proceed with the fine-tuning job + example: true + user_limit: + type: number + description: The user's credit limit in dollars + estimated_train_token_count: + type: number + description: The estimated number of tokens to be trained + estimated_eval_token_count: + type: number + description: The estimated number of tokens for evaluation + /fine-tunes/{id}: get: - tags: ['Files'] - summary: List all files - description: List the metadata for all uploaded data files. + tags: ['Fine-tuning'] + summary: List job + description: List the metadata for a single fine-tuning job. x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -1892,10 +1873,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.files.list() + fine_tune = client.fine_tuning.retrieve(id="ft-id") - for file in response.data: - print(file.id) + print(fine_tune) - lang: Python label: Together AI SDK (v2) source: | @@ -1906,10 +1886,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.files.list() + fine_tune = client.fine_tuning.retrieve(id="ft-id") - for file in response.data: - print(file.id) + print(fine_tune) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -1919,11 +1898,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.files.list(); + const fineTune = await client.fineTuning.retrieve("ft-id"); - for (const file of response.data) { - console.log(file.id); - } + console.log(fineTune); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -1933,29 +1910,32 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.files.list(); + const fineTune = await client.fineTuning.retrieve("ft-id"); - for (const file of response.data) { - console.log(file.id); - } + console.log(fineTune); - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/files" \ + curl "https://api.together.xyz/v1/fine-tunes/ft-id" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ -H "Content-Type: application/json" + parameters: + - name: id + in: path + required: true + schema: + type: string responses: '200': - description: List of files + description: Fine-tune job details retrieved successfully content: application/json: schema: - $ref: '#/components/schemas/FileList' - /files/{id}: - get: - tags: ['Files'] - summary: List file - description: List the metadata for a single uploaded data file. + $ref: '#/components/schemas/FinetuneResponse' + delete: + tags: ['Fine-tuning'] + summary: Delete a fine-tune job + description: Delete a fine-tuning job. 
x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -1968,9 +1948,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - file = client.files.retrieve(id="file-id") + response = client.fine_tuning.delete(id="ft-id") - print(file) + print(response) - lang: Python label: Together AI SDK (v2) source: | @@ -1981,9 +1961,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - file = client.files.retrieve(id="file-id") + response = client.fine_tuning.delete(id="ft-id") - print(file) + print(response) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -1993,9 +1973,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const file = await client.files.retrieve("file-id"); + const response = await client.fineTuning.delete("ft-id"); - console.log(file); + console.log(response); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -2005,13 +1985,13 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const file = await client.files.retrieve("file-id"); + const response = await client.fineTuning.delete("ft-id"); - console.log(file); + console.log(response); - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/files/ID" \ + curl -X "DELETE" "https://api.together.xyz/v1/fine-tunes/ft-id?force=false" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ -H "Content-Type: application/json" parameters: @@ -2020,17 +2000,35 @@ paths: required: true schema: type: string + - name: force + in: query + schema: + type: boolean + default: false responses: '200': - description: File retrieved successfully + description: Fine-tune job deleted successfully content: application/json: schema: - $ref: '#/components/schemas/FileResponse' - delete: - tags: ['Files'] - summary: Delete a file - description: Delete a previously uploaded data file. + $ref: '#/components/schemas/FinetuneDeleteResponse' + '404': + description: Fine-tune job not found + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '500': + description: Internal server error + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + /fine-tunes/{id}/events: + get: + tags: ['Fine-tuning'] + summary: List job events + description: List the events for a single fine-tuning job. 
x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -2043,9 +2041,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.files.delete(id="file-id") + events = client.fine_tuning.list_events(id="ft-id") - print(response) + print(events) - lang: Python label: Together AI SDK (v2) source: | @@ -2056,9 +2054,10 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.files.delete(id="file-id") + response = client.fine_tuning.list_events(id="ft-id") - print(response) + for event in response.data: + print(event) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -2068,9 +2067,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.files.delete("file-id"); + const events = await client.fineTuning.listEvents("ft-id"); - console.log(response); + console.log(events); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -2080,14 +2079,15 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.files.delete("file-id"); + const events = await client.fineTuning.listEvents("ft-id"); - console.log(response); + console.log(events); - lang: Shell label: cURL source: | - curl -X "DELETE" "https://api.together.xyz/v1/files/file-id" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" + curl "https://api.together.xyz/v1/fine-tunes/ft-id/events" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" parameters: - name: id in: path @@ -2096,16 +2096,16 @@ paths: type: string responses: '200': - description: File deleted successfully + description: List of fine-tune events content: application/json: schema: - $ref: '#/components/schemas/FileDeleteResponse' - /files/{id}/content: + $ref: '#/components/schemas/FinetuneListEvents' + /fine-tunes/{id}/checkpoints: get: - tags: ['Files'] - summary: Get file contents - description: Get the contents of a single uploaded data file. + tags: ['Fine-tuning'] + summary: List checkpoints + description: List the checkpoints for a single fine-tuning job. 
x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -2118,9 +2118,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - file = client.files.retrieve_content(id="file-id") + checkpoints = client.fine_tuning.list_checkpoints(id="ft-id") - print(file.filename) + print(checkpoints) - lang: Python label: Together AI SDK (v2) source: | @@ -2131,9 +2131,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - with client.files.with_streaming_response.content(id="file-id") as response: - for line in response.iter_lines(): - print(line) + checkpoints = client.fine_tuning.list_checkpoints(id="ft-id") + + print(checkpoints) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -2143,10 +2143,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.files.content("file-id"); - const content = await response.text(); + const checkpoints = await client.fineTuning.listCheckpoints("ft-id"); - console.log(content); + console.log(checkpoints); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -2156,14 +2155,13 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.files.content("file-id"); - const content = await response.text(); + const checkpoints = await client.fineTuning.listCheckpoints("ft-id"); - console.log(content); + console.log(checkpoints); - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/files/file-id/content" \ + curl "https://api.together.xyz/v1/fine-tunes/ft-id/checkpoints" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ -H "Content-Type: application/json" parameters: @@ -2174,22 +2172,16 @@ paths: type: string responses: '200': - description: File content retrieved successfully - content: - application/json: - schema: - $ref: '#/components/schemas/FileObject' - '500': - description: Internal Server Error + description: List of fine-tune checkpoints content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - /files/upload: - post: - tags: ['Files'] - summary: Upload a file - description: Upload a file with specified purpose, file name, and file type. + $ref: '#/components/schemas/FinetuneListCheckpoints' + /finetune/download: + get: + tags: ['Fine-tuning'] + summary: Download model + description: Receive a compressed fine-tuned model or checkpoint. x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -2202,11 +2194,10 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - current_dir = os.path.dirname(os.path.abspath(__file__)) - file_path = os.path.join(current_dir, "data.jsonl") - file = client.files.upload(file=file_path) + # This will download the content to a location on disk + response = client.fine_tuning.download(id="ft-id") - print(file.id) + print(response) - lang: Python label: Together AI SDK (v2) source: | @@ -2217,98 +2208,86 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - current_dir = os.path.dirname(os.path.abspath(__file__)) - file_path = os.path.join(current_dir, "data.jsonl") - file = client.files.upload(file=file_path) + # Using `with_streaming_response` gives you control to do what you want with the response. 
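+            # Note: this endpoint streams back a binary archive of the fine-tuned
+            # model or checkpoint; printing lines as below is only for illustration.
+            # In practice you would write the streamed bytes to a local file instead.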
+ stream = client.fine_tuning.with_streaming_response.content(ft_id="ft-id") + + with stream as response: + for line in response.iter_lines(): + print(line) - print(file.id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | - import { upload } from "together-ai/lib/upload" - import path from "path"; - import { fileURLToPath } from "url"; + import Together from "together-ai"; - const __filename = fileURLToPath(import.meta.url); - const __dirname = path.dirname(__filename); - const filepath = path.join(__dirname, "data.jsonl"); - const file = await upload(filepath); + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); - console.log(file.id); + const response = await client.fineTuning.content({ + ft_id: "ft-id", + }); + + console.log(await response.blob()); - lang: JavaScript label: Together AI SDK (JavaScript) source: | - import { upload } from "together-ai/lib/upload" - import path from "path"; - import { fileURLToPath } from "url"; - - const __filename = fileURLToPath(import.meta.url); - const __dirname = path.dirname(__filename); - const filepath = path.join(__dirname, "data.jsonl"); - const file = await upload(filepath); + import Together from "together-ai"; - console.log(file.id); - - lang: Shell - label: cURL - source: | - curl "https://api.together.xyz/v1/files/upload" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -F "file=@/path/to/data.jsonl" \ - -F "file_name=data.jsonl" \ - -F "purpose=fine-tune" - requestBody: - required: true - content: - multipart/form-data: - schema: - type: object - required: - - purpose - - file_name - - file - properties: - purpose: - $ref: '#/components/schemas/FilePurpose' - file_name: - type: string - description: The name of the file being uploaded - example: 'dataset.csv' - file_type: - $ref: '#/components/schemas/FileType' - file: - type: string - format: binary - description: The content of the file being uploaded + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const response = await client.fineTuning.content({ + ft_id: "ft-id", + }); + + console.log(await response.blob()); + - lang: Shell + label: cURL + source: | + curl "https://api.together.xyz/v1/finetune/download?ft_id=ft-id&checkpoint=merged" + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" + parameters: + - in: query + name: ft_id + schema: + type: string + required: true + description: Fine-tune ID to download. A string that starts with `ft-`. + - in: query + name: checkpoint_step + schema: + type: integer + required: false + description: Specifies step number for checkpoint to download. Ignores `checkpoint` value if set. + - in: query + name: checkpoint + schema: + type: string + enum: + - merged + - adapter + - model_output_path + description: Specifies checkpoint type to download - `merged` vs `adapter`. This field is required if the checkpoint_step is not set. responses: '200': - description: File uploaded successfully - content: - application/json: - schema: - $ref: '#/components/schemas/FileResponse' - '500': - description: Internal Server Error + description: Successfully downloaded the fine-tuned model or checkpoint. 
content: - application/json: + application/octet-stream: schema: - $ref: '#/components/schemas/ErrorData' + type: string + format: binary '400': - description: Bad Request - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '401': - description: Unauthorized - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - /fine-tunes: + description: Invalid request parameters. + '404': + description: Fine-tune ID not found. + /fine-tunes/{id}/cancel: post: tags: ['Fine-tuning'] - summary: Create job - description: Create a fine-tuning job with the provided model and training data. + summary: Cancel job + description: Cancel a currently running fine-tuning job. Returns a FinetuneResponseTruncated object. x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -2321,10 +2300,7 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.fine_tuning.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", - training_file="file-id" - ) + response = client.fine_tuning.cancel(id="ft-id") print(response) - lang: Python @@ -2337,10 +2313,7 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.fine_tuning.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", - training_file="file-id" - ) + response = client.fine_tuning.cancel(id="ft-id") print(response) - lang: TypeScript @@ -2352,10 +2325,7 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.fineTuning.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", - training_file: "file-id", - }); + const response = await client.fineTuning.cancel("ft-id"); console.log(response); - lang: JavaScript @@ -2367,148 +2337,38 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.fineTuning.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", - training_file: "file-id", - }); + const response = await client.fineTuning.cancel("ft-id"); console.log(response); - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/fine-tunes" \ + curl -X POST "https://api.together.xyz/v1/fine-tunes/ft-id/cancel" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", - "training_file": "file-id" - }' - requestBody: - required: true - content: - application/json: - schema: - type: object - required: - - training_file - - model - properties: - training_file: - type: string - description: File-ID of a training file uploaded to the Together API - validation_file: - type: string - description: File-ID of a validation file uploaded to the Together API - model: - type: string - description: Name of the base model to run fine-tune job on - n_epochs: - type: integer - default: 1 - description: Number of complete passes through the training dataset (higher values may improve results but increase cost and risk of overfitting) - n_checkpoints: - type: integer - default: 1 - description: Number of intermediate model versions saved during training for evaluation - n_evals: - type: integer - default: 0 - description: Number of evaluations to be run on a given validation set during training - batch_size: - oneOf: - - type: integer - - type: string - enum: - - max - default: 'max' - description: Number of training examples processed together (larger batches use more memory but may train faster). Defaults to "max". 
We use training optimizations like packing, so the effective batch size may be different than the value you set. - learning_rate: - type: number - format: float - default: 0.00001 - description: Controls how quickly the model adapts to new information (too high may cause instability, too low may slow convergence) - lr_scheduler: - type: object - default: none - $ref: '#/components/schemas/LRScheduler' - description: The learning rate scheduler to use. It specifies how the learning rate is adjusted during training. - warmup_ratio: - type: number - format: float - default: 0.0 - description: The percent of steps at the start of training to linearly increase the learning rate. - max_grad_norm: - type: number - format: float - default: 1.0 - description: Max gradient norm to be used for gradient clipping. Set to 0 to disable. - weight_decay: - type: number - format: float - default: 0.0 - description: Weight decay. Regularization parameter for the optimizer. - suffix: - type: string - description: Suffix that will be added to your fine-tuned model name - wandb_api_key: - type: string - description: Integration key for tracking experiments and model metrics on W&B platform - wandb_base_url: - type: string - description: The base URL of a dedicated Weights & Biases instance. - wandb_project_name: - type: string - description: The Weights & Biases project for your run. If not specified, will use `together` as the project name. - wandb_name: - type: string - description: The Weights & Biases name for your run. - train_on_inputs: - oneOf: - - type: boolean - - type: string - enum: - - auto - type: boolean - default: auto - description: Whether to mask the user messages in conversational data or prompts in instruction data. - deprecated: true - training_method: - type: object - oneOf: - - $ref: '#/components/schemas/TrainingMethodSFT' - - $ref: '#/components/schemas/TrainingMethodDPO' - description: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization. - training_type: - type: object - oneOf: - - $ref: '#/components/schemas/FullTrainingType' - - $ref: '#/components/schemas/LoRATrainingType' - from_checkpoint: - type: string - description: The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the final checkpoint will be used. - from_hf_model: - type: string - description: The Hugging Face Hub repo to start training from. Should be as close as possible to the base model (specified by the `model` argument) in terms of architecture and size. - hf_model_revision: - type: string - description: The revision of the Hugging Face Hub model to continue training from. E.g., hf_model_revision=main (default, used if the argument is not provided) or hf_model_revision='607a30d783dfa663caf39e06633721c8d4cfcd7e' (specific commit). - hf_api_token: - type: string - description: The API token for the Hugging Face Hub. - hf_output_repo_name: - type: string - description: The name of the Hugging Face repository to upload the fine-tuned model to. + -H "Content-Type: application/json" + parameters: + - in: path + name: id + schema: + type: string + required: true + description: Fine-tune ID to cancel. A string that starts with `ft-`. responses: '200': - description: Fine-tuning job initiated successfully + description: Successfully cancelled the fine-tuning job. 
content: application/json: schema: $ref: '#/components/schemas/FinetuneResponseTruncated' - get: - tags: ['Fine-tuning'] - summary: List all jobs - description: List the metadata for all fine-tuning jobs. Returns a list of FinetuneResponseTruncated objects. + '400': + description: Invalid request parameters. + '404': + description: Fine-tune ID not found. + /rerank: + post: + tags: ['Rerank'] + summary: Create a rerank request + description: Query a reranker model x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -2521,10 +2381,35 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.fine_tuning.list() - - for fine_tune in response.data: - print(f"ID: {fine_tune.id}, Status: {fine_tune.status}") + documents = [ + { + "title": "Llama", + "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." + }, + { + "title": "Panda", + "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." + }, + { + "title": "Guanaco", + "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." + }, + { + "title": "Wild Bactrian camel", + "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." + } + ] + + response = client.rerank.create( + model="Salesforce/Llama-Rank-v1", + query="What animals can I find near Peru?", + documents=documents, + ) + + for result in response.results: + print(f"Rank: {result.index + 1}") + print(f"Title: {documents[result.index]['title']}") + print(f"Text: {documents[result.index]['text']}") - lang: Python label: Together AI SDK (v2) source: | @@ -2535,10 +2420,35 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.fine_tuning.list() + documents = [ + { + "title": "Llama", + "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." + }, + { + "title": "Panda", + "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." + }, + { + "title": "Guanaco", + "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." + }, + { + "title": "Wild Bactrian camel", + "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." 
+ } + ] - for fine_tune in response.data: - print(f"ID: {fine_tune.id}, Status: {fine_tune.status}") + response = client.rerank.create( + model="Salesforce/Llama-Rank-v1", + query="What animals can I find near Peru?", + documents=documents, + ) + + for result in response.results: + print(f"Rank: {result.index + 1}") + print(f"Title: {documents[result.index]['title']}") + print(f"Text: {documents[result.index]['text']}") - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -2548,10 +2458,33 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.fineTuning.list(); + const documents = [{ + "title": "Llama", + "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." + }, + { + "title": "Panda", + "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." + }, + { + "title": "Guanaco", + "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." + }, + { + "title": "Wild Bactrian camel", + "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." + }]; - for (const fineTune of response.data) { - console.log(fineTune.id, fineTune.status); + const response = await client.rerank.create({ + model: "Salesforce/Llama-Rank-v1", + query: "What animals can I find near Peru?", + documents, + }); + + for (const result of response.results) { + console.log(`Rank: ${result.index + 1}`); + console.log(`Title: ${documents[result.index].title}`); + console.log(`Text: ${documents[result.index].text}`); } - lang: JavaScript label: Together AI SDK (JavaScript) @@ -2562,104 +2495,115 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.fineTuning.list(); + const documents = [{ + "title": "Llama", + "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." + }, + { + "title": "Panda", + "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." + }, + { + "title": "Guanaco", + "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." + }, + { + "title": "Wild Bactrian camel", + "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." 
+ }]; - for (const fineTune of response.data) { - console.log(fineTune.id, fineTune.status); + const response = await client.rerank.create({ + model: "Salesforce/Llama-Rank-v1", + query: "What animals can I find near Peru?", + documents, + }); + + for (const result of response.results) { + console.log(`Rank: ${result.index + 1}`); + console.log(`Title: ${documents[result.index].title}`); + console.log(`Text: ${documents[result.index].text}`); } - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/fine-tunes" \ + curl -X POST "https://api.together.xyz/v1/rerank" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - responses: - '200': - description: List of fine-tune jobs - content: - application/json: - schema: - $ref: '#/components/schemas/FinetuneTruncatedList' - /fine-tunes/estimate-price: - post: - tags: ['Fine-tuning'] - summary: Estimate price - description: Estimate the price of a fine-tuning job. + -H "Content-Type: application/json" \ + -d '{ + "model": "Salesforce/Llama-Rank-v1", + "query": "What animals can I find near Peru?", + "documents": [{ + "title": "Llama", + "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." + }, + { + "title": "Panda", + "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." + }, + { + "title": "Guanaco", + "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." + }, + { + "title": "Wild Bactrian camel", + "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." + }] + }' + operationId: rerank requestBody: - required: true content: application/json: schema: - type: object - required: - - training_file - properties: - training_file: - type: string - description: File-ID of a training file uploaded to the Together API - validation_file: - type: string - description: File-ID of a validation file uploaded to the Together API - model: - type: string - description: Name of the base model to run fine-tune job on - n_epochs: - type: integer - default: 1 - description: Number of complete passes through the training dataset (higher values may improve results but increase cost and risk of overfitting) - n_evals: - type: integer - default: 0 - description: Number of evaluations to be run on a given validation set during training - training_method: - type: object - oneOf: - - $ref: '#/components/schemas/TrainingMethodSFT' - - $ref: '#/components/schemas/TrainingMethodDPO' - description: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization. - training_type: - type: object - oneOf: - - $ref: '#/components/schemas/FullTrainingType' - - $ref: '#/components/schemas/LoRATrainingType' - from_checkpoint: - type: string - description: The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the final checkpoint will be used. 
+ $ref: '#/components/schemas/RerankRequest' responses: - '500': - description: Internal Server Error + '200': + description: '200' + content: + application/json: + schema: + $ref: '#/components/schemas/RerankResponse' + '400': + description: 'BadRequest' content: application/json: schema: $ref: '#/components/schemas/ErrorData' - '200': - description: Price estimated successfully + '401': + description: 'Unauthorized' content: application/json: schema: - type: object - properties: - estimated_total_price: - type: number - description: The price of the fine-tuning job - allowed_to_proceed: - type: boolean - description: Whether the user is allowed to proceed with the fine-tuning job - example: true - user_limit: - type: number - description: The user's credit limit in dollars - estimated_train_token_count: - type: number - description: The estimated number of tokens to be trained - estimated_eval_token_count: - type: number - description: The estimated number of tokens for evaluation - /fine-tunes/{id}: - get: - tags: ['Fine-tuning'] - summary: List job - description: List the metadata for a single fine-tuning job. + $ref: '#/components/schemas/ErrorData' + '404': + description: 'NotFound' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '429': + description: 'RateLimit' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '503': + description: 'Overloaded' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '504': + description: 'Timeout' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + deprecated: false + /audio/speech: + post: + tags: ['Audio'] + summary: Create audio generation request + description: Generate audio from input text x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -2672,9 +2616,13 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - fine_tune = client.fine_tuning.retrieve(id="ft-id") + response = client.audio.speech.create( + model="cartesia/sonic-2", + input="The quick brown fox jumps over the lazy dog.", + voice="laidback woman", + ) - print(fine_tune) + response.stream_to_file("audio.wav") - lang: Python label: Together AI SDK (v2) source: | @@ -2685,226 +2633,545 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - fine_tune = client.fine_tuning.retrieve(id="ft-id") + response = client.audio.speech.with_streaming_response.create( + model="cartesia/sonic-2", + input="The quick brown fox jumps over the lazy dog.", + voice="laidback woman", + ) - print(fine_tune) + with response as stream: + stream.stream_to_file("audio.wav") - lang: TypeScript label: Together AI SDK (TypeScript) source: | import Together from "together-ai"; + import { createWriteStream } from "fs"; + import { join } from "path"; + import { pipeline } from "stream/promises"; const client = new Together({ apiKey: process.env.TOGETHER_API_KEY, }); - const fineTune = await client.fineTuning.retrieve("ft-id"); + const response = await client.audio.speech.create({ + model: "cartesia/sonic-2", + input: "The quick brown fox jumps over the lazy dog.", + voice: "laidback woman", + }); - console.log(fineTune); + const filepath = join(process.cwd(), "audio.wav"); + const writeStream = createWriteStream(filepath); + + if (response.body) { + await pipeline(response.body, writeStream); + } - lang: JavaScript label: Together AI SDK (JavaScript) source: | import Together from "together-ai"; + import { createWriteStream } from "fs"; + import { join } from "path"; + 
import { pipeline } from "stream/promises"; const client = new Together({ apiKey: process.env.TOGETHER_API_KEY, }); - const fineTune = await client.fineTuning.retrieve("ft-id"); + const response = await client.audio.speech.create({ + model: "cartesia/sonic-2", + input: "The quick brown fox jumps over the lazy dog.", + voice: "laidback woman", + }); - console.log(fineTune); + const filepath = join(process.cwd(), "audio.wav"); + const writeStream = createWriteStream(filepath); + + if (response.body) { + await pipeline(response.body, writeStream); + } - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/fine-tunes/ft-id" \ + curl -X POST "https://api.together.xyz/v1/audio/speech" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - parameters: - - name: id - in: path - required: true - schema: - type: string + -H "Content-Type: application/json" \ + -d '{ + "model": "cartesia/sonic-2", + "input": "The quick brown fox jumps over the lazy dog.", + "voice": "laidback woman" + }' \ + --output audio.wav + operationId: audio-speech + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/AudioSpeechRequest' responses: '200': - description: Fine-tune job details retrieved successfully + description: 'OK' + content: + application/octet-stream: + schema: + type: string + format: binary + audio/wav: + schema: + type: string + format: binary + audio/mpeg: + schema: + type: string + format: binary + text/event-stream: + schema: + $ref: '#/components/schemas/AudioSpeechStreamResponse' + '400': + description: 'BadRequest' content: application/json: schema: - $ref: '#/components/schemas/FinetuneResponse' - delete: - tags: ['Fine-tuning'] - summary: Delete a fine-tune job - description: Delete a fine-tuning job. + $ref: '#/components/schemas/ErrorData' + '429': + description: 'RateLimit' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + /audio/speech/websocket: + get: + tags: ['Audio'] + summary: Real-time text-to-speech via WebSocket + description: | + Establishes a WebSocket connection for real-time text-to-speech generation. This endpoint uses WebSocket protocol (wss://api.together.ai/v1/audio/speech/websocket) for bidirectional streaming communication. + + **Connection Setup:** + - Protocol: WebSocket (wss://) + - Authentication: Pass API key as Bearer token in Authorization header + - Parameters: Sent as query parameters (model, voice, max_partial_length) + + **Client Events:** + - `tts_session.updated`: Update session parameters like voice + ```json + { + "type": "tts_session.updated", + "session": { + "voice": "tara" + } + } + ``` + - `input_text_buffer.append`: Send text chunks for TTS generation + ```json + { + "type": "input_text_buffer.append", + "text": "Hello, this is a test." 
+ } + ``` + - `input_text_buffer.clear`: Clear the buffered text + ```json + { + "type": "input_text_buffer.clear" + } + ``` + - `input_text_buffer.commit`: Signal end of text input and process remaining text + ```json + { + "type": "input_text_buffer.commit" + } + ``` + + **Server Events:** + - `session.created`: Initial session confirmation (sent first) + ```json + { + "event_id": "evt_123456", + "type": "session.created", + "session": { + "id": "session-id", + "object": "realtime.tts.session", + "modalities": ["text", "audio"], + "model": "hexgrad/Kokoro-82M", + "voice": "tara" + } + } + ``` + - `conversation.item.input_text.received`: Acknowledgment that text was received + ```json + { + "type": "conversation.item.input_text.received", + "text": "Hello, this is a test." + } + ``` + - `conversation.item.audio_output.delta`: Audio chunks as base64-encoded data + ```json + { + "type": "conversation.item.audio_output.delta", + "item_id": "tts_1", + "delta": "" + } + ``` + - `conversation.item.audio_output.done`: Audio generation complete for an item + ```json + { + "type": "conversation.item.audio_output.done", + "item_id": "tts_1" + } + ``` + - `conversation.item.tts.failed`: Error occurred + ```json + { + "type": "conversation.item.tts.failed", + "error": { + "message": "Error description", + "type": "invalid_request_error", + "param": null, + "code": "invalid_api_key" + } + } + ``` + + **Text Processing:** + - Partial text (no sentence ending) is held in buffer until: + - We believe that the text is complete enough to be processed for TTS generation + - The partial text exceeds `max_partial_length` characters (default: 250) + - The `input_text_buffer.commit` event is received + + **Audio Format:** + - Format: WAV (PCM s16le) + - Sample Rate: 24000 Hz + - Encoding: Base64 + - Delivered via `conversation.item.audio_output.delta` events + + **Error Codes:** + - `invalid_api_key`: Invalid API key provided (401) + - `missing_api_key`: Authorization header missing (401) + - `model_not_available`: Invalid or unavailable model (400) + - Invalid text format errors (400) + + operationId: realtime-tts x-codeSamples: - lang: Python - label: Together AI SDK (v1) + label: Python WebSocket Client source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together + import asyncio + import websockets + import json + import base64 import os - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) + async def generate_speech(): + api_key = os.environ.get("TOGETHER_API_KEY") + url = "wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara" + + headers = { + "Authorization": f"Bearer {api_key}" + } + + async with websockets.connect(url, additional_headers=headers) as ws: + # Wait for session created + session_msg = await ws.recv() + session_data = json.loads(session_msg) + print(f"Session created: {session_data['session']['id']}") + + # Send text for TTS + text_chunks = [ + "Hello, this is a test.", + "This is the second sentence.", + "And this is the final one." 
+ ] + + async def send_text(): + for chunk in text_chunks: + await ws.send(json.dumps({ + "type": "input_text_buffer.append", + "text": chunk + })) + await asyncio.sleep(0.5) # Simulate typing + + # Commit to process any remaining text + await ws.send(json.dumps({ + "type": "input_text_buffer.commit" + })) + + async def receive_audio(): + audio_data = bytearray() + async for message in ws: + data = json.loads(message) + + if data["type"] == "conversation.item.input_text.received": + print(f"Text received: {data['text']}") + elif data["type"] == "conversation.item.audio_output.delta": + # Decode base64 audio chunk + audio_chunk = base64.b64decode(data['delta']) + audio_data.extend(audio_chunk) + print(f"Received audio chunk for item {data['item_id']}") + elif data["type"] == "conversation.item.audio_output.done": + print(f"Audio generation complete for item {data['item_id']}") + elif data["type"] == "conversation.item.tts.failed": + error = data.get("error", {}) + print(f"Error: {error.get('message')}") + break + + # Save the audio to a file + with open("output.wav", "wb") as f: + f.write(audio_data) + print("Audio saved to output.wav") - response = client.fine_tuning.delete(id="ft-id") + # Run send and receive concurrently + await asyncio.gather(send_text(), receive_audio()) - print(response) - - lang: Python - label: Together AI SDK (v2) + asyncio.run(generate_speech()) + - lang: JavaScript + label: Node.js WebSocket Client source: | - from together import Together - import os + import WebSocket from 'ws'; + import fs from 'fs'; - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) + const apiKey = process.env.TOGETHER_API_KEY; + const url = 'wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara'; - response = client.fine_tuning.delete(id="ft-id") + const ws = new WebSocket(url, { + headers: { + 'Authorization': `Bearer ${apiKey}` + } + }); - print(response) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; + const audioData = []; - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, + ws.on('open', () => { + console.log('WebSocket connection established!'); }); - const response = await client.fineTuning.delete("ft-id"); + ws.on('message', (data) => { + const message = JSON.parse(data.toString()); - console.log(response); - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; + if (message.type === 'session.created') { + console.log(`Session created: ${message.session.id}`); - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, + // Send text chunks + const textChunks = [ + "Hello, this is a test.", + "This is the second sentence.", + "And this is the final one." 
+ ]; + + textChunks.forEach((text, index) => { + setTimeout(() => { + ws.send(JSON.stringify({ + type: 'input_text_buffer.append', + text: text + })); + }, index * 500); + }); + + // Commit after all chunks + setTimeout(() => { + ws.send(JSON.stringify({ + type: 'input_text_buffer.commit' + })); + }, textChunks.length * 500 + 100); + + } else if (message.type === 'conversation.item.input_text.received') { + console.log(`Text received: ${message.text}`); + } else if (message.type === 'conversation.item.audio_output.delta') { + // Decode base64 audio chunk + const audioChunk = Buffer.from(message.delta, 'base64'); + audioData.push(audioChunk); + console.log(`Received audio chunk for item ${message.item_id}`); + } else if (message.type === 'conversation.item.audio_output.done') { + console.log(`Audio generation complete for item ${message.item_id}`); + } else if (message.type === 'conversation.item.tts.failed') { + const errorMessage = message.error?.message ?? 'Unknown error'; + console.error(`Error: ${errorMessage}`); + ws.close(); + } }); - const response = await client.fineTuning.delete("ft-id"); + ws.on('close', () => { + // Save the audio to a file + if (audioData.length > 0) { + const completeAudio = Buffer.concat(audioData); + fs.writeFileSync('output.wav', completeAudio); + console.log('Audio saved to output.wav'); + } + }); - console.log(response); - - lang: Shell - label: cURL - source: | - curl -X "DELETE" "https://api.together.xyz/v1/fine-tunes/ft-id?force=false" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" + ws.on('error', (error) => { + console.error('WebSocket error:', error); + }); parameters: - - name: id - in: path - required: true + - in: query + name: model + required: false schema: type: string - - name: force - in: query + enum: + - hexgrad/Kokoro-82M + - cartesia/sonic-english + default: hexgrad/Kokoro-82M + description: The TTS model to use for speech generation. Can also be set via `tts_session.updated` event. + - in: query + name: voice + required: false schema: - type: boolean - default: false + type: string + default: tara + description: | + The voice to use for speech generation. Default is 'tara'. + Available voices vary by model. Can also be updated via `tts_session.updated` event. + - in: query + name: max_partial_length + required: false + schema: + type: integer + default: 250 + description: | + Maximum number of characters in partial text before forcing TTS generation + even without a sentence ending. Helps reduce latency for long text without punctuation. responses: - '200': - description: Fine-tune job deleted successfully - content: - application/json: - schema: - $ref: '#/components/schemas/FinetuneDeleteResponse' - '404': - description: Fine-tune job not found - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '500': - description: Internal server error - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - /fine-tunes/{id}/events: - get: - tags: ['Fine-tuning'] - summary: List job events - description: List the events for a single fine-tuning job. + '101': + description: | + Switching Protocols - WebSocket connection established successfully. 
+
+        Error message format:
+        ```json
+        {
+          "type": "conversation.item.tts.failed",
+          "error": {
+            "message": "Error description",
+            "type": "invalid_request_error",
+            "param": null,
+            "code": "error_code"
+          }
+        }
+        ```
+  /audio/transcriptions:
+    post:
+      tags: ['Audio']
+      summary: Create audio transcription request
+      description: Transcribes audio into text
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v1)
          source: |
            # Docs for v2 can be found by changing the above selector ^
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

-            events = client.fine_tuning.list_events(id="ft-id")
+            file = open("audio.wav", "rb")

-            print(events)
+            response = client.audio.transcriptions.create(
+                model="openai/whisper-large-v3",
+                file=file,
+            )
+
+            print(response.text)
        - lang: Python
          label: Together AI SDK (v2)
          source: |
            from together import Together
            import os

            client = Together(
                api_key=os.environ.get("TOGETHER_API_KEY"),
            )

-            response = client.fine_tuning.list_events(id="ft-id")
+            file = open("audio.wav", "rb")

-            for event in response.data:
-                print(event)
+            response = client.audio.transcriptions.create(
+                model="openai/whisper-large-v3",
+                file=file,
+            )
+
+            print(response.text)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
+            import { readFileSync } from "fs";
+            import { join } from "path";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

-            const events = await client.fineTuning.listEvents("ft-id");
+            const audioFilePath = join(process.cwd(), "audio.wav");
+            const audioBuffer = readFileSync(audioFilePath);
+            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });

-            console.log(events);
+            const response = await client.audio.transcriptions.create({
+              model: "openai/whisper-large-v3",
+              file: audioFile,
+            });
+
+            console.log(response.text);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
+            import { readFileSync } from "fs";
+            import { join } from "path";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

-            const events = await client.fineTuning.listEvents("ft-id");
+            const audioFilePath = join(process.cwd(), "audio.wav");
+            const audioBuffer = readFileSync(audioFilePath);
+            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });

-            console.log(events);
+            const response = await client.audio.transcriptions.create({
+              model: "openai/whisper-large-v3",
+              file: audioFile,
+            });
+
+            console.log(response.text);
        - lang: Shell
          label: cURL
          source: |
-            curl "https://api.together.xyz/v1/fine-tunes/ft-id/events" \
+            curl -X POST "https://api.together.xyz/v1/audio/transcriptions" \
              -H "Authorization: Bearer $TOGETHER_API_KEY" \
-            -H "Content-Type: application/json"
-      parameters:
-        - name: id
-          in: path
-          required: true
-          schema:
-            type: string
+            -F "file=@audio.wav" \
+            -F "model=openai/whisper-large-v3"
+      operationId: audio-transcriptions
+      requestBody:
+        required: true
+        content:
+          multipart/form-data:
+            schema:
+              $ref: '#/components/schemas/AudioTranscriptionRequest'
      responses:
        '200':
-          description: List of fine-tune events
+          description: 'OK'
          content:
            application/json:
              schema:
-                $ref: '#/components/schemas/FinetuneListEvents'
-  /fine-tunes/{id}/checkpoints:
-    get:
-      tags: ['Fine-tuning']
-      summary: List checkpoints
-      description: List the checkpoints for a single fine-tuning job.
+                $ref: '#/components/schemas/AudioTranscriptionResponse'
+        '400':
+          description: 'BadRequest'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '401':
+          description: 'Unauthorized'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '429':
+          description: 'RateLimit'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+  /audio/translations:
+    post:
+      tags: ['Audio']
+      summary: Create audio translation request
+      description: Translates audio into English
      x-codeSamples:
        - lang: Python
          label: Together AI SDK (v1)
@@ -2917,9 +3184,15 @@ paths:
            api_key=os.environ.get("TOGETHER_API_KEY"),
          )

-            checkpoints = client.fine_tuning.list_checkpoints(id="ft-id")
+            file = open("audio.wav", "rb")

-            print(checkpoints)
+            response = client.audio.translations.create(
+                model="openai/whisper-large-v3",
+                file=file,
+                language="es",
+            )
+
+            print(response.text)
        - lang: Python
          label: Together AI SDK (v2)
          source: |
@@ -2930,73 +3203,113 @@ paths:
            api_key=os.environ.get("TOGETHER_API_KEY"),
          )

-            checkpoints = client.fine_tuning.list_checkpoints(id="ft-id")
+            file = open("audio.wav", "rb")

-            print(checkpoints)
+            response = client.audio.translations.create(
+                model="openai/whisper-large-v3",
+                file=file,
+                language="es",
+            )
+
+            print(response.text)
        - lang: TypeScript
          label: Together AI SDK (TypeScript)
          source: |
            import Together from "together-ai";
+            import { readFileSync } from "fs";
+            import { join } from "path";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

-            const checkpoints = await client.fineTuning.listCheckpoints("ft-id");
+            const audioFilePath = join(process.cwd(), "audio.wav");
+            const audioBuffer = readFileSync(audioFilePath);
+            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });

-            console.log(checkpoints);
+            const response = await client.audio.translations.create({
+              model: "openai/whisper-large-v3",
+              file: audioFile,
+              language: "es"
+            });
+
+            console.log(response.text);
        - lang: JavaScript
          label: Together AI SDK (JavaScript)
          source: |
            import Together from "together-ai";
+            import { readFileSync } from "fs";
+            import { join } from "path";

            const client = new Together({
              apiKey: process.env.TOGETHER_API_KEY,
            });

-            const checkpoints = await client.fineTuning.listCheckpoints("ft-id");
+            const audioFilePath = join(process.cwd(), "audio.wav");
+            const audioBuffer = readFileSync(audioFilePath);
+            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });

-            console.log(checkpoints);
+            const response = await client.audio.translations.create({
+              model: "openai/whisper-large-v3",
+              file: audioFile,
+              language: "es"
+            });
+
+            console.log(response.text);
        - lang: Shell
          label: cURL
          source: |
-            curl "https://api.together.xyz/v1/fine-tunes/ft-id/checkpoints" \
+            curl -X POST "https://api.together.xyz/v1/audio/translations" \
              -H "Authorization: Bearer $TOGETHER_API_KEY" \
-            -H "Content-Type: application/json"
-      parameters:
-        - name: id
-          in: path
-          required: true
-          schema:
-            type: string
+            -F "file=@audio.wav" \
+            -F "model=openai/whisper-large-v3" \
+            -F "language=es"
+      operationId: audio-translations
+      requestBody:
+        required: true
+        content:
+          multipart/form-data:
+            schema:
+              $ref: '#/components/schemas/AudioTranslationRequest'
      responses:
        '200':
-          description: List of fine-tune checkpoints
+          description: 'OK'
          content:
            application/json:
              schema:
-                $ref: '#/components/schemas/FinetuneListCheckpoints'
-  /finetune/download:
+                $ref: 
'#/components/schemas/AudioTranslationResponse' + '400': + description: 'BadRequest' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '401': + description: 'Unauthorized' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '429': + description: 'RateLimit' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + /clusters/availability-zones: get: - tags: ['Fine-tuning'] - summary: Download model - description: Receive a compressed fine-tuned model or checkpoint. + tags: ['endpoints'] + summary: List all available availability zones. + description: List all available availability zones. + operationId: availabilityZones + responses: + '200': + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/ListAvailibilityZonesResponse' x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - # This will download the content to a location on disk - response = client.fine_tuning.download(id="ft-id") - - print(response) - lang: Python label: Together AI SDK (v2) source: | @@ -3007,13 +3320,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - # Using `with_streaming_response` gives you control to do what you want with the response. - stream = client.fine_tuning.with_streaming_response.content(ft_id="ft-id") - - with stream as response: - for line in response.iter_lines(): - print(line) + response = client.endpoints.list_avzones() + print(response.avzones) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -3023,11 +3332,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.fineTuning.content({ - ft_id: "ft-id", - }); + const response = await client.endpoints.listAvzones(); - console.log(await response.blob()); + console.log(response.avzones); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -3037,56 +3344,20 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.fineTuning.content({ - ft_id: "ft-id", - }); + const response = await client.endpoints.listAvzones(); - console.log(await response.blob()); + console.log(response.avzones); - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/finetune/download?ft_id=ft-id&checkpoint=merged" + curl "https://api.together.xyz/v1/clusters/availability-zones" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ -H "Content-Type: application/json" - parameters: - - in: query - name: ft_id - schema: - type: string - required: true - description: Fine-tune ID to download. A string that starts with `ft-`. - - in: query - name: checkpoint_step - schema: - type: integer - required: false - description: Specifies step number for checkpoint to download. Ignores `checkpoint` value if set. - - in: query - name: checkpoint - schema: - type: string - enum: - - merged - - adapter - - model_output_path - description: Specifies checkpoint type to download - `merged` vs `adapter`. This field is required if the checkpoint_step is not set. - responses: - '200': - description: Successfully downloaded the fine-tuned model or checkpoint. - content: - application/octet-stream: - schema: - type: string - format: binary - '400': - description: Invalid request parameters. - '404': - description: Fine-tune ID not found. 
- /fine-tunes/{id}/cancel: - post: - tags: ['Fine-tuning'] - summary: Cancel job - description: Cancel a currently running fine-tuning job. Returns a FinetuneResponseTruncated object. + /endpoints: + get: + tags: ['Endpoints'] + summary: List all endpoints, can be filtered by type + description: Returns a list of all endpoints associated with your account. You can filter the results by type (dedicated or serverless). x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -3099,9 +3370,10 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.fine_tuning.cancel(id="ft-id") + endpoints = client.endpoints.list() - print(response) + for endpoint in endpoints: + print(endpoint.id) - lang: Python label: Together AI SDK (v2) source: | @@ -3112,9 +3384,10 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.fine_tuning.cancel(id="ft-id") + response = client.endpoints.list() - print(response) + for endpoint in response.data: + print(endpoint.id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -3124,9 +3397,11 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.fineTuning.cancel("ft-id"); + const endpoints = await client.endpoints.list(); - console.log(response); + for (const endpoint of endpoints.data) { + console.log(endpoint); + } - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -3136,38 +3411,91 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.fineTuning.cancel("ft-id"); + const endpoints = await client.endpoints.list(); - console.log(response); + for (const endpoint of endpoints.data) { + console.log(endpoint); + } - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/fine-tunes/ft-id/cancel" \ + curl "https://api.together.xyz/v1/endpoints" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ -H "Content-Type: application/json" + operationId: listEndpoints parameters: - - in: path - name: id + - name: type + in: query + required: false + schema: + type: string + enum: + - dedicated + - serverless + description: Filter endpoints by type + example: dedicated + - name: usage_type + in: query + required: false schema: type: string - required: true - description: Fine-tune ID to cancel. A string that starts with `ft-`. + enum: + - on-demand + - reserved + description: Filter endpoints by usage type + example: on-demand + - name: mine + in: query + required: false + schema: + type: boolean + description: If true, return only endpoints owned by the caller responses: '200': - description: Successfully cancelled the fine-tuning job. + description: '200' content: application/json: schema: - $ref: '#/components/schemas/FinetuneResponseTruncated' - '400': - description: Invalid request parameters. - '404': - description: Fine-tune ID not found. 
- /rerank: + type: object + required: + - object + - data + properties: + object: + type: string + enum: + - list + data: + type: array + items: + $ref: '#/components/schemas/ListEndpoint' + example: + object: 'list' + data: + - object: 'endpoint' + id: 'endpoint-5c0c20db-62fe-4f41-8ffc-d9e4ea1a264e' + name: 'allenai/OLMo-7B' + model: 'allenai/OLMo-7B' + type: 'serverless' + owner: 'together' + state: 'STARTED' + created_at: '2024-02-28T21:34:35.444Z' + '403': + description: 'Unauthorized' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '500': + description: 'Internal error' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' post: - tags: ['Rerank'] - summary: Create a rerank request - description: Query a reranker model + tags: ['Endpoints'] + summary: Create a dedicated endpoint, it will start automatically + description: Creates a new dedicated endpoint for serving models. The endpoint will automatically start after creation. You can deploy any supported model on hardware configurations that meet the model's requirements. x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -3180,35 +3508,14 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - documents = [ - { - "title": "Llama", - "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." - }, - { - "title": "Panda", - "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." - }, - { - "title": "Guanaco", - "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." - }, - { - "title": "Wild Bactrian camel", - "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." - } - ] - - response = client.rerank.create( - model="Salesforce/Llama-Rank-v1", - query="What animals can I find near Peru?", - documents=documents, + endpoint = client.endpoints.create( + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + hardware="1x_nvidia_a100_80gb_sxm", + min_replicas=2, + max_replicas=5, ) - for result in response.results: - print(f"Rank: {result.index + 1}") - print(f"Title: {documents[result.index]['title']}") - print(f"Text: {documents[result.index]['text']}") + print(endpoint.id) - lang: Python label: Together AI SDK (v2) source: | @@ -3219,35 +3526,16 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - documents = [ - { - "title": "Llama", - "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." - }, - { - "title": "Panda", - "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." - }, - { - "title": "Guanaco", - "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." - }, - { - "title": "Wild Bactrian camel", - "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." 
+ endpoint = client.endpoints.create( + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + hardware="1x_nvidia_a100_80gb_sxm", + autoscaling={ + "min_replicas": 2, + "max_replicas": 5, } - ] - - response = client.rerank.create( - model="Salesforce/Llama-Rank-v1", - query="What animals can I find near Peru?", - documents=documents, ) - for result in response.results: - print(f"Rank: {result.index + 1}") - print(f"Title: {documents[result.index]['title']}") - print(f"Text: {documents[result.index]['text']}") + print(endpoint.id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -3257,34 +3545,16 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const documents = [{ - "title": "Llama", - "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." - }, - { - "title": "Panda", - "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." - }, - { - "title": "Guanaco", - "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." - }, - { - "title": "Wild Bactrian camel", - "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." - }]; - - const response = await client.rerank.create({ - model: "Salesforce/Llama-Rank-v1", - query: "What animals can I find near Peru?", - documents, + const endpoint = await client.endpoints.create({ + model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + hardware: "1x_nvidia_a100_80gb_sxm", + autoscaling: { + max_replicas: 5, + min_replicas: 2, + } }); - for (const result of response.results) { - console.log(`Rank: ${result.index + 1}`); - console.log(`Title: ${documents[result.index].title}`); - console.log(`Text: ${documents[result.index].text}`); - } + console.log(endpoint.id); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -3294,115 +3564,62 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const documents = [{ - "title": "Llama", - "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." - }, - { - "title": "Panda", - "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." - }, - { - "title": "Guanaco", - "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." - }, - { - "title": "Wild Bactrian camel", - "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." 
- }]; - - const response = await client.rerank.create({ - model: "Salesforce/Llama-Rank-v1", - query: "What animals can I find near Peru?", - documents, + const endpoint = await client.endpoints.create({ + model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + hardware: "1x_nvidia_a100_80gb_sxm", + autoscaling: { + max_replicas: 5, + min_replicas: 2, + } }); - for (const result of response.results) { - console.log(`Rank: ${result.index + 1}`); - console.log(`Title: ${documents[result.index].title}`); - console.log(`Text: ${documents[result.index].text}`); - } + console.log(endpoint.id); - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/rerank" \ + curl -X POST "https://api.together.xyz/v1/endpoints" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ -H "Content-Type: application/json" \ -d '{ - "model": "Salesforce/Llama-Rank-v1", - "query": "What animals can I find near Peru?", - "documents": [{ - "title": "Llama", - "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era." - }, - { - "title": "Panda", - "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China." - }, - { - "title": "Guanaco", - "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations." - }, - { - "title": "Wild Bactrian camel", - "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia." - }] + "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + "hardware": "1x_nvidia_a100_80gb_sxm", + "autoscaling": { + "max_replicas": 5, + "min_replicas": 2 + } }' - operationId: rerank + operationId: createEndpoint requestBody: + required: true content: application/json: schema: - $ref: '#/components/schemas/RerankRequest' + $ref: '#/components/schemas/CreateEndpointRequest' responses: '200': description: '200' content: application/json: schema: - $ref: '#/components/schemas/RerankResponse' - '400': - description: 'BadRequest' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '401': - description: 'Unauthorized' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '404': - description: 'NotFound' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '429': - description: 'RateLimit' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '503': - description: 'Overloaded' + $ref: '#/components/schemas/DedicatedEndpoint' + '403': + description: 'Unauthorized' content: application/json: schema: $ref: '#/components/schemas/ErrorData' - '504': - description: 'Timeout' + '500': + description: 'Internal error' content: application/json: schema: $ref: '#/components/schemas/ErrorData' - deprecated: false - /audio/speech: - post: - tags: ['Audio'] - summary: Create audio generation request - description: Generate audio from input text + + /endpoints/{endpointId}: + get: + tags: ['Endpoints'] + summary: Get endpoint by ID + description: Retrieves details about a specific endpoint, including its current state, configuration, and scaling settings. 
x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -3415,13 +3632,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.audio.speech.create( - model="cartesia/sonic-2", - input="The quick brown fox jumps over the lazy dog.", - voice="laidback woman", - ) + endpoint = client.endpoints.get("endpoint-id") - response.stream_to_file("audio.wav") + print(endpoint.id) - lang: Python label: Together AI SDK (v2) source: | @@ -3432,566 +3645,494 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.audio.speech.with_streaming_response.create( - model="cartesia/sonic-2", - input="The quick brown fox jumps over the lazy dog.", - voice="laidback woman", - ) + endpoint = client.endpoints.retrieve("endpoint-id") - with response as stream: - stream.stream_to_file("audio.wav") + print(endpoint.id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | import Together from "together-ai"; - import { createWriteStream } from "fs"; - import { join } from "path"; - import { pipeline } from "stream/promises"; const client = new Together({ apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.audio.speech.create({ - model: "cartesia/sonic-2", - input: "The quick brown fox jumps over the lazy dog.", - voice: "laidback woman", - }); - - const filepath = join(process.cwd(), "audio.wav"); - const writeStream = createWriteStream(filepath); + const endpoint = await client.endpoints.retrieve("endpoint-id"); - if (response.body) { - await pipeline(response.body, writeStream); - } + console.log(endpoint); - lang: JavaScript label: Together AI SDK (JavaScript) source: | import Together from "together-ai"; - import { createWriteStream } from "fs"; - import { join } from "path"; - import { pipeline } from "stream/promises"; const client = new Together({ apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.audio.speech.create({ - model: "cartesia/sonic-2", - input: "The quick brown fox jumps over the lazy dog.", - voice: "laidback woman", - }); - - const filepath = join(process.cwd(), "audio.wav"); - const writeStream = createWriteStream(filepath); + const endpoint = await client.endpoints.retrieve("endpoint-id"); - if (response.body) { - await pipeline(response.body, writeStream); - } + console.log(endpoint); - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/audio/speech" \ + curl "https://api.together.xyz/v1/endpoints/endpoint-id" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "cartesia/sonic-2", - "input": "The quick brown fox jumps over the lazy dog.", - "voice": "laidback woman" - }' \ - --output audio.wav - operationId: audio-speech - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/AudioSpeechRequest' + -H "Content-Type: application/json" + operationId: getEndpoint + parameters: + - name: endpointId + in: path + required: true + schema: + type: string + description: The ID of the endpoint to retrieve + example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 responses: '200': - description: 'OK' + description: '200' content: - application/octet-stream: - schema: - type: string - format: binary - audio/wav: - schema: - type: string - format: binary - audio/mpeg: + application/json: schema: - type: string - format: binary - text/event-stream: + $ref: '#/components/schemas/DedicatedEndpoint' + '403': + description: 'Unauthorized' + content: + application/json: schema: - $ref: 
'#/components/schemas/AudioSpeechStreamResponse' - '400': - description: 'BadRequest' + $ref: '#/components/schemas/ErrorData' + '404': + description: 'Not Found' content: application/json: schema: $ref: '#/components/schemas/ErrorData' - '429': - description: 'RateLimit' + '500': + description: 'Internal error' content: application/json: schema: $ref: '#/components/schemas/ErrorData' - /audio/speech/websocket: - get: - tags: ['Audio'] - summary: Real-time text-to-speech via WebSocket - description: | - Establishes a WebSocket connection for real-time text-to-speech generation. This endpoint uses WebSocket protocol (wss://api.together.ai/v1/audio/speech/websocket) for bidirectional streaming communication. - **Connection Setup:** - - Protocol: WebSocket (wss://) - - Authentication: Pass API key as Bearer token in Authorization header - - Parameters: Sent as query parameters (model, voice, max_partial_length) + patch: + tags: ['Endpoints'] + summary: Update endpoint, this can also be used to start or stop a dedicated endpoint + description: Updates an existing endpoint's configuration. You can modify the display name, autoscaling settings, or change the endpoint's state (start/stop). + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os - **Client Events:** - - `tts_session.updated`: Update session parameters like voice - ```json - { - "type": "tts_session.updated", - "session": { - "voice": "tara" - } - } - ``` - - `input_text_buffer.append`: Send text chunks for TTS generation - ```json - { - "type": "input_text_buffer.append", - "text": "Hello, this is a test." - } - ``` - - `input_text_buffer.clear`: Clear the buffered text - ```json - { - "type": "input_text_buffer.clear" - } - ``` - - `input_text_buffer.commit`: Signal end of text input and process remaining text - ```json - { - "type": "input_text_buffer.commit" - } - ``` + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) - **Server Events:** - - `session.created`: Initial session confirmation (sent first) - ```json - { - "event_id": "evt_123456", - "type": "session.created", - "session": { - "id": "session-id", - "object": "realtime.tts.session", - "modalities": ["text", "audio"], - "model": "hexgrad/Kokoro-82M", - "voice": "tara" - } - } - ``` - - `conversation.item.input_text.received`: Acknowledgment that text was received - ```json - { - "type": "conversation.item.input_text.received", - "text": "Hello, this is a test." 
- } - ``` - - `conversation.item.audio_output.delta`: Audio chunks as base64-encoded data - ```json - { - "type": "conversation.item.audio_output.delta", - "item_id": "tts_1", - "delta": "" - } - ``` - - `conversation.item.audio_output.done`: Audio generation complete for an item - ```json - { - "type": "conversation.item.audio_output.done", - "item_id": "tts_1" - } - ``` - - `conversation.item.tts.failed`: Error occurred - ```json - { - "type": "conversation.item.tts.failed", - "error": { - "message": "Error description", - "type": "invalid_request_error", - "param": null, - "code": "invalid_api_key" - } - } - ``` + endpoint = client.endpoints.update( + endpoint_id="endpoint-id", + state="STOPPED" + ) - **Text Processing:** - - Partial text (no sentence ending) is held in buffer until: - - We believe that the text is complete enough to be processed for TTS generation - - The partial text exceeds `max_partial_length` characters (default: 250) - - The `input_text_buffer.commit` event is received + print(endpoint) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); + + const endpoint = await client.endpoints.update("endpoint-id", { + state: "STOPPED" + }); + + console.log(endpoint); + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; + + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); - **Audio Format:** - - Format: WAV (PCM s16le) - - Sample Rate: 24000 Hz - - Encoding: Base64 - - Delivered via `conversation.item.audio_output.delta` events + const endpoint = await client.endpoints.update("endpoint-id", { + state: "STOPPED" + }); - **Error Codes:** - - `invalid_api_key`: Invalid API key provided (401) - - `missing_api_key`: Authorization header missing (401) - - `model_not_available`: Invalid or unavailable model (400) - - Invalid text format errors (400) + console.log(endpoint); + - lang: Shell + label: cURL + source: | + curl -X PATCH "https://api.together.xyz/v1/endpoints/endpoint-id" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "state": "STOPPED" + }' + operationId: updateEndpoint + parameters: + - name: endpointId + in: path + required: true + schema: + type: string + description: The ID of the endpoint to update + example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + display_name: + type: string + description: A human-readable name for the endpoint + example: My Llama3 70b endpoint + state: + type: string + description: The desired state of the endpoint + enum: + - STARTED + - STOPPED + example: STARTED + autoscaling: + $ref: '#/components/schemas/Autoscaling' + description: New autoscaling configuration for the endpoint + inactive_timeout: + type: integer + description: The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout. 
+ nullable: true + example: 60 + responses: + '200': + description: '200' + content: + application/json: + schema: + $ref: '#/components/schemas/DedicatedEndpoint' + '403': + description: 'Unauthorized' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '404': + description: 'Not Found' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '500': + description: 'Internal error' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' - operationId: realtime-tts + delete: + tags: ['Endpoints'] + summary: Delete endpoint + description: Permanently deletes an endpoint. This action cannot be undone. x-codeSamples: - lang: Python - label: Python WebSocket Client + label: Together AI SDK (v1) source: | - import asyncio - import websockets - import json - import base64 + # Docs for v2 can be found by changing the above selector ^ + from together import Together import os - async def generate_speech(): - api_key = os.environ.get("TOGETHER_API_KEY") - url = "wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara" - - headers = { - "Authorization": f"Bearer {api_key}" - } + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) - async with websockets.connect(url, additional_headers=headers) as ws: - # Wait for session created - session_msg = await ws.recv() - session_data = json.loads(session_msg) - print(f"Session created: {session_data['session']['id']}") + endpoint = client.endpoints.delete( + endpoint_id="endpoint-id", + ) - # Send text for TTS - text_chunks = [ - "Hello, this is a test.", - "This is the second sentence.", - "And this is the final one." - ] + print(endpoint) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; - async def send_text(): - for chunk in text_chunks: - await ws.send(json.dumps({ - "type": "input_text_buffer.append", - "text": chunk - })) - await asyncio.sleep(0.5) # Simulate typing + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); - # Commit to process any remaining text - await ws.send(json.dumps({ - "type": "input_text_buffer.commit" - })) + const endpoint = await client.endpoints.delete("endpoint-id"); - async def receive_audio(): - audio_data = bytearray() - async for message in ws: - data = json.loads(message) + console.log(endpoint); + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; - if data["type"] == "conversation.item.input_text.received": - print(f"Text received: {data['text']}") - elif data["type"] == "conversation.item.audio_output.delta": - # Decode base64 audio chunk - audio_chunk = base64.b64decode(data['delta']) - audio_data.extend(audio_chunk) - print(f"Received audio chunk for item {data['item_id']}") - elif data["type"] == "conversation.item.audio_output.done": - print(f"Audio generation complete for item {data['item_id']}") - elif data["type"] == "conversation.item.tts.failed": - error = data.get("error", {}) - print(f"Error: {error.get('message')}") - break + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); - # Save the audio to a file - with open("output.wav", "wb") as f: - f.write(audio_data) - print("Audio saved to output.wav") + const endpoint = await client.endpoints.delete("endpoint-id"); - # Run send and receive concurrently - await asyncio.gather(send_text(), receive_audio()) + console.log(endpoint); + - lang: Shell + label: cURL + source: | 
+ curl -X "DELETE" "https://api.together.xyz/v1/endpoints/endpoint-id" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" + operationId: deleteEndpoint + parameters: + - name: endpointId + in: path + required: true + schema: + type: string + description: The ID of the endpoint to delete + example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 + responses: + '204': + description: 'No Content - Endpoint successfully deleted' + '403': + description: 'Unauthorized' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '404': + description: 'Not Found' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '500': + description: 'Internal error' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' - asyncio.run(generate_speech()) - - lang: JavaScript - label: Node.js WebSocket Client + /hardware: + get: + tags: ['Hardware'] + summary: List available hardware configurations + description: > + Returns a list of available hardware configurations for deploying models. + When a model parameter is provided, it returns only hardware configurations compatible + with that model, including their current availability status. + x-codeSamples: + - lang: Python + label: Together AI SDK (v1) source: | - import WebSocket from 'ws'; - import fs from 'fs'; + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os - const apiKey = process.env.TOGETHER_API_KEY; - const url = 'wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara'; + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) - const ws = new WebSocket(url, { - headers: { - 'Authorization': `Bearer ${apiKey}` - } - }); + response = client.endpoints.list_hardware() - const audioData = []; + for hardware in response: + print(hardware.id) + - lang: Python + label: Together AI SDK (v2) + source: | + from together import Together + import os - ws.on('open', () => { - console.log('WebSocket connection established!'); - }); + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) - ws.on('message', (data) => { - const message = JSON.parse(data.toString()); + response = client.hardware.list() - if (message.type === 'session.created') { - console.log(`Session created: ${message.session.id}`); + for hardware in response.data: + print(hardware.id) + - lang: TypeScript + label: Together AI SDK (TypeScript) + source: | + import Together from "together-ai"; - // Send text chunks - const textChunks = [ - "Hello, this is a test.", - "This is the second sentence.", - "And this is the final one." 
- ]; + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, + }); - textChunks.forEach((text, index) => { - setTimeout(() => { - ws.send(JSON.stringify({ - type: 'input_text_buffer.append', - text: text - })); - }, index * 500); - }); + const hardware = await client.hardware.list(); - // Commit after all chunks - setTimeout(() => { - ws.send(JSON.stringify({ - type: 'input_text_buffer.commit' - })); - }, textChunks.length * 500 + 100); + console.log(hardware); + - lang: JavaScript + label: Together AI SDK (JavaScript) + source: | + import Together from "together-ai"; - } else if (message.type === 'conversation.item.input_text.received') { - console.log(`Text received: ${message.text}`); - } else if (message.type === 'conversation.item.audio_output.delta') { - // Decode base64 audio chunk - const audioChunk = Buffer.from(message.delta, 'base64'); - audioData.push(audioChunk); - console.log(`Received audio chunk for item ${message.item_id}`); - } else if (message.type === 'conversation.item.audio_output.done') { - console.log(`Audio generation complete for item ${message.item_id}`); - } else if (message.type === 'conversation.item.tts.failed') { - const errorMessage = message.error?.message ?? 'Unknown error'; - console.error(`Error: ${errorMessage}`); - ws.close(); - } + const client = new Together({ + apiKey: process.env.TOGETHER_API_KEY, }); - ws.on('close', () => { - // Save the audio to a file - if (audioData.length > 0) { - const completeAudio = Buffer.concat(audioData); - fs.writeFileSync('output.wav', completeAudio); - console.log('Audio saved to output.wav'); - } - }); + const hardware = await client.hardware.list(); - ws.on('error', (error) => { - console.error('WebSocket error:', error); - }); + console.log(hardware); + - lang: Shell + label: cURL + source: | + curl "https://api.together.xyz/v1/hardware" \ + -H "Authorization: Bearer $TOGETHER_API_KEY" \ + -H "Content-Type: application/json" + operationId: listHardware parameters: - - in: query - name: model - required: false - schema: - type: string - enum: - - hexgrad/Kokoro-82M - - cartesia/sonic-english - default: hexgrad/Kokoro-82M - description: The TTS model to use for speech generation. Can also be set via `tts_session.updated` event. - - in: query - name: voice + - name: model + in: query required: false schema: type: string - default: tara - description: | - The voice to use for speech generation. Default is 'tara'. - Available voices vary by model. Can also be updated via `tts_session.updated` event. - - in: query - name: max_partial_length - required: false - schema: - type: integer - default: 250 - description: | - Maximum number of characters in partial text before forcing TTS generation - even without a sentence ending. Helps reduce latency for long text without punctuation. + description: > + Filter hardware configurations by model compatibility. When provided, + the response includes availability status for each compatible configuration. + example: meta-llama/Llama-3-70b-chat-hf responses: - '101': - description: | - Switching Protocols - WebSocket connection established successfully. 
- - Error message format: - ```json - { - "type": "conversation.item.tts.failed", - "error": { - "message": "Error description", - "type": "invalid_request_error", - "param": null, - "code": "error_code" - } - } - ``` - /audio/transcriptions: + '200': + description: 'List of available hardware configurations' + content: + application/json: + schema: + type: object + required: + - object + - data + properties: + object: + type: string + enum: + - list + data: + type: array + items: + $ref: '#/components/schemas/HardwareWithStatus' + '403': + description: 'Unauthorized' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + '500': + description: 'Internal error' + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorData' + /tci/execute: post: - tags: ['Audio'] - summary: Create audio transcription request - description: Transcribes audio into text + tags: ['Code Interpreter'] + callbacks: {} + description: | + Executes the given code snippet and returns the output. Without a session_id, a new session will be created to run the code. If you do pass in a valid session_id, the code will be run in that session. This is useful for running multiple code snippets in the same environment, because dependencies and similar things are persisted + between calls to the same session. x-codeSamples: - lang: Python label: Together AI SDK (v1) source: | # Docs for v2 can be found by changing the above selector ^ from together import Together + import os client = Together( api_key=os.environ.get("TOGETHER_API_KEY"), ) - file = open("audio.wav", "rb") - - response = client.audio.transcriptions.create( - model="openai/whisper-large-v3", - file=file, + response = client.code_interpreter.run( + code="print('Hello world!')", + language="python", ) - print(response.text) + print(response.data.outputs[0].data); - lang: Python label: Together AI SDK (v2) source: | from together import Together + import os client = Together( api_key=os.environ.get("TOGETHER_API_KEY"), ) - file = open("audio.wav", "rb") - - response = client.audio.transcriptions.create( - model="openai/whisper-large-v3", - file=file, + response = client.code_interpreter.execute( + code="print('Hello world!')", + language="python", ) - print(response.text) + print(response.data.outputs[0].data); - lang: TypeScript label: Together AI SDK (TypeScript) source: | import Together from "together-ai"; - import { readFileSync } from "fs"; - import { join } from "path"; const client = new Together({ apiKey: process.env.TOGETHER_API_KEY, }); - const audioFilePath = join(process.cwd(), "audio.wav"); - const audioBuffer = readFileSync(audioFilePath); - const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" }); - - const response = await client.audio.transcriptions.create({ - model: "openai/whisper-large-v3", - file: audioFile, + const response = await client.codeInterpreter.execute({ + code: "print('Hello world!')", + language: "python" }); - console.log(response.text); + console.log(response.data?.outputs?.[0]?.data); - lang: JavaScript label: Together AI SDK (JavaScript) source: | import Together from "together-ai"; - import { readFileSync } from "fs"; - import { join } from "path"; const client = new Together({ apiKey: process.env.TOGETHER_API_KEY, }); - const audioFilePath = join(process.cwd(), "audio.wav"); - const audioBuffer = readFileSync(audioFilePath); - const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" }); - - const response = await 
client.audio.transcriptions.create({ - model: "openai/whisper-large-v3", - file: audioFile, + const response = await client.codeInterpreter.execute({ + code: "print('Hello world!')", + language: "python" }); - console.log(response.text); + console.log(response.data?.outputs?.[0]?.data); - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/audio/transcriptions" \ + curl -X POST "https://api.together.xyz/v1/tci/execute" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -F "file=@audio.wav" \ - -F "model=openai/whisper-large-v3" - operationId: audio-transcriptions + -H "Content-Type: application/json" \ + -d '{ + "code": "print(\'Hello world!\')", + "language": "python" + }' + operationId: tci/execute + parameters: [] requestBody: - required: true content: - multipart/form-data: + application/json: schema: - $ref: '#/components/schemas/AudioTranscriptionRequest' + $ref: '#/components/schemas/ExecuteRequest' + description: Execute Request + required: false responses: '200': - description: 'OK' - content: - application/json: - schema: - $ref: '#/components/schemas/AudioTranscriptionResponse' - '400': - description: 'BadRequest' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '401': - description: 'Unauthorized' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '429': - description: 'RateLimit' content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - /audio/translations: - post: - tags: ['Audio'] - summary: Create audio translation request - description: Translates audio into English + $ref: '#/components/schemas/ExecuteResponse' + description: Execute Response + /tci/sessions: + get: + tags: ['Code Interpreter'] + callbacks: {} + description: | + Lists all your currently active sessions. 
x-codeSamples: - lang: Python label: Together AI SDK (v1) source: | # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - file = open("audio.wav", "rb") - - response = client.audio.translations.create( - model="openai/whisper-large-v3", - file=file, - language="es", - ) - - print(response.text) + # together v1 does not support this method - lang: Python label: Together AI SDK (v2) source: | @@ -4002,113 +4143,74 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - file = open("audio.wav", "rb") - - response = client.audio.translations.create( - model="openai/whisper-large-v3", - file=file, - language="es", - ) + response = client.code_interpreter.sessions.list() - print(response.text) + for session in response.data.sessions: + print(session.id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | import Together from "together-ai"; - import { readFileSync } from "fs"; - import { join } from "path"; const client = new Together({ apiKey: process.env.TOGETHER_API_KEY, }); - const audioFilePath = join(process.cwd(), "audio.wav"); - const audioBuffer = readFileSync(audioFilePath); - const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" }); - - const response = await client.audio.translations.create({ - model: "openai/whisper-large-v3", - file: audioFile, - language: "es" - }); + const response = await client.codeInterpreter.sessions.list(); - console.log(response.text); + for (const session of response.data?.sessions) { + console.log(session.id); + } - lang: JavaScript label: Together AI SDK (JavaScript) source: | import Together from "together-ai"; - import { readFileSync } from "fs"; - import { join } from "path"; const client = new Together({ apiKey: process.env.TOGETHER_API_KEY, }); - const audioFilePath = join(process.cwd(), "audio.wav"); - const audioBuffer = readFileSync(audioFilePath); - const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" }); - - const response = await client.audio.translations.create({ - model: "openai/whisper-large-v3", - file: audioFile, - language: "es" - }); + const response = await client.codeInterpreter.sessions.list(); - console.log(response.text); + for (const session of response.data?.sessions) { + console.log(session.id); + } - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/audio/transcriptions" \ + curl "https://api.together.xyz/v1/tci/sessions" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -F "file=@audio.wav" \ - -F "model=openai/whisper-large-v3" \ - -F "language=es" - operationId: audio-translations - requestBody: - required: true - content: - multipart/form-data: - schema: - $ref: '#/components/schemas/AudioTranslationRequest' + -H "Content-Type: application/json" + operationId: sessions/list + parameters: [] responses: '200': - description: 'OK' - content: - application/json: - schema: - $ref: '#/components/schemas/AudioTranslationResponse' - '400': - description: 'BadRequest' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '401': - description: 'Unauthorized' - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorData' - '429': - description: 'RateLimit' content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - /clusters/availability-zones: + $ref: '#/components/schemas/SessionListResponse' + description: List Response + /batches: get: - tags: 
['endpoints'] - summary: List all available availability zones. - description: List all available availability zones. - operationId: availabilityZones - responses: - '200': - description: Success - content: - application/json: - schema: - $ref: '#/components/schemas/ListAvailibilityZonesResponse' + tags: ['Batches'] + summary: List batch jobs + description: List all batch jobs for the authenticated user x-codeSamples: + - lang: Python + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + batches = client.batches.list_batches() + + for batch in batches: + print(batch.id) - lang: Python label: Together AI SDK (v2) source: | @@ -4119,9 +4221,10 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.endpoints.list_avzones() + batches = client.batches.list() - print(response.avzones) + for batch in batches: + print(batch.id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -4131,9 +4234,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.endpoints.listAvzones(); + const batches = await client.batches.list(); - console.log(response.avzones); + console.log(batches); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -4143,20 +4246,42 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.endpoints.listAvzones(); + const batches = await client.batches.list(); - console.log(response.avzones); + console.log(batches); - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/clusters/availability-zones" \ + curl "https://api.together.xyz/v1/batches" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ -H "Content-Type: application/json" - /endpoints: - get: - tags: ['Endpoints'] - summary: List all endpoints, can be filtered by type - description: Returns a list of all endpoints associated with your account. You can filter the results by type (dedicated or serverless). 
+ security: + - bearerAuth: [] + responses: + '200': + description: OK + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/BatchJob' + '401': + description: Unauthorized + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' + '500': + description: Internal Server Error + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' + post: + tags: ['Batches'] + summary: Create a batch job + description: Create a new batch job with the given input file and endpoint x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -4169,10 +4294,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - endpoints = client.endpoints.list() + batch = client.batches.create_batch("file_id", endpoint="/v1/chat/completions") - for endpoint in endpoints: - print(endpoint.id) + print(batch.id) - lang: Python label: Together AI SDK (v2) source: | @@ -4183,10 +4307,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.endpoints.list() + batch = client.batches.create(input_file_id="file_id", endpoint="/v1/chat/completions") - for endpoint in response.data: - print(endpoint.id) + print(batch.job) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -4196,11 +4319,12 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const endpoints = await client.endpoints.list(); + const batch = await client.batches.create({ + endpoint: "/v1/chat/completions", + input_file_id: "file-id", + }); - for (const endpoint of endpoints.data) { - console.log(endpoint); - } + console.log(batch); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -4210,91 +4334,67 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const endpoints = await client.endpoints.list(); + const batch = await client.batches.create({ + endpoint: "/v1/chat/completions", + input_file_id: "file-id", + }); - for (const endpoint of endpoints.data) { - console.log(endpoint); - } + console.log(batch); - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/endpoints" \ + curl -X POST "https://api.together.xyz/v1/batches" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - operationId: listEndpoints - parameters: - - name: type - in: query - required: false - schema: - type: string - enum: - - dedicated - - serverless - description: Filter endpoints by type - example: dedicated - - name: usage_type - in: query - required: false - schema: - type: string - enum: - - on-demand - - reserved - description: Filter endpoints by usage type - example: on-demand - - name: mine - in: query - required: false - schema: - type: boolean - description: If true, return only endpoints owned by the caller + -H "Content-Type: application/json" \ + -d '{ + "endpoint": "/v1/chat/completions", + "input_file_id": "file-id" + }' + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateBatchRequest' responses: - '200': - description: '200' + '201': + description: Job created (potentially with warnings) content: application/json: schema: - type: object - required: - - object - - data - properties: - object: - type: string - enum: - - list - data: - type: array - items: - $ref: '#/components/schemas/ListEndpoint' - example: - object: 'list' - data: - - object: 'endpoint' - id: 'endpoint-5c0c20db-62fe-4f41-8ffc-d9e4ea1a264e' - name: 'allenai/OLMo-7B' - model: 'allenai/OLMo-7B' - type: 'serverless' 
- owner: 'together' - state: 'STARTED' - created_at: '2024-02-28T21:34:35.444Z' - '403': - description: 'Unauthorized' + $ref: '#/components/schemas/BatchJobWithWarning' + '400': + description: Bad Request content: application/json: schema: - $ref: '#/components/schemas/ErrorData' + $ref: '#/components/schemas/BatchErrorResponse' + '401': + description: Unauthorized + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' + '429': + description: Too Many Requests + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' '500': - description: 'Internal error' + description: Internal Server Error content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - post: - tags: ['Endpoints'] - summary: Create a dedicated endpoint, it will start automatically - description: Creates a new dedicated endpoint for serving models. The endpoint will automatically start after creation. You can deploy any supported model on hardware configurations that meet the model's requirements. + $ref: '#/components/schemas/BatchErrorResponse' + + /batches/{id}: + get: + tags: ['Batches'] + summary: Get a batch job + description: Get details of a batch job by ID x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -4307,14 +4407,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - endpoint = client.endpoints.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - hardware="1x_nvidia_a100_80gb_sxm", - min_replicas=2, - max_replicas=5, - ) + batch = client.batches.get_batch("batch_id") - print(endpoint.id) + print(batch) - lang: Python label: Together AI SDK (v2) source: | @@ -4325,16 +4420,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - endpoint = client.endpoints.create( - model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - hardware="1x_nvidia_a100_80gb_sxm", - autoscaling={ - "min_replicas": 2, - "max_replicas": 5, - } - ) + batch = client.batches.retrieve("batch_id") - print(endpoint.id) + print(batch) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -4344,16 +4432,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const endpoint = await client.endpoints.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - hardware: "1x_nvidia_a100_80gb_sxm", - autoscaling: { - max_replicas: 5, - min_replicas: 2, - } - }); + const batch = await client.batches.retrieve("batch-id"); - console.log(endpoint.id); + console.log(batch); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -4363,62 +4444,67 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const endpoint = await client.endpoints.create({ - model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - hardware: "1x_nvidia_a100_80gb_sxm", - autoscaling: { - max_replicas: 5, - min_replicas: 2, - } - }); + const batch = await client.batches.retrieve("batch-id"); - console.log(endpoint.id); + console.log(batch); - lang: Shell label: cURL source: | - curl -X POST "https://api.together.xyz/v1/endpoints" \ + curl "https://api.together.xyz/v1/batches/ID" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - "hardware": "1x_nvidia_a100_80gb_sxm", - "autoscaling": { - "max_replicas": 5, - "min_replicas": 2 - } - }' - operationId: createEndpoint - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/CreateEndpointRequest' + -H "Content-Type: application/json" + 
security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + description: Job ID + schema: + type: string + example: 'batch_job_abc123def456' responses: '200': - description: '200' + description: OK content: application/json: schema: - $ref: '#/components/schemas/DedicatedEndpoint' + $ref: '#/components/schemas/BatchJob' + '400': + description: Bad Request + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' + '401': + description: Unauthorized + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' '403': - description: 'Unauthorized' + description: Forbidden content: application/json: schema: - $ref: '#/components/schemas/ErrorData' + $ref: '#/components/schemas/BatchErrorResponse' + '404': + description: Not Found + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' '500': - description: 'Internal error' + description: Internal Server Error content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - - /endpoints/{endpointId}: - get: - tags: ['Endpoints'] - summary: Get endpoint by ID - description: Retrieves details about a specific endpoint, including its current state, configuration, and scaling settings. + $ref: '#/components/schemas/BatchErrorResponse' + /batches/{id}/cancel: + post: + tags: ['Batches'] + summary: Cancel a batch job + description: Cancel a batch job by ID x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -4431,9 +4517,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - endpoint = client.endpoints.get("endpoint-id") + batch = client.batches.cancel("batch_id") - print(endpoint.id) + print(batch) - lang: Python label: Together AI SDK (v2) source: | @@ -4444,9 +4530,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - endpoint = client.endpoints.retrieve("endpoint-id") + batch = client.batches.cancel("batch_id") - print(endpoint.id) + print(batch) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -4456,9 +4542,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const endpoint = await client.endpoints.retrieve("endpoint-id"); + const batch = await client.batches.cancel("batch-id"); - console.log(endpoint); + console.log(batch); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -4468,59 +4554,94 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const endpoint = await client.endpoints.retrieve("endpoint-id"); + const batch = await client.batches.cancel("batch-id"); - console.log(endpoint); + console.log(batch); - lang: Shell label: cURL source: | - curl "https://api.together.xyz/v1/endpoints/endpoint-id" \ + curl -X POST "https://api.together.xyz/v1/batches/ID/cancel" \ -H "Authorization: Bearer $TOGETHER_API_KEY" \ -H "Content-Type: application/json" - operationId: getEndpoint + security: + - bearerAuth: [] parameters: - - name: endpointId + - name: id in: path required: true + description: Job ID schema: type: string - description: The ID of the endpoint to retrieve - example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 + example: 'batch_job_abc123def456' responses: '200': - description: '200' + description: OK content: application/json: schema: - $ref: '#/components/schemas/DedicatedEndpoint' + $ref: '#/components/schemas/BatchJob' + '400': + description: Bad Request + content: + application/json: + schema: + $ref: '#/components/schemas/BatchErrorResponse' + '401': + description: Unauthorized + content: + application/json: + schema: + $ref: 
'#/components/schemas/BatchErrorResponse' '403': - description: 'Unauthorized' + description: Forbidden content: application/json: schema: - $ref: '#/components/schemas/ErrorData' + $ref: '#/components/schemas/BatchErrorResponse' '404': - description: 'Not Found' + description: Not Found content: application/json: schema: - $ref: '#/components/schemas/ErrorData' + $ref: '#/components/schemas/BatchErrorResponse' '500': - description: 'Internal error' + description: Internal Server Error content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - - patch: - tags: ['Endpoints'] - summary: Update endpoint, this can also be used to start or stop a dedicated endpoint - description: Updates an existing endpoint's configuration. You can modify the display name, autoscaling settings, or change the endpoint's state (start/stop). + $ref: '#/components/schemas/BatchErrorResponse' + /evaluation: + post: + tags: + - evaluation + summary: Create an evaluation job + operationId: createEvaluationJob x-codeSamples: - lang: Python - label: Together AI SDK (v1) + label: Together AI SDK (v1) + source: | + # Docs for v2 can be found by changing the above selector ^ + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), + ) + + response = client.evaluation.create( + type="classify", + judge_model_name="meta-llama/Llama-3.1-70B-Instruct-Turbo", + judge_system_template="You are an expert evaluator...", + input_data_file_path="file-abc123", + labels=["good", "bad"], + pass_labels=["good"], + model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo" + ) + + print(response.workflow_id) + - lang: Python + label: Together AI SDK (v2) source: | - # Docs for v2 can be found by changing the above selector ^ from together import Together import os @@ -4528,12 +4649,22 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - endpoint = client.endpoints.update( - endpoint_id="endpoint-id", - state="STOPPED" + response = client.evals.create( + type="classify", + parameters=ParametersEvaluationClassifyParameters( + judge=ParametersEvaluationClassifyParametersJudge( + model="meta-llama/Llama-3.1-70B-Instruct-Turbo", + model_source="serverless", + system_template="You are an expert evaluator...", + ), + input_data_file_path="file-abc123", + labels=["good", "bad"], + pass_labels=["good"], + model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo" + ) ) - print(endpoint) + print(response.workflow_id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -4543,11 +4674,22 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const endpoint = await client.endpoints.update("endpoint-id", { - state: "STOPPED" + const response = await client.evals.create({ + type: 'classify', + parameters: { + judge: { + model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo', + model_source: 'serverless', + system_template: 'You are an expert evaluator...', + }, + input_data_file_path: 'file-abc123', + labels: ['good', 'bad'], + pass_labels: ['good'], + model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo', + }, }); - console.log(endpoint); + console.log(response.workflow_id); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -4557,85 +4699,54 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const endpoint = await client.endpoints.update("endpoint-id", { - state: "STOPPED" + const response = await client.evals.create({ + type: 'classify', + parameters: { + judge: { + model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo', + model_source: 
'serverless', + system_template: 'You are an expert evaluator...', + }, + input_data_file_path: 'file-abc123', + labels: ['good', 'bad'], + pass_labels: ['good'], + model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo', + }, }); - console.log(endpoint); - - lang: Shell - label: cURL - source: | - curl -X PATCH "https://api.together.xyz/v1/endpoints/endpoint-id" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "state": "STOPPED" - }' - operationId: updateEndpoint - parameters: - - name: endpointId - in: path - required: true - schema: - type: string - description: The ID of the endpoint to update - example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 + console.log(response.workflow_id); + + requestBody: required: true content: application/json: schema: - type: object - properties: - display_name: - type: string - description: A human-readable name for the endpoint - example: My Llama3 70b endpoint - state: - type: string - description: The desired state of the endpoint - enum: - - STARTED - - STOPPED - example: STARTED - autoscaling: - $ref: '#/components/schemas/Autoscaling' - description: New autoscaling configuration for the endpoint - inactive_timeout: - type: integer - description: The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout. - nullable: true - example: 60 + $ref: "#/components/schemas/EvaluationTypedRequest" responses: - '200': - description: '200' - content: - application/json: - schema: - $ref: '#/components/schemas/DedicatedEndpoint' - '403': - description: 'Unauthorized' + "200": + description: "Evaluation job created successfully" content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '404': - description: 'Not Found' + $ref: "#/components/schemas/EvaluationResponse" + "400": + description: "Invalid request format" content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '500': - description: 'Internal error' + $ref: "#/components/schemas/ErrorData" + "500": + description: "Failed to create evaluation job" content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - - delete: - tags: ['Endpoints'] - summary: Delete endpoint - description: Permanently deletes an endpoint. This action cannot be undone. 
+ $ref: "#/components/schemas/ErrorData" + get: + tags: + - evaluation + summary: Get all evaluation jobs + operationId: getAllEvaluationJobs x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -4648,11 +4759,24 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - endpoint = client.endpoints.delete( - endpoint_id="endpoint-id", + jobs = client.evaluation.list() + + for job in jobs: + print(job.workflow_id) + - lang: Python + label: Together AI SDK (v2) + source: | + from together import Together + import os + + client = Together( + api_key=os.environ.get("TOGETHER_API_KEY"), ) - print(endpoint) + response = client.evals.list() + + for job in response: + print(job.workflow_id) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -4662,9 +4786,11 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const endpoint = await client.endpoints.delete("endpoint-id"); + const response = await client.evals.list(); - console.log(endpoint); + for (const job of response) { + console.log(job.workflow_id); + } - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -4674,53 +4800,95 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const endpoint = await client.endpoints.delete("endpoint-id"); + const response = await client.evals.list(); - console.log(endpoint); - - lang: Shell - label: cURL - source: | - curl -X "DELETE" "https://api.together.xyz/v1/endpoints/endpoint-id" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" - operationId: deleteEndpoint + for (const job of response) { + console.log(job.workflow_id); + } parameters: - - name: endpointId - in: path - required: true + - name: status + in: query + required: false + schema: + type: string + default: "pending" + - name: limit + in: query + required: false + schema: + type: integer + default: 10 + - name: userId + in: query + required: false + description: "Admin users can specify a user ID to filter jobs. Pass empty string to get all jobs." schema: type: string - description: The ID of the endpoint to delete - example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 responses: - '204': - description: 'No Content - Endpoint successfully deleted' - '403': - description: 'Unauthorized' + "200": + description: "evaluation jobs retrieved successfully" content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '404': - description: 'Not Found' + type: array + items: + $ref: "#/components/schemas/EvaluationJob" + "400": + description: "Invalid request format" content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '500': - description: 'Internal error' + $ref: "#/components/schemas/ErrorData" + "500": + description: "Error retrieving jobs from manager" content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - - /hardware: + $ref: "#/components/schemas/ErrorData" + /evaluation/model-list: get: - tags: ['Hardware'] - summary: List available hardware configurations - description: > - Returns a list of available hardware configurations for deploying models. - When a model parameter is provided, it returns only hardware configurations compatible - with that model, including their current availability status. 
+ tags: + - evaluation + summary: Get model list + operationId: getModelList + parameters: + - name: model_source + in: query + required: false + schema: + type: string + default: "all" + responses: + "200": + description: "Model list retrieved successfully" + content: + application/json: + schema: + type: object + properties: + model_list: + type: array + items: + type: string + description: "The name of the model" + "400": + description: "Invalid request format" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorData" + "500": + description: "Error retrieving model list" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorData" + /evaluation/{id}: + get: + tags: + - evaluation + summary: Get evaluation job details + operationId: getEvaluationJobDetails x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -4733,10 +4901,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.endpoints.list_hardware() + response = client.evaluation.retrieve('eval_id') - for hardware in response: - print(hardware.id) + print(response) - lang: Python label: Together AI SDK (v2) source: | @@ -4747,10 +4914,9 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.hardware.list() + response = client.evals.retrieve('eval_id') - for hardware in response.data: - print(hardware.id) + print(response) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -4760,9 +4926,9 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const hardware = await client.hardware.list(); + const response = await client.evals.retrieve('eval_id'); - console.log(hardware); + console.log(response); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -4772,64 +4938,41 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const hardware = await client.hardware.list(); + const response = await client.evals.retrieve('eval_id'); - console.log(hardware); - - lang: Shell - label: cURL - source: | - curl "https://api.together.xyz/v1/hardware" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - operationId: listHardware + console.log(response); parameters: - - name: model - in: query - required: false + - name: id + in: path + required: true schema: type: string - description: > - Filter hardware configurations by model compatibility. When provided, - the response includes availability status for each compatible configuration. - example: meta-llama/Llama-3-70b-chat-hf responses: - '200': - description: 'List of available hardware configurations' + "200": + description: "Evaluation job details retrieved successfully" content: application/json: schema: - type: object - required: - - object - - data - properties: - object: - type: string - enum: - - list - data: - type: array - items: - $ref: '#/components/schemas/HardwareWithStatus' - '403': - description: 'Unauthorized' + $ref: "#/components/schemas/EvaluationJob" + "404": + description: "Evaluation job not found" content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - '500': - description: 'Internal error' + $ref: "#/components/schemas/ErrorData" + "500": + description: "Failed to get evaluation job" content: application/json: schema: - $ref: '#/components/schemas/ErrorData' - /tci/execute: - post: - tags: ['Code Interpreter'] - callbacks: {} - description: | - Executes the given code snippet and returns the output. Without a session_id, a new session will be created to run the code. 
If you do pass in a valid session_id, the code will be run in that session. This is useful for running multiple code snippets in the same environment, because dependencies and similar things are persisted - between calls to the same session. + $ref: "#/components/schemas/ErrorData" + + /evaluation/{id}/status: + get: + tags: + - evaluation + summary: Get evaluation job status and results + operationId: getEvaluationJobStatusAndResults x-codeSamples: - lang: Python label: Together AI SDK (v1) @@ -4842,12 +4985,10 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.code_interpreter.run( - code="print('Hello world!')", - language="python", - ) + response = client.evaluation.status('eval_id') - print(response.data.outputs[0].data); + print(response.status) + print(response.results) - lang: Python label: Together AI SDK (v2) source: | @@ -4858,12 +4999,10 @@ paths: api_key=os.environ.get("TOGETHER_API_KEY"), ) - response = client.code_interpreter.execute( - code="print('Hello world!')", - language="python", - ) + response = client.evals.status('eval_id') - print(response.data.outputs[0].data); + print(response.status) + print(response.results) - lang: TypeScript label: Together AI SDK (TypeScript) source: | @@ -4873,12 +5012,10 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.codeInterpreter.execute({ - code: "print('Hello world!')", - language: "python" - }); + const response = await client.evals.status('eval_id'); - console.log(response.data?.outputs?.[0]?.data); + console.log(response.status); + console.log(response.results); - lang: JavaScript label: Together AI SDK (JavaScript) source: | @@ -4888,1215 +5025,889 @@ paths: apiKey: process.env.TOGETHER_API_KEY, }); - const response = await client.codeInterpreter.execute({ - code: "print('Hello world!')", - language: "python" - }); + const response = await client.evals.status('eval_id'); - console.log(response.data?.outputs?.[0]?.data); - - lang: Shell - label: cURL - source: | - curl -X POST "https://api.together.xyz/v1/tci/execute" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "code": "print(\'Hello world!\')", - "language": "python" - }' - operationId: tci/execute - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/ExecuteRequest' - description: Execute Request - required: false + console.log(response.status); + console.log(response.results); + parameters: + - name: id + in: path + required: true + schema: + type: string responses: - '200': + "200": + description: "Evaluation job status and results retrieved successfully" content: application/json: schema: - $ref: '#/components/schemas/ExecuteResponse' - description: Execute Response - /tci/sessions: + type: object + properties: + status: + type: string + description: "The status of the evaluation job" + enum: ["completed", "error", "user_error", "running", "queued", "pending"] + results: + description: "The results of the evaluation job" + oneOf: + - $ref: "#/components/schemas/EvaluationClassifyResults" + - $ref: "#/components/schemas/EvaluationScoreResults" + - $ref: "#/components/schemas/EvaluationCompareResults" + "404": + description: "Evaluation job not found" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorData" + "500": + description: "Failed to get evaluation job" + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorData" + + /realtime: get: - tags: ['Code 
Interpreter'] - callbacks: {} + tags: ['Audio'] + summary: Real-time audio transcription via WebSocket description: | - Lists all your currently active sessions. - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - # together v1 does not support this method + Establishes a WebSocket connection for real-time audio transcription. This endpoint uses WebSocket protocol (wss://api.together.ai/v1/realtime) for bidirectional streaming communication. + + **Connection Setup:** + - Protocol: WebSocket (wss://) + - Authentication: Pass API key as Bearer token in Authorization header + - Parameters: Sent as query parameters (model, input_audio_format) + + **Client Events:** + - `input_audio_buffer.append`: Send audio chunks as base64-encoded data + ```json + { + "type": "input_audio_buffer.append", + "audio": "" + } + ``` + - `input_audio_buffer.commit`: Signal end of audio stream + ```json + { + "type": "input_audio_buffer.commit" + } + ``` + + **Server Events:** + - `session.created`: Initial session confirmation (sent first) + ```json + { + "type": "session.created", + "session": { + "id": "session-id", + "object": "realtime.session", + "modalities": ["audio"], + "model": "openai/whisper-large-v3" + } + } + ``` + - `conversation.item.input_audio_transcription.delta`: Partial transcription results + ```json + { + "type": "conversation.item.input_audio_transcription.delta", + "delta": "The quick brown" + } + ``` + - `conversation.item.input_audio_transcription.completed`: Final transcription + ```json + { + "type": "conversation.item.input_audio_transcription.completed", + "transcript": "The quick brown fox jumps over the lazy dog" + } + ``` + - `conversation.item.input_audio_transcription.failed`: Error occurred + ```json + { + "type": "conversation.item.input_audio_transcription.failed", + "error": { + "message": "Error description", + "type": "invalid_request_error", + "param": null, + "code": "invalid_api_key" + } + } + ``` + + **Error Codes:** + - `invalid_api_key`: Invalid API key provided (401) + - `missing_api_key`: Authorization header missing (401) + - `model_not_available`: Invalid or unavailable model (400) + - Unsupported audio format errors (400) + + operationId: realtime-transcription + x-codeSamples: - lang: Python - label: Together AI SDK (v2) + label: Python WebSocket Client source: | - from together import Together + import asyncio + import websockets + import json + import base64 import os - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.code_interpreter.sessions.list() + async def transcribe_audio(): + api_key = os.environ.get("TOGETHER_API_KEY") + url = "wss://api.together.ai/v1/realtime?model=openai/whisper-large-v3&input_audio_format=pcm_s16le_16000" - for session in response.data.sessions: - print(session.id) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; + headers = { + "Authorization": f"Bearer {api_key}" + } - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); + async with websockets.connect(url, additional_headers=headers) as ws: + # Read audio file + with open("audio.wav", "rb") as f: + audio_data = f.read() - const response = await client.codeInterpreter.sessions.list(); + # Send audio in chunks with delay to simulate real-time + chunk_size = 8192 + bytes_per_second = 16000 * 2 # 16kHz * 2 bytes (16-bit) + delay_per_chunk = chunk_size / bytes_per_second - 
for (const session of response.data?.sessions) { - console.log(session.id); - } - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; + for i in range(0, len(audio_data), chunk_size): + chunk = audio_data[i:i+chunk_size] + base64_chunk = base64.b64encode(chunk).decode('utf-8') + await ws.send(json.dumps({ + "type": "input_audio_buffer.append", + "audio": base64_chunk + })) + # Simulate real-time streaming + if i + chunk_size < len(audio_data): + await asyncio.sleep(delay_per_chunk) - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); + # Commit the audio buffer + await ws.send(json.dumps({ + "type": "input_audio_buffer.commit" + })) - const response = await client.codeInterpreter.sessions.list(); + # Receive transcription results + async for message in ws: + data = json.loads(message) + if data["type"] == "conversation.item.input_audio_transcription.delta": + print(f"Partial: {data['delta']}") + elif data["type"] == "conversation.item.input_audio_transcription.completed": + print(f"Final: {data['transcript']}") + break + elif data["type"] == "conversation.item.input_audio_transcription.failed": + error = data.get("error", {}) + print(f"Error: {error.get('message')}") + break - for (const session of response.data?.sessions) { - console.log(session.id); - } - - lang: Shell - label: cURL - source: | - curl "https://api.together.xyz/v1/tci/sessions" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - operationId: sessions/list - parameters: [] - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/SessionListResponse' - description: List Response - /batches: - get: - tags: ['Batches'] - summary: List batch jobs - description: List all batch jobs for the authenticated user - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) + asyncio.run(transcribe_audio()) + - lang: JavaScript + label: Node.js WebSocket Client source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os + import WebSocket from 'ws'; + import fs from 'fs'; - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) + const apiKey = process.env.TOGETHER_API_KEY; + const url = 'wss://api.together.ai/v1/realtime?model=openai/whisper-large-v3&input_audio_format=pcm_s16le_16000'; - batches = client.batches.list_batches() + const ws = new WebSocket(url, { + headers: { + 'Authorization': `Bearer ${apiKey}` + } + }); - for batch in batches: - print(batch.id) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os + ws.on('open', async () => { + console.log('WebSocket connection established!'); - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) + // Read audio file + const audioData = fs.readFileSync('audio.wav'); - batches = client.batches.list() + // Send audio in chunks with delay to simulate real-time + const chunkSize = 8192; + const bytesPerSecond = 16000 * 2; // 16kHz * 2 bytes (16-bit) + const delayPerChunk = (chunkSize / bytesPerSecond) * 1000; // Convert to ms - for batch in batches: - print(batch.id) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; + for (let i = 0; i < audioData.length; i += chunkSize) { + const chunk = audioData.slice(i, i + chunkSize); + const base64Chunk = chunk.toString('base64'); + ws.send(JSON.stringify({ + type: 'input_audio_buffer.append', + 
audio: base64Chunk + })); - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); + // Simulate real-time streaming + if (i + chunkSize < audioData.length) { + await new Promise(resolve => setTimeout(resolve, delayPerChunk)); + } + } - const batches = await client.batches.list(); + // Commit audio buffer + ws.send(JSON.stringify({ + type: 'input_audio_buffer.commit' + })); + }); - console.log(batches); - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; + ws.on('message', (data) => { + const message = JSON.parse(data.toString()); - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, + if (message.type === 'conversation.item.input_audio_transcription.delta') { + console.log(`Partial: ${message.delta}`); + } else if (message.type === 'conversation.item.input_audio_transcription.completed') { + console.log(`Final: ${message.transcript}`); + ws.close(); + } else if (message.type === 'conversation.item.input_audio_transcription.failed') { + const errorMessage = message.error?.message ?? message.message ?? 'Unknown error'; + console.error(`Error: ${errorMessage}`); + ws.close(); + } }); - const batches = await client.batches.list(); + ws.on('error', (error) => { + console.error('WebSocket error:', error); + }); + parameters: + - in: query + name: model + required: true + schema: + type: string + enum: + - openai/whisper-large-v3 + default: openai/whisper-large-v3 + description: The Whisper model to use for transcription + - in: query + name: input_audio_format + required: true + schema: + type: string + enum: + - pcm_s16le_16000 + default: pcm_s16le_16000 + description: Audio format specification. Currently supports 16-bit PCM at 16kHz sample rate. + responses: + '101': + description: | + Switching Protocols - WebSocket connection established successfully. 
- console.log(batches); - - lang: Shell - label: cURL - source: | - curl "https://api.together.xyz/v1/batches" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - security: - - bearerAuth: [] + Error message format: + ```json + { + "type": "conversation.item.input_audio_transcription.failed", + "error": { + "message": "Error description", + "type": "invalid_request_error", + "param": null, + "code": "error_code" + } + } + ``` + /deployments: + get: + description: Get a list of all deployments in your project + responses: + "200": + description: List of deployments + content: + application/json: + schema: + $ref: "#/components/schemas/DeploymentListResponse" + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Get the list of deployments + tags: + - Deployments + post: + description: Create a new deployment with specified configuration + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/CreateDeploymentRequest" + description: Deployment configuration + required: true responses: - '200': - description: OK + "200": + description: Deployment created successfully content: application/json: schema: - type: array - items: - $ref: '#/components/schemas/BatchJob' - '401': - description: Unauthorized + $ref: "#/components/schemas/DeploymentResponseItem" + "400": + description: Invalid request content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - '500': - description: Internal Server Error + type: object + "500": + description: Internal server error content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - post: - tags: ['Batches'] - summary: Create a batch job - description: Create a new batch job with the given input file and endpoint - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - batch = client.batches.create_batch("file_id", endpoint="/v1/chat/completions") - - print(batch.id) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - batch = client.batches.create(input_file_id="file_id", endpoint="/v1/chat/completions") - - print(batch.job) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const batch = await client.batches.create({ - endpoint: "/v1/chat/completions", - input_file_id: "file-id", - }); - - console.log(batch); - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const batch = await client.batches.create({ - endpoint: "/v1/chat/completions", - input_file_id: "file-id", - }); - - console.log(batch); - - lang: Shell - label: cURL - source: | - curl -X POST "https://api.together.xyz/v1/batches" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "endpoint": "/v1/chat/completions", - "input_file_id": "file-id" - }' - security: - - bearerAuth: [] + type: object + summary: Create a new deployment + tags: + - Deployments + /deployments/{id}: + 
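+    # Hedged sketch of the deployment lifecycle (create, poll, delete). Placed at the
+    # path-item level as an editorial choice since the spec's convention puts
+    # x-codeSamples on operations; the base URL, bearer-auth header, and every field
+    # value below are assumptions for illustration only.
+    x-codeSamples:
+      - lang: Python
+        label: Python (requests, illustrative)
+        source: |
+          # Minimal sketch of a deployment lifecycle, assuming the BYOC API is
+          # served at https://api.together.xyz/v1 with bearer-token auth.
+          # "my-byoc-app", the image tag, gpu_type, and port are placeholders.
+          import os
+          import time
+
+          import requests
+
+          BASE = "https://api.together.xyz/v1"
+          headers = {"Authorization": f"Bearer {os.environ['TOGETHER_API_KEY']}"}
+
+          # Create a deployment (POST /deployments).
+          created = requests.post(
+              f"{BASE}/deployments",
+              headers=headers,
+              json={
+                  "name": "my-byoc-app",
+                  "image": "registry.together.ai/my-team/my-app:latest",
+                  "gpu_type": "h100-80gb",
+                  "port": 8080,
+              },
+          )
+          created.raise_for_status()
+          deployment_id = created.json()["id"]
+
+          # Poll the deployment by ID or name (GET /deployments/{id}).
+          for _ in range(5):
+              detail = requests.get(f"{BASE}/deployments/{deployment_id}", headers=headers)
+              detail.raise_for_status()
+              print(detail.json())
+              time.sleep(10)
+
+          # Tear the deployment down when finished (DELETE /deployments/{id}).
+          requests.delete(f"{BASE}/deployments/{deployment_id}", headers=headers).raise_for_status()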
delete: + description: Delete an existing deployment + parameters: + - description: Deployment ID or name + in: path + name: id + required: true + schema: + type: string + responses: + "200": + description: Deployment deleted successfully + content: + application/json: + schema: + type: object + "404": + description: Deployment not found + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Delete a deployment + tags: + - Deployments + get: + description: Retrieve details of a specific deployment by its ID or name + parameters: + - description: Deployment ID or name + in: path + name: id + required: true + schema: + type: string + responses: + "200": + description: Deployment details + content: + application/json: + schema: + $ref: "#/components/schemas/DeploymentResponseItem" + "404": + description: Deployment not found + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Get a deployment by ID or name + tags: + - Deployments + patch: + description: Update an existing deployment configuration + parameters: + - description: Deployment ID or name + in: path + name: id + required: true + schema: + type: string requestBody: - required: true content: application/json: schema: - $ref: '#/components/schemas/CreateBatchRequest' + $ref: "#/components/schemas/UpdateDeploymentRequest" + description: Updated deployment configuration + required: true responses: - '201': - description: Job created (potentially with warnings) + "200": + description: Deployment updated successfully content: application/json: schema: - $ref: '#/components/schemas/BatchJobWithWarning' - '400': - description: Bad Request + $ref: "#/components/schemas/DeploymentResponseItem" + "400": + description: Invalid request content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - '401': - description: Unauthorized + type: object + "404": + description: Deployment not found content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - '429': - description: Too Many Requests + type: object + "500": + description: Internal server error content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - '500': - description: Internal Server Error + type: object + summary: Update a deployment + tags: + - Deployments + /deployments/{id}/logs: + get: + description: Retrieve logs from a deployment, optionally filtered by replica ID. + Use follow=true to stream logs in real-time. 
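+      # Hedged sketch of log streaming with follow=true over plain HTTP. The base URL
+      # and the assumption that streamed lines arrive as newline-delimited JSON are
+      # illustrative guesses, not guarantees of the API's wire format.
+      x-codeSamples:
+        - lang: Python
+          label: Python (requests, illustrative)
+          source: |
+            # Minimal sketch: stream logs for one deployment with follow=true,
+            # assuming the BYOC API is served at https://api.together.xyz/v1
+            # and that each streamed line is a standalone JSON object (ndjson).
+            # "my-byoc-app" is a hypothetical deployment name.
+            import json
+            import os
+
+            import requests
+
+            resp = requests.get(
+                "https://api.together.xyz/v1/deployments/my-byoc-app/logs",
+                headers={"Authorization": f"Bearer {os.environ['TOGETHER_API_KEY']}"},
+                params={"follow": "true"},
+                stream=True,
+            )
+            resp.raise_for_status()
+
+            for line in resp.iter_lines():
+                if line:
+                    print(json.loads(line))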
+ parameters: + - description: Deployment ID or name + in: path + name: id + required: true + schema: + type: string + - description: Replica ID to filter logs + in: query + name: replica_id + schema: + type: string + - description: Stream logs in real-time (ndjson format) + in: query + name: follow + schema: + type: boolean + responses: + "200": + description: Deployment logs + content: + application/json: + schema: + $ref: "#/components/schemas/DeploymentLogs" + "404": + description: Deployment not found + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Get logs for a deployment + tags: + - Deployments + /image-repositories: + get: + description: Retrieve all container image repositories available in your project + responses: + "200": + description: List of repositories + content: + application/json: + schema: + $ref: "#/components/schemas/RepositoryListResponse" + "500": + description: Internal server error content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - - /batches/{id}: + type: object + summary: Get the list of image repositories in your project + tags: + - Images + /image-repositories/{id}/images: get: - tags: ['Batches'] - summary: Get a batch job - description: Get details of a batch job by ID - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - batch = client.batches.get_batch("batch_id") - - print(batch) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - batch = client.batches.retrieve("batch_id") - - print(batch) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const batch = await client.batches.retrieve("batch-id"); - - console.log(batch); - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const batch = await client.batches.retrieve("batch-id"); - - console.log(batch); - - lang: Shell - label: cURL - source: | - curl "https://api.together.xyz/v1/batches/ID" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - security: - - bearerAuth: [] + description: Retrieve all container images (tags) available in a specific repository parameters: - - name: id + - description: Repository ID in: path + name: id required: true - description: Job ID schema: type: string - example: 'batch_job_abc123def456' responses: - '200': - description: OK + "200": + description: List of images content: application/json: schema: - $ref: '#/components/schemas/BatchJob' - '400': - description: Bad Request + $ref: "#/components/schemas/ImageListResponse" + "404": + description: Repository not found content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - '401': - description: Unauthorized + type: object + "500": + description: Internal server error content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - '403': - description: Forbidden + type: object 
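+      # Hedged sketch of listing the image tags under a repository. The base URL and
+      # the repository ID below are assumptions for illustration only.
+      x-codeSamples:
+        - lang: Python
+          label: Python (requests, illustrative)
+          source: |
+            # Minimal sketch: list images (tags) under one repository, assuming
+            # the BYOC API is served at https://api.together.xyz/v1.
+            # "repo_abc123" is a hypothetical repository ID.
+            import os
+
+            import requests
+
+            resp = requests.get(
+                "https://api.together.xyz/v1/image-repositories/repo_abc123/images",
+                headers={"Authorization": f"Bearer {os.environ['TOGETHER_API_KEY']}"},
+            )
+            resp.raise_for_status()
+            print(resp.json())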
+ summary: Get the list of images available under a repository + tags: + - Images + /secrets: + get: + description: Retrieve all secrets in your project + responses: + "200": + description: List of secrets content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - '404': - description: Not Found + $ref: "#/components/schemas/ListSecretsResponse" + "500": + description: Internal server error content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - '500': - description: Internal Server Error + type: object + summary: Get the list of project secrets + tags: + - Secrets + post: + description: Create a new secret to store sensitive configuration values + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/CreateSecretRequest" + description: Secret configuration + required: true + responses: + "200": + description: Secret created successfully content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - /batches/{id}/cancel: - post: - tags: ['Batches'] - summary: Cancel a batch job - description: Cancel a batch job by ID - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - batch = client.batches.cancel("batch_id") - - print(batch) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - batch = client.batches.cancel("batch_id") - - print(batch) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const batch = await client.batches.cancel("batch-id"); - - console.log(batch); - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const batch = await client.batches.cancel("batch-id"); - - console.log(batch); - - lang: Shell - label: cURL - source: | - curl -X POST "https://api.together.xyz/v1/batches/ID/cancel" \ - -H "Authorization: Bearer $TOGETHER_API_KEY" \ - -H "Content-Type: application/json" - security: - - bearerAuth: [] + $ref: "#/components/schemas/SecretResponseItem" + "400": + description: Invalid request + content: + application/json: + schema: + type: object + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Create a new secret + tags: + - Secrets + /secrets/{id}: + delete: + description: Delete an existing secret parameters: - - name: id + - description: Secret ID or name in: path + name: id required: true - description: Job ID schema: type: string - example: 'batch_job_abc123def456' responses: - '200': - description: OK + "200": + description: Secret deleted successfully content: application/json: schema: - $ref: '#/components/schemas/BatchJob' - '400': - description: Bad Request + type: object + "404": + description: Secret not found content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - '401': - description: Unauthorized + type: object + "500": + description: Internal server error content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - '403': - 
description: Forbidden + type: object + summary: Delete a secret + tags: + - Secrets + get: + description: Retrieve details of a specific secret by its ID or name + parameters: + - description: Secret ID or name + in: path + name: id + required: true + schema: + type: string + responses: + "200": + description: Secret details content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - '404': - description: Not Found + $ref: "#/components/schemas/SecretResponseItem" + "404": + description: Secret not found content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - '500': - description: Internal Server Error + type: object + "500": + description: Internal server error content: application/json: schema: - $ref: '#/components/schemas/BatchErrorResponse' - /evaluation: - post: + type: object + summary: Get a secret by ID or name tags: - - evaluation - summary: Create an evaluation job - operationId: createEvaluationJob - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.evaluation.create( - type="classify", - judge_model_name="meta-llama/Llama-3.1-70B-Instruct-Turbo", - judge_system_template="You are an expert evaluator...", - input_data_file_path="file-abc123", - labels=["good", "bad"], - pass_labels=["good"], - model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo" - ) - - print(response.workflow_id) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.evals.create( - type="classify", - parameters=ParametersEvaluationClassifyParameters( - judge=ParametersEvaluationClassifyParametersJudge( - model="meta-llama/Llama-3.1-70B-Instruct-Turbo", - model_source="serverless", - system_template="You are an expert evaluator...", - ), - input_data_file_path="file-abc123", - labels=["good", "bad"], - pass_labels=["good"], - model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo" - ) - ) - - print(response.workflow_id) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.evals.create({ - type: 'classify', - parameters: { - judge: { - model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo', - model_source: 'serverless', - system_template: 'You are an expert evaluator...', - }, - input_data_file_path: 'file-abc123', - labels: ['good', 'bad'], - pass_labels: ['good'], - model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo', - }, - }); - - console.log(response.workflow_id); - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.evals.create({ - type: 'classify', - parameters: { - judge: { - model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo', - model_source: 'serverless', - system_template: 'You are an expert evaluator...', - }, - input_data_file_path: 'file-abc123', - labels: ['good', 'bad'], - pass_labels: ['good'], - model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo', - }, - }); - - console.log(response.workflow_id); - - + - Secrets + patch: + 
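+      # Hedged sketch of rotating a secret's value via PATCH /secrets/{id}. The base
+      # URL and the shape of the request body (a "value" field) are assumptions drawn
+      # from CreateSecretRequest, since UpdateSecretRequest is defined elsewhere.
+      x-codeSamples:
+        - lang: Python
+          label: Python (requests, illustrative)
+          source: |
+            # Minimal sketch: update an existing secret by name, assuming the
+            # BYOC API is served at https://api.together.xyz/v1 and that the
+            # update body accepts a new "value". "db-password" is a
+            # hypothetical secret name.
+            import os
+
+            import requests
+
+            resp = requests.patch(
+                "https://api.together.xyz/v1/secrets/db-password",
+                headers={"Authorization": f"Bearer {os.environ['TOGETHER_API_KEY']}"},
+                json={"value": "new-secret-value"},
+            )
+            resp.raise_for_status()
+            print(resp.json())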
description: Update an existing secret's value or metadata + parameters: + - description: Secret ID or name + in: path + name: id + required: true + schema: + type: string requestBody: - required: true content: application/json: schema: - $ref: "#/components/schemas/EvaluationTypedRequest" + $ref: "#/components/schemas/UpdateSecretRequest" + description: Updated secret configuration + required: true responses: "200": - description: "Evaluation job created successfully" + description: Secret updated successfully content: application/json: schema: - $ref: "#/components/schemas/EvaluationResponse" + $ref: "#/components/schemas/SecretResponseItem" "400": - description: "Invalid request format" + description: Invalid request content: application/json: schema: - $ref: "#/components/schemas/ErrorData" + type: object + "404": + description: Secret not found + content: + application/json: + schema: + type: object "500": - description: "Failed to create evaluation job" + description: Internal server error content: application/json: schema: - $ref: "#/components/schemas/ErrorData" - get: + type: object + summary: Update a secret tags: - - evaluation - summary: Get all evaluation jobs - operationId: getAllEvaluationJobs - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - jobs = client.evaluation.list() - - for job in jobs: - print(job.workflow_id) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.evals.list() - - for job in response: - print(job.workflow_id) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.evals.list(); - - for (const job of response) { - console.log(job.workflow_id); - } - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.evals.list(); - - for (const job of response) { - console.log(job.workflow_id); - } + - Secrets + /storage/{filename}: + get: + description: Download a file by redirecting to a signed URL parameters: - - name: status - in: query - required: false - schema: - type: string - default: "pending" - - name: limit - in: query - required: false - schema: - type: integer - default: 10 - - name: userId - in: query - required: false - description: "Admin users can specify a user ID to filter jobs. Pass empty string to get all jobs." 
+ - description: Filename + in: path + name: filename + required: true schema: type: string + responses: + "307": + description: Redirect to signed download URL + content: + application/json: + schema: + type: string + "400": + description: Invalid request + content: + application/json: + schema: + additionalProperties: + type: string + type: object + "404": + description: File not found + content: + application/json: + schema: + additionalProperties: + type: string + type: object + "500": + description: Internal error + content: + application/json: + schema: + additionalProperties: + type: string + type: object + summary: Download a file + tags: + - files + /storage/volumes: + get: + description: Retrieve all volumes in your project responses: "200": - description: "evaluation jobs retrieved successfully" + description: List of volumes content: application/json: schema: - type: array - items: - $ref: "#/components/schemas/EvaluationJob" + $ref: "#/components/schemas/ListVolumesResponse" + "500": + description: Internal server error + content: + application/json: + schema: + type: object + summary: Get the list of project volumes + tags: + - Volumes + post: + description: Create a new volume to preload files in deployments + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/CreateVolumeRequest" + description: Volume configuration + required: true + responses: + "200": + description: Volume created successfully + content: + application/json: + schema: + $ref: "#/components/schemas/VolumeResponseItem" "400": - description: "Invalid request format" + description: Invalid request content: application/json: schema: - $ref: "#/components/schemas/ErrorData" + type: object "500": - description: "Error retrieving jobs from manager" + description: Internal server error content: application/json: schema: - $ref: "#/components/schemas/ErrorData" - /evaluation/model-list: - get: + type: object + summary: Create a new volume tags: - - evaluation - summary: Get model list - operationId: getModelList + - Volumes + /storage/volumes/{id}: + delete: + description: Delete an existing volume parameters: - - name: model_source - in: query - required: false + - description: Volume ID or name + in: path + name: id + required: true schema: type: string - default: "all" responses: "200": - description: "Model list retrieved successfully" + description: Volume deleted successfully content: application/json: schema: type: object - properties: - model_list: - type: array - items: - type: string - description: "The name of the model" - "400": - description: "Invalid request format" + "404": + description: Volume not found content: application/json: schema: - $ref: "#/components/schemas/ErrorData" + type: object "500": - description: "Error retrieving model list" + description: Internal server error content: application/json: schema: - $ref: "#/components/schemas/ErrorData" - /evaluation/{id}: - get: + type: object + summary: Delete a volume tags: - - evaluation - summary: Get evaluation job details - operationId: getEvaluationJobDetails - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.evaluation.retrieve('eval_id') - - print(response) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - 
api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.evals.retrieve('eval_id') - - print(response) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.evals.retrieve('eval_id'); - - console.log(response); - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.evals.retrieve('eval_id'); - - console.log(response); + - Volumes + get: + description: Retrieve details of a specific volume by its ID or name parameters: - - name: id + - description: Volume ID or name in: path + name: id required: true schema: type: string responses: "200": - description: "Evaluation job details retrieved successfully" + description: Volume details content: application/json: schema: - $ref: "#/components/schemas/EvaluationJob" + $ref: "#/components/schemas/VolumeResponseItem" "404": - description: "Evaluation job not found" + description: Volume not found content: application/json: schema: - $ref: "#/components/schemas/ErrorData" + type: object "500": - description: "Failed to get evaluation job" + description: Internal server error content: application/json: schema: - $ref: "#/components/schemas/ErrorData" - - /evaluation/{id}/status: - get: + type: object + summary: Get a volume by ID or name tags: - - evaluation - summary: Get evaluation job status and results - operationId: getEvaluationJobStatusAndResults - x-codeSamples: - - lang: Python - label: Together AI SDK (v1) - source: | - # Docs for v2 can be found by changing the above selector ^ - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.evaluation.status('eval_id') - - print(response.status) - print(response.results) - - lang: Python - label: Together AI SDK (v2) - source: | - from together import Together - import os - - client = Together( - api_key=os.environ.get("TOGETHER_API_KEY"), - ) - - response = client.evals.status('eval_id') - - print(response.status) - print(response.results) - - lang: TypeScript - label: Together AI SDK (TypeScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.evals.status('eval_id'); - - console.log(response.status); - console.log(response.results); - - lang: JavaScript - label: Together AI SDK (JavaScript) - source: | - import Together from "together-ai"; - - const client = new Together({ - apiKey: process.env.TOGETHER_API_KEY, - }); - - const response = await client.evals.status('eval_id'); - - console.log(response.status); - console.log(response.results); + - Volumes + patch: + description: Update an existing volume's configuration or contents parameters: - - name: id + - description: Volume ID or name in: path + name: id required: true schema: type: string + requestBody: + content: + application/json: + schema: + $ref: "#/components/schemas/UpdateVolumeRequest" + description: Updated volume configuration + required: true responses: "200": - description: "Evaluation job status and results retrieved successfully" + description: Volume updated successfully + content: + application/json: + schema: + $ref: "#/components/schemas/VolumeResponseItem" + "400": + description: 
Invalid request content: application/json: schema: type: object - properties: - status: - type: string - description: "The status of the evaluation job" - enum: ["completed", "error", "user_error", "running", "queued", "pending"] - results: - description: "The results of the evaluation job" - oneOf: - - $ref: "#/components/schemas/EvaluationClassifyResults" - - $ref: "#/components/schemas/EvaluationScoreResults" - - $ref: "#/components/schemas/EvaluationCompareResults" "404": - description: "Evaluation job not found" + description: Volume not found content: application/json: schema: - $ref: "#/components/schemas/ErrorData" + type: object "500": - description: "Failed to get evaluation job" + description: Internal server error content: application/json: schema: - $ref: "#/components/schemas/ErrorData" - - /realtime: - get: - tags: ['Audio'] - summary: Real-time audio transcription via WebSocket - description: | - Establishes a WebSocket connection for real-time audio transcription. This endpoint uses WebSocket protocol (wss://api.together.ai/v1/realtime) for bidirectional streaming communication. - - **Connection Setup:** - - Protocol: WebSocket (wss://) - - Authentication: Pass API key as Bearer token in Authorization header - - Parameters: Sent as query parameters (model, input_audio_format) - - **Client Events:** - - `input_audio_buffer.append`: Send audio chunks as base64-encoded data - ```json - { - "type": "input_audio_buffer.append", - "audio": "" - } - ``` - - `input_audio_buffer.commit`: Signal end of audio stream - ```json - { - "type": "input_audio_buffer.commit" - } - ``` - - **Server Events:** - - `session.created`: Initial session confirmation (sent first) - ```json - { - "type": "session.created", - "session": { - "id": "session-id", - "object": "realtime.session", - "modalities": ["audio"], - "model": "openai/whisper-large-v3" - } - } - ``` - - `conversation.item.input_audio_transcription.delta`: Partial transcription results - ```json - { - "type": "conversation.item.input_audio_transcription.delta", - "delta": "The quick brown" - } - ``` - - `conversation.item.input_audio_transcription.completed`: Final transcription - ```json - { - "type": "conversation.item.input_audio_transcription.completed", - "transcript": "The quick brown fox jumps over the lazy dog" - } - ``` - - `conversation.item.input_audio_transcription.failed`: Error occurred - ```json - { - "type": "conversation.item.input_audio_transcription.failed", - "error": { - "message": "Error description", - "type": "invalid_request_error", - "param": null, - "code": "invalid_api_key" - } - } - ``` - - **Error Codes:** - - `invalid_api_key`: Invalid API key provided (401) - - `missing_api_key`: Authorization header missing (401) - - `model_not_available`: Invalid or unavailable model (400) - - Unsupported audio format errors (400) - - operationId: realtime-transcription - x-codeSamples: - - lang: Python - label: Python WebSocket Client - source: | - import asyncio - import websockets - import json - import base64 - import os - - async def transcribe_audio(): - api_key = os.environ.get("TOGETHER_API_KEY") - url = "wss://api.together.ai/v1/realtime?model=openai/whisper-large-v3&input_audio_format=pcm_s16le_16000" - - headers = { - "Authorization": f"Bearer {api_key}" - } - - async with websockets.connect(url, additional_headers=headers) as ws: - # Read audio file - with open("audio.wav", "rb") as f: - audio_data = f.read() - - # Send audio in chunks with delay to simulate real-time - chunk_size = 8192 - bytes_per_second 
= 16000 * 2 # 16kHz * 2 bytes (16-bit) - delay_per_chunk = chunk_size / bytes_per_second - - for i in range(0, len(audio_data), chunk_size): - chunk = audio_data[i:i+chunk_size] - base64_chunk = base64.b64encode(chunk).decode('utf-8') - await ws.send(json.dumps({ - "type": "input_audio_buffer.append", - "audio": base64_chunk - })) - # Simulate real-time streaming - if i + chunk_size < len(audio_data): - await asyncio.sleep(delay_per_chunk) - - # Commit the audio buffer - await ws.send(json.dumps({ - "type": "input_audio_buffer.commit" - })) - - # Receive transcription results - async for message in ws: - data = json.loads(message) - if data["type"] == "conversation.item.input_audio_transcription.delta": - print(f"Partial: {data['delta']}") - elif data["type"] == "conversation.item.input_audio_transcription.completed": - print(f"Final: {data['transcript']}") - break - elif data["type"] == "conversation.item.input_audio_transcription.failed": - error = data.get("error", {}) - print(f"Error: {error.get('message')}") - break - - asyncio.run(transcribe_audio()) - - lang: JavaScript - label: Node.js WebSocket Client - source: | - import WebSocket from 'ws'; - import fs from 'fs'; - - const apiKey = process.env.TOGETHER_API_KEY; - const url = 'wss://api.together.ai/v1/realtime?model=openai/whisper-large-v3&input_audio_format=pcm_s16le_16000'; - - const ws = new WebSocket(url, { - headers: { - 'Authorization': `Bearer ${apiKey}` - } - }); - - ws.on('open', async () => { - console.log('WebSocket connection established!'); - - // Read audio file - const audioData = fs.readFileSync('audio.wav'); - - // Send audio in chunks with delay to simulate real-time - const chunkSize = 8192; - const bytesPerSecond = 16000 * 2; // 16kHz * 2 bytes (16-bit) - const delayPerChunk = (chunkSize / bytesPerSecond) * 1000; // Convert to ms - - for (let i = 0; i < audioData.length; i += chunkSize) { - const chunk = audioData.slice(i, i + chunkSize); - const base64Chunk = chunk.toString('base64'); - ws.send(JSON.stringify({ - type: 'input_audio_buffer.append', - audio: base64Chunk - })); - - // Simulate real-time streaming - if (i + chunkSize < audioData.length) { - await new Promise(resolve => setTimeout(resolve, delayPerChunk)); - } - } - - // Commit audio buffer - ws.send(JSON.stringify({ - type: 'input_audio_buffer.commit' - })); - }); - - ws.on('message', (data) => { - const message = JSON.parse(data.toString()); - - if (message.type === 'conversation.item.input_audio_transcription.delta') { - console.log(`Partial: ${message.delta}`); - } else if (message.type === 'conversation.item.input_audio_transcription.completed') { - console.log(`Final: ${message.transcript}`); - ws.close(); - } else if (message.type === 'conversation.item.input_audio_transcription.failed') { - const errorMessage = message.error?.message ?? message.message ?? 'Unknown error'; - console.error(`Error: ${errorMessage}`); - ws.close(); - } - }); - - ws.on('error', (error) => { - console.error('WebSocket error:', error); - }); - parameters: - - in: query - name: model - required: true - schema: - type: string - enum: - - openai/whisper-large-v3 - default: openai/whisper-large-v3 - description: The Whisper model to use for transcription - - in: query - name: input_audio_format - required: true - schema: - type: string - enum: - - pcm_s16le_16000 - default: pcm_s16le_16000 - description: Audio format specification. Currently supports 16-bit PCM at 16kHz sample rate. 
- responses: - '101': - description: | - Switching Protocols - WebSocket connection established successfully. - - Error message format: - ```json - { - "type": "conversation.item.input_audio_transcription.failed", - "error": { - "message": "Error description", - "type": "invalid_request_error", - "param": null, - "code": "error_code" - } - } - ``` + type: object + summary: Update a volume + tags: + - Volumes + components: securitySchemes: From 14c3a5c652399b8c27428bfefcf6cc0c47d5db4e Mon Sep 17 00:00:00 2001 From: Blaine Kasten Date: Wed, 17 Dec 2025 10:42:32 -0600 Subject: [PATCH 3/4] further simplify --- openapi.yaml | 7945 ++++++++++++++++++++++++-------------------------- 1 file changed, 3889 insertions(+), 4056 deletions(-) diff --git a/openapi.yaml b/openapi.yaml index ee16ea7..ae7b6e2 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -5918,1316 +5918,1589 @@ components: x-default: default schemas: - api_v1.SignedURLResponse: + ListVoicesResponse: + description: Response containing a list of models and their available voices. + type: object + required: ['data'] properties: - url: - type: string + data: + type: array + items: + $ref: '#/components/schemas/ModelVoices' + + ModelVoices: + description: Represents a model with its available voices. type: object - files.AbortMultiPartRequest: + required: ['model', 'voices'] properties: - filename: - description: Filename is the name of the file to upload. Can contain - alphanumeric characters, underscores, hyphens, spaces, periods, and - forward slashes with an optional file extension (e.g., - "model_weights.bin", "data-2024.tar.gz", "path/to/file.bin") - type: string - upload_id: - description: UploadID is the unique identifier returned from the multi-part - upload initialization. Aborting will discard all uploaded parts + model: type: string - required: - - filename - - upload_id + + voices: + type: array + items: + type: object + required: ['id', 'name'] + properties: + id: + type: string + name: + type: string + + ListAvailibilityZonesResponse: + description: List of unique availability zones type: object - files.CompleteMultiPartRequest: + required: ['avzones'] properties: - filename: - description: Filename is the name of the file to upload. Can contain - alphanumeric characters, underscores, hyphens, spaces, periods, and - forward slashes with an optional file extension (e.g., - "model_weights.bin", "data-2024.tar.gz", "path/to/file.bin") - type: string - parts: - description: Parts is the list of successfully uploaded parts with their ETags. - Must include all parts in order - items: - $ref: "#/components/schemas/files.CompletedPart" - minItems: 1 + avzones: type: array - upload_id: - description: UploadID is the unique identifier returned from the multi-part - upload initialization - type: string - required: - - filename - - parts - - upload_id + items: + type: string + + RerankRequest: type: object - files.CompleteUploadResponse: properties: - completed_at: - description: CompletedAt is the timestamp when the upload was completed + model: type: string - path: - description: Path is the storage path where the uploaded file can be accessed + description: > + The model to be used for the rerank request.
+ [See all of Together AI's rerank models](https://docs.together.ai/docs/serverless-models#rerank-models) + example: Salesforce/Llama-Rank-V1 + anyOf: + - type: string + enum: + - Salesforce/Llama-Rank-v1 + - type: string + + query: type: string - size: - description: Size is the total size of the uploaded file in bytes + description: The search query to be used for ranking. + example: What animals can I find near Peru? + documents: + description: List of documents, which can be either strings or objects. + oneOf: + - type: array + items: + type: object + additionalProperties: true + - type: array + items: + type: string + example: Our solar system orbits the Milky Way galaxy at about 515,000 mph + example: + - { + 'title': 'Llama', + 'text': 'The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era.', + } + - { + 'title': 'Panda', + 'text': 'The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China.', + } + - { + 'title': 'Guanaco', + 'text': 'The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations.', + } + - { + 'title': 'Wild Bactrian camel', + 'text': 'The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia.', + } + top_n: type: integer + description: The number of top results to return. + example: 2 + return_documents: + type: boolean + description: Whether to return supplied documents with the response. + example: true + rank_fields: + type: array + items: + type: string + description: List of keys in the JSON Object document to rank by. Defaults to use all supplied keys for ranking. + example: ['title', 'text'] + required: + - model + - query + - documents + additionalProperties: false + + RerankResponse: type: object - files.CompletedPart: + required: + - object + - model + - results properties: - etag: - description: ETag is the entity tag returned by the storage service after - uploading this part. This is used to verify the part's integrity + object: type: string - part_number: - description: PartNumber is the sequential number of this part (starting from 1) - type: integer - type: object - files.FileRequest: - properties: - filename: - description: Filename is the name of the file to upload. Can contain - alphanumeric characters, underscores, hyphens, spaces, periods, and - forward slashes with an optional file extension (e.g., - "model_weights.bin", "data-2024.tar.gz", "path/to/file.bin") + description: Object type + enum: + - rerank + example: rerank + id: type: string - required: - - filename + description: Request ID + example: 9dfa1a09-5ebc-4a40-970f-586cb8f4ae47 + model: + type: string + description: The model to be used for the rerank request. 
+ example: salesforce/turboranker-0.8-3778-6328 + results: + type: array + items: + type: object + required: [index, relevance_score, document] + properties: + index: + type: integer + relevance_score: + type: number + document: + type: object + properties: + text: + type: string + nullable: true + example: + - { + 'index': 0, + 'relevance_score': 0.29980177813003117, + 'document': + { + 'text': '{"title":"Llama","text":"The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."}', + }, + } + - { + 'index': 2, + 'relevance_score': 0.2752447527354349, + 'document': + { + 'text': '{"title":"Guanaco","text":"The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."}', + }, + } + usage: + $ref: '#/components/schemas/UsageData' + example: + { + 'prompt_tokens': 1837, + 'completion_tokens': 0, + 'total_tokens': 1837, + } + + ErrorData: type: object - files.InitiateMultiPartRequest: - properties: - filename: - description: Filename is the name of the file to upload. Can contain - alphanumeric characters, underscores, hyphens, spaces, periods, and - forward slashes with an optional file extension (e.g., - "model_weights.bin", "data-2024.tar.gz", "path/to/file.bin") - type: string - parts_count: - description: PartsCount is the number of parts to split the file into for - parallel upload. Must be between 1 and 10,000. Use multi-part upload - for files larger than 100MB for better performance and reliability - maximum: 10000 - minimum: 1 - type: integer required: - - filename - - parts_count + - error + properties: + error: + type: object + properties: + message: + type: string + nullable: false + type: + type: string + nullable: false + param: + type: string + nullable: true + default: null + code: + type: string + nullable: true + default: null + required: + - type + - message + + FinishReason: + type: string + enum: + - stop + - eos + - length + - tool_calls + - function_call + + LogprobsPart: type: object - files.MultiPartInitResponse: properties: - part_upload_urls: - description: PartUploadURLs is the list of pre-signed URLs for uploading each - part in parallel + token_ids: + type: array items: - $ref: "#/components/schemas/files.PartUploadURL" + type: number + description: List of token IDs corresponding to the logprobs + tokens: type: array - path: - description: Path is the storage path where the file will be accessible after - upload completion - type: string - upload_id: - description: UploadID is the unique identifier for this multi-part upload - session. Use this when completing or aborting the upload - type: string + items: + type: string + description: List of token strings + token_logprobs: + type: array + items: + type: number + description: List of token log probabilities + + PromptPart: + type: array + items: + type: object + properties: + text: + type: string + example: [INST] What is the capital of France? 
[/INST] + logprobs: + $ref: '#/components/schemas/LogprobsPart' + + InferenceWarning: type: object - files.PartUploadURL: + required: + - message properties: - headers: - additionalProperties: - type: string - description: Headers are the required HTTP headers to include when uploading - this part - type: object - method: - description: Method is the HTTP method to use for uploading this part (typically - "PUT") - type: string - part_number: - description: PartNumber is the sequential number identifying this part (starting - from 1) - type: integer - url: - description: URL is the pre-signed URL for uploading this specific part + message: type: string + + UsageData: type: object - files.UploadResponse: properties: - path: - description: Path is the storage path where the file will be accessible after - upload (e.g., "user-files/model_weights.bin") - type: string - upload_url: - allOf: - - $ref: "#/components/schemas/files.UploadURL" - description: UploadURL contains the signed URL and metadata needed to upload the - file - type: object - files.UploadURL: - properties: - form_data: - additionalProperties: - type: string - description: FormData contains form fields required for multipart/form-data - uploads (if applicable) - type: object - headers: - additionalProperties: + prompt_tokens: + type: integer + completion_tokens: + type: integer + total_tokens: + type: integer + required: + - prompt_tokens + - completion_tokens + - total_tokens + nullable: true + + CompletionChoicesData: + type: array + items: + type: object + properties: + text: type: string - description: Headers are the required HTTP headers to include in the upload - request - type: object - method: - description: Method is the HTTP method to use (typically "PUT" or "POST") - type: string - url: - description: URL is the pre-signed URL endpoint for uploading the file - type: string + example: The capital of France is Paris. It's located in the north-central part of the country and is one of the most populous and visited cities in the world, known for its iconic landmarks like the Eiffel Tower, Louvre Museum, Notre-Dame Cathedral, and more. Paris is also the capital of the Île-de-France region and is a major global center for art, fashion, gastronomy, and culture. + seed: + type: integer + finish_reason: + $ref: '#/components/schemas/FinishReason' + logprobs: + $ref: '#/components/schemas/LogprobsPart' + + CompletionRequest: type: object - ContainerStatus: + required: + - model + - prompt properties: - finishedAt: - description: FinishedAt is the timestamp when the container finished execution - (if terminated) - type: string - message: - description: Message provides a human-readable message with details about the - container's status - type: string - name: - description: Name is the name of the container - type: string - reason: - description: Reason provides a brief machine-readable reason for the container's - current status - type: string - startedAt: - description: StartedAt is the timestamp when the container started execution + prompt: type: string - status: - description: Status is the current state of the container (e.g., "Running", - "Terminated", "Waiting") + description: A string providing context for the model to complete. + example: [INST] What is the capital of France? [/INST] + model: type: string - type: object - CreateDeploymentRequest: - properties: - args: - description: Args overrides the container's CMD. 
Provide as an array of - arguments (e.g., ["python", "app.py"]) - items: - type: string + description: > + The name of the model to query.
+ [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#chat-models) + example: mistralai/Mixtral-8x7B-Instruct-v0.1 + anyOf: + - type: string + enum: + - meta-llama/Llama-2-70b-hf + - mistralai/Mistral-7B-v0.1 + - mistralai/Mixtral-8x7B-v0.1 + - Meta-Llama/Llama-Guard-7b + - type: string + max_tokens: + type: integer + description: The maximum number of tokens to generate. + stop: type: array - autoscaling: - additionalProperties: - type: string - description: 'Autoscaling configuration as key-value pairs. Example: {"metric": - "QueueBacklogPerWorker", "target": "10"} to scale based on queue - backlog' - type: object - command: - description: Command overrides the container's ENTRYPOINT. Provide as an array - (e.g., ["/bin/sh", "-c"]) + description: A list of string sequences that will truncate (stop) inference text output. For example, "
" will stop generation as soon as the model generates the given token. items: type: string - type: array - cpu: - description: CPU is the number of CPU cores to allocate per container instance - (e.g., 0.1 = 100 milli cores) - minimum: 0.1 + temperature: type: number - description: - description: Description is an optional human-readable description of your - deployment - type: string - environment_variables: - description: EnvironmentVariables is a list of environment variables to set in - the container. Each must have a name and either a value or - value_from_secret - items: - $ref: "#/components/schemas/v1.EnvironmentVariable" - type: array - gpu_count: - description: GPUCount is the number of GPUs to allocate per container instance. - Defaults to 0 if not specified - type: integer - gpu_type: - description: GPUType specifies the GPU hardware to use (e.g., "h100-80gb"). - enum: - - h100-80gb - - " a100-80gb" - type: string - health_check_path: - description: HealthCheckPath is the HTTP path for health checks (e.g., - "/health"). If set, the platform will check this endpoint to - determine container health - type: string - image: - description: Image is the container image to deploy from registry.together.ai. - type: string - max_replicas: - description: MaxReplicas is the maximum number of container instances that can - be scaled up to. If not set, will be set to MinReplicas - type: integer - memory: - description: Memory is the amount of RAM to allocate per container instance in - GiB (e.g., 0.5 = 512MiB) - minimum: 0.1 + description: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate for question answering or summarization. A value closer to 1 introduces more randomness in the output. + format: float + top_p: type: number - min_replicas: - description: MinReplicas is the minimum number of container instances to run. - Defaults to 1 if not specified + description: A percentage (also called the nucleus parameter) that's used to dynamically adjust the number of choices for each predicted token based on the cumulative probabilities. It specifies a probability threshold below which all less likely tokens are filtered out. This technique helps maintain diversity and generate more fluent and natural-sounding text. + format: float + top_k: type: integer - name: - description: Name is the unique identifier for your deployment. Must contain - only alphanumeric characters, underscores, or hyphens (1-100 - characters) - maxLength: 100 - minLength: 1 - type: string - port: - description: Port is the container port your application listens on (e.g., 8080 - for web servers). Required if your application serves traffic + description: An integer that's used to limit the number of choices for the next predicted word or token. It specifies the maximum number of tokens to consider at each step, based on their probability of occurrence. This technique helps to speed up the generation process and can improve the quality of the generated text by focusing on the most likely options. + format: int32 + repetition_penalty: + type: number + description: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. + format: float + stream: + type: boolean + description: 'If true, stream tokens as Server-Sent Events as the model generates them instead of waiting for the full model response. The stream terminates with `data: [DONE]`. 
If false, return a single JSON object containing the results.' + logprobs: type: integer - storage: - description: Storage is the amount of ephemeral disk storage to allocate per - container instance (e.g., 10 = 10GiB) + minimum: 0 + maximum: 20 + description: An integer between 0 and 20 of the top k tokens to return log probabilities for at each generation step, instead of just the sampled token. Log probabilities help assess model confidence in token predictions. + echo: + type: boolean + description: If true, the response will contain the prompt. Can be used with `logprobs` to return prompt logprobs. + n: type: integer - termination_grace_period_seconds: - description: TerminationGracePeriodSeconds is the time in seconds to wait for - graceful shutdown before forcefully terminating the replica + description: The number of completions to generate for each prompt. + minimum: 1 + maximum: 128 + safety_model: + type: string + description: The name of the moderation model used to validate tokens. Choose from the available moderation models found [here](https://docs.together.ai/docs/inference-models#moderation-models). + example: 'safety_model_name' + anyOf: + - type: string + enum: + - Meta-Llama/Llama-Guard-7b + - type: string + min_p: + type: number + description: A number between 0 and 1 that can be used as an alternative to top-p and top-k. + format: float + presence_penalty: + type: number + description: A number between -2.0 and 2.0 where a positive value increases the likelihood of a model talking about new topics. + format: float + frequency_penalty: + type: number + description: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. + format: float + logit_bias: + type: object + additionalProperties: + type: number + format: float + description: Adjusts the likelihood of specific tokens appearing in the generated output. + example: { '1024': -10.5, '105': 21.4 } + seed: type: integer - volumes: - description: Volumes is a list of volume mounts to attach to the container. Each - mount must reference an existing volume by name - items: - $ref: "#/components/schemas/v1.VolumeMount" - type: array - required: - - gpu_type - - image - - name + description: Seed value for reproducibility. + example: 42 + CompletionResponse: type: object - CreateSecretRequest: properties: - description: - description: Description is an optional human-readable description of the - secret's purpose (max 500 characters) - maxLength: 500 + id: type: string - name: - description: Name is the unique identifier for the secret. Can contain - alphanumeric characters, underscores, hyphens, forward slashes, and - periods (1-100 characters) - maxLength: 100 - minLength: 1 + choices: + $ref: '#/components/schemas/CompletionChoicesData' + prompt: + $ref: '#/components/schemas/PromptPart' + usage: + $ref: '#/components/schemas/UsageData' + created: + type: integer + model: type: string - project_id: - description: ProjectID is ignored - the project is automatically determined from - your authentication - type: string - value: - description: Value is the sensitive data to store securely (e.g., API keys, - passwords, tokens). 
This value will be encrypted at rest - minLength: 1 + object: type: string + enum: + - text.completion required: - - name - - value + - id + - choices + - usage + - created + - model + - object + + CompletionStream: + oneOf: + - $ref: '#/components/schemas/CompletionEvent' + - $ref: '#/components/schemas/StreamSentinel' + + CompletionEvent: type: object - CreateVolumeRequest: + required: [data] properties: - content: - allOf: - - $ref: "#/components/schemas/volumes.VolumeContent" - description: Content specifies the content configuration for this volume - name: - description: Name is the unique identifier for the volume within the project + data: + $ref: '#/components/schemas/CompletionChunk' + + CompletionChunk: + type: object + required: [id, token, choices, usage, finish_reason] + properties: + id: type: string - type: + token: + $ref: '#/components/schemas/CompletionToken' + created: + type: integer + object: + type: string + enum: + - completion.chunk + choices: + title: CompletionChoices + type: array + items: + $ref: '#/components/schemas/CompletionChoice' + usage: allOf: - - $ref: "#/components/schemas/volumes.VolumeType" - description: Type is the volume type (currently only "readOnly" is supported) - required: - - content - - name - - type + - $ref: '#/components/schemas/UsageData' + - nullable: true + seed: + type: integer + finish_reason: + allOf: + - $ref: '#/components/schemas/FinishReason' + - nullable: true + + CompletionChoice: type: object - DeploymentListResponse: + required: [index] properties: - data: - description: Data is the array of deployment items - items: - $ref: "#/components/schemas/v1.DeploymentResponseItem" - type: array - object: - description: Object is the type identifier for this response (always "list") + text: type: string + index: + type: integer + delta: + title: CompletionChoiceDelta + type: object + required: [role] + properties: + token_id: + type: integer + role: + type: string + enum: ['system', 'user', 'assistant', 'function', 'tool'] + content: + type: string + nullable: true + reasoning: + type: string + nullable: true + tool_calls: + type: array + items: + $ref: '#/components/schemas/ToolChoice' + function_call: + type: object + deprecated: true + nullable: true + properties: + arguments: + type: string + name: + type: string + required: + - arguments + - name + + CompletionToken: type: object - DeploymentLogs: + required: [id, text, logprob, special] properties: - lines: - items: + id: + type: integer + text: + type: string + logprob: + type: number + special: + type: boolean + + ChatCompletionChoicesData: + type: array + items: + type: object + properties: + text: type: string + index: + type: integer + seed: + type: integer + finish_reason: + $ref: '#/components/schemas/FinishReason' + message: + $ref: '#/components/schemas/ChatCompletionMessage' + logprobs: + allOf: + - nullable: true + - $ref: '#/components/schemas/LogprobsPart' + ChatCompletionMessage: + type: object + required: [role, content] + properties: + content: + type: string + nullable: true + role: + type: string + enum: [assistant] + tool_calls: type: array + items: + $ref: '#/components/schemas/ToolChoice' + function_call: + type: object + deprecated: true + required: [arguments, name] + properties: + arguments: + type: string + name: + type: string + reasoning: + type: string + nullable: true + ChatCompletionTool: type: object - DeploymentResponseItem: + required: [type, function] properties: - args: - description: Args are the arguments passed to the container's command + 
type: + type: string + enum: ['function'] + function: + type: object + required: [name] + properties: + description: + type: string + name: + type: string + parameters: + type: object + additionalProperties: true + + ChatCompletionRequest: + type: object + required: + - model + - messages + properties: + messages: + type: array + description: A list of messages comprising the conversation so far. items: - type: string + $ref: '#/components/schemas/ChatCompletionMessageParam' + model: + description: > + The name of the model to query.
+
+ [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#chat-models) + example: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + anyOf: + - type: string + enum: + - Qwen/Qwen2.5-72B-Instruct-Turbo + - Qwen/Qwen2.5-7B-Instruct-Turbo + - meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + - meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + - meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + - type: string + max_tokens: + type: integer + description: The maximum number of tokens to generate. + stop: type: array - autoscaling: - additionalProperties: - type: string - description: Autoscaling contains autoscaling configuration parameters for this - deployment - type: object - command: - description: Command is the entrypoint command run in the container + description: A list of string sequences that will truncate (stop) inference text output. For example, "
" will stop generation as soon as the model generates the given token. items: type: string - type: array - cpu: - description: CPU is the amount of CPU resource allocated to each replica in - cores (fractional value is allowed) + temperature: type: number - created_at: - description: CreatedAt is the ISO8601 timestamp when this deployment was created - type: string - description: - description: Description provides a human-readable explanation of the - deployment's purpose or content - type: string - desired_replicas: - description: DesiredReplicas is the number of replicas that the orchestrator is - targeting + description: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate for question answering or summarization. A value closer to 1 introduces more randomness in the output. + format: float + top_p: + type: number + description: A percentage (also called the nucleus parameter) that's used to dynamically adjust the number of choices for each predicted token based on the cumulative probabilities. It specifies a probability threshold below which all less likely tokens are filtered out. This technique helps maintain diversity and generate more fluent and natural-sounding text. + format: float + top_k: type: integer - environment_variables: - description: EnvironmentVariables is a list of environment variables set in the - container - items: - $ref: "#/components/schemas/v1.EnvironmentVariable" - type: array - gpu_count: - description: GPUCount is the number of GPUs allocated to each replica in this - deployment - type: integer - gpu_type: - description: GPUType specifies the type of GPU requested (if any) for this - deployment - enum: - - h100-80gb - - " a100-80gb" - type: string - health_check_path: - description: HealthCheckPath is the HTTP path used for health checks of the - application - type: string - id: - description: ID is the unique identifier of the deployment - type: string - image: - description: Image specifies the container image used for this deployment + description: An integer that's used to limit the number of choices for the next predicted word or token. It specifies the maximum number of tokens to consider at each step, based on their probability of occurrence. This technique helps to speed up the generation process and can improve the quality of the generated text by focusing on the most likely options. + format: int32 + context_length_exceeded_behavior: type: string - max_replicas: - description: MaxReplicas is the maximum number of replicas to run for this - deployment - type: integer - memory: - description: Memory is the amount of memory allocated to each replica in GiB - (fractional value is allowed) + enum: ['truncate', 'error'] + default: 'error' + description: Defined the behavior of the API when max_tokens exceed the maximum context length of the model. When set to 'error', API will return 400 with appropriate error message. When set to 'truncate', override the max_tokens with maximum context length of the model. 
+ repetition_penalty: type: number - min_replicas: - description: MinReplicas is the minimum number of replicas to run for this - deployment - type: integer - name: - description: Name is the name of the deployment - type: string - object: - description: Object is the type identifier for this response (always "deployment") - type: string - port: - description: Port is the container port that the deployment exposes + description: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. + stream: + type: boolean + description: 'If true, stream tokens as Server-Sent Events as the model generates them instead of waiting for the full model response. The stream terminates with `data: [DONE]`. If false, return a single JSON object containing the results.' + logprobs: type: integer - ready_replicas: - description: ReadyReplicas is the current number of replicas that are in the - Ready state + minimum: 0 + maximum: 20 + description: An integer between 0 and 20 of the top k tokens to return log probabilities for at each generation step, instead of just the sampled token. Log probabilities help assess model confidence in token predictions. + echo: + type: boolean + description: If true, the response will contain the prompt. Can be used with `logprobs` to return prompt logprobs. + n: type: integer - replica_events: - additionalProperties: - $ref: "#/components/schemas/v1.ReplicaEvent" - description: ReplicaEvents is a mapping of replica names or IDs to their status - events + description: The number of completions to generate for each prompt. + minimum: 1 + maximum: 128 + min_p: + type: number + description: A number between 0 and 1 that can be used as an alternative to top_p and top-k. + format: float + presence_penalty: + type: number + description: A number between -2.0 and 2.0 where a positive value increases the likelihood of a model talking about new topics. + format: float + frequency_penalty: + type: number + description: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. + format: float + logit_bias: type: object - status: - allOf: - - $ref: "#/components/schemas/v1.DeploymentStatus" - description: Status represents the overall status of the deployment (e.g., - Updating, Scaling, Ready, Failed) - enum: - - Updating - - Scaling - - Ready - - Failed - storage: - description: Storage is the amount of storage (in MB or units as defined by the - platform) allocated to each replica + additionalProperties: + type: number + format: float + description: Adjusts the likelihood of specific tokens appearing in the generated output. + example: { '1024': -10.5, '105': 21.4 } + seed: type: integer - updated_at: - description: UpdatedAt is the ISO8601 timestamp when this deployment was last - updated - type: string - volumes: - description: Volumes is a list of volume mounts for this deployment - items: - $ref: "#/components/schemas/v1.VolumeMount" + description: Seed value for reproducibility. + example: 42 + function_call: + oneOf: + - type: string + enum: [none, auto] + - type: object + required: [name] + properties: + name: + type: string + response_format: + description: | + An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables + Structured Outputs which ensures the model will match your supplied JSON + schema. 
Learn more in the [Structured Outputs + guide](https://docs.together.ai/docs/json-mode). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` + is preferred for models that support it. + discriminator: + propertyName: type + anyOf: + - $ref: '#/components/schemas/ResponseFormatText' + - $ref: '#/components/schemas/ResponseFormatJsonSchema' + - $ref: '#/components/schemas/ResponseFormatJsonObject' + tools: type: array - type: object - DeploymentStatus: - enum: - - Updating - - Scaling - - Ready - - Failed - type: string - x-enum-varnames: - - DeploymentStatusUpdating - - DeploymentStatusScaling - - DeploymentStatusReady - - DeploymentStatusFailed - EnvironmentVariable: - properties: - name: - description: Name is the environment variable name (e.g., "DATABASE_URL"). Must - start with a letter or underscore, followed by letters, numbers, or - underscores - type: string - value: - description: Value is the plain text value for the environment variable. Use - this for non-sensitive values. Either Value or ValueFromSecret must - be set, but not both + description: A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. + items: + $ref: '#/components/schemas/ToolsPart' + tool_choice: + description: Controls which (if any) function is called by the model. By default uses `auto`, which lets the model pick between generating a message or calling a function. + oneOf: + - type: string + example: 'tool_name' + - $ref: '#/components/schemas/ToolChoice' + safety_model: type: string - value_from_secret: - description: ValueFromSecret references a secret by name or ID to use as the - value. Use this for sensitive values like API keys or passwords. - Either Value or ValueFromSecret must be set, but not both + description: The name of the moderation model used to validate tokens. Choose from the available moderation models found [here](https://docs.together.ai/docs/inference-models#moderation-models). + example: 'safety_model_name' + reasoning_effort: type: string - required: - - name + enum: ['low', 'medium', 'high'] + description: Controls the level of reasoning effort the model should apply when generating responses. Higher values may result in more thoughtful and detailed responses but may take longer to generate. + example: 'medium' + + ResponseFormatText: type: object - ImageListResponse: + title: Text + description: | + Default response format. Used to generate text responses. properties: - data: - description: Data is the array of image items - items: - $ref: "#/components/schemas/v1.ImageResponseItem" - type: array - object: - description: Object is the type identifier for this response (always "list") + type: type: string + description: The type of response format being defined. Always `text`. + enum: + - text + x-stainless-const: true + required: + - type + ResponseFormatJsonObject: type: object - ImageResponseItem: + title: JSON object + description: | + JSON object response format. An older method of generating JSON responses. + Using `json_schema` is recommended for models that support it. Note that the + model will not generate JSON without a system or user message instructing it + to do so. 
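+        # Illustrative request fragment (assumed values): the older JSON mode is enabled with
+        #   "response_format": { "type": "json_object" }
+        # together with a system or user message that explicitly asks for JSON output.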
properties: - object: - description: Object is the type identifier for this response (always "image") - type: string - tag: - description: Tag is the image tag/version identifier (e.g., "latest", "v1.0.0") - type: string - url: - description: URL is the full registry URL for this image including tag (e.g., - "registry.together.ai/project-id/repository:tag") + type: type: string + description: The type of response format being defined. Always `json_object`. + enum: + - json_object + x-stainless-const: true + required: + - type + ResponseFormatJsonSchema: type: object - KubernetesEvent: + title: JSON schema + description: | + JSON Schema response format. Used to generate structured JSON responses. + Learn more about [Structured Outputs](https://docs.together.ai/docs/json-mode). properties: - action: - description: Action is the action taken or reported by this event - type: string - count: - description: Count is the number of times this event has occurred - type: integer - first_seen: - description: FirstSeen is the timestamp when this event was first observed - type: string - last_seen: - description: LastSeen is the timestamp when this event was last observed - type: string - message: - description: Message is a human-readable description of the event - type: string - reason: - description: Reason is a brief machine-readable reason for this event (e.g., - "Pulling", "Started", "Failed") - type: string - type: object - ListSecretsResponse: - properties: - data: - description: Data is the array of secret items - items: - $ref: "#/components/schemas/v1.SecretResponseItem" - type: array - object: - description: Object is the type identifier for this response (always "list") + type: type: string + description: The type of response format being defined. Always `json_schema`. + enum: + - json_schema + x-stainless-const: true + json_schema: + type: object + title: JSON schema + description: | + Structured Outputs configuration options, including a JSON Schema. + properties: + description: + type: string + description: | + A description of what the response format is for, used by the model to + determine how to respond in the format. + name: + type: string + description: | + The name of the response format. Must be a-z, A-Z, 0-9, or contain + underscores and dashes, with a maximum length of 64. + schema: + $ref: '#/components/schemas/ResponseFormatJsonSchemaSchema' + strict: + anyOf: + - type: boolean + default: false + description: | + Whether to enable strict schema adherence when generating the output. + If set to true, the model will always follow the exact schema defined + in the `schema` field. Only a subset of JSON Schema is supported when + `strict` is `true`. To learn more, read the [Structured Outputs + guide](https://docs.together.ai/docs/json-mode). + - type: 'null' + required: + - name + required: + - type + - json_schema + ResponseFormatJsonSchemaSchema: type: object - ListVolumesResponse: - properties: - data: - description: Data is the array of volume items - items: - $ref: "#/components/schemas/v1.VolumeResponseItem" - type: array - object: - description: Object is the type identifier for this response (always "list") - type: string + title: JSON schema + description: | + The schema for the response format, described as a JSON Schema object. + Learn how to build JSON schemas [here](https://json-schema.org/). 
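+        # A minimal illustrative schema value (assumed example, not normative):
+        #   { "type": "object",
+        #     "properties": { "answer": { "type": "string" } },
+        #     "required": ["answer"],
+        #     "additionalProperties": false }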
+ additionalProperties: true + + ChatCompletionMessageParam: + oneOf: + - $ref: '#/components/schemas/ChatCompletionSystemMessageParam' + - $ref: '#/components/schemas/ChatCompletionUserMessageParam' + - $ref: '#/components/schemas/ChatCompletionAssistantMessageParam' + - $ref: '#/components/schemas/ChatCompletionToolMessageParam' + - $ref: '#/components/schemas/ChatCompletionFunctionMessageParam' + + # Start Message Params + + ChatCompletionSystemMessageParam: type: object - ReplicaEvent: + required: [content, role] properties: - container_status: - allOf: - - $ref: "#/components/schemas/v1.ContainerStatus" - description: ContainerStatus provides detailed status information about the - container within this replica - events: - description: Events is a list of Kubernetes events related to this replica for - troubleshooting - items: - $ref: "#/components/schemas/v1.KubernetesEvent" - type: array - replica_completed_at: - description: ReplicaCompletedAt is the timestamp when the replica finished - execution - type: string - replica_marked_for_termination_at: - description: ReplicaMarkedForTerminationAt is the timestamp when the replica was - marked for termination - type: string - replica_ready_since: - description: ReplicaReadySince is the timestamp when the replica became ready to - serve traffic - type: string - replica_running_since: - description: ReplicaRunningSince is the timestamp when the replica entered the - running state - type: string - replica_started_at: - description: ReplicaStartedAt is the timestamp when the replica was created - type: string - replica_status: - description: ReplicaStatus is the current status of the replica (e.g., - "Running", "Pending", "Failed") - type: string - replica_status_message: - description: ReplicaStatusMessage provides a human-readable message explaining - the replica's status + content: type: string - replica_status_reason: - description: ReplicaStatusReason provides a brief machine-readable reason for - the replica's status + role: type: string - scheduled_on_cluster: - description: ScheduledOnCluster identifies which cluster this replica is - scheduled on + enum: ['system'] + name: type: string + + ChatCompletionUserMessageParam: type: object - RepositoryListResponse: + required: [content, role] properties: - data: - description: Data is the array of repository items - items: - $ref: "#/components/schemas/v1.RepositoryResponseItem" - type: array - object: - description: Object is the type identifier for this response (always "list") + content: + $ref: '#/components/schemas/ChatCompletionUserMessageContent' + role: + type: string + enum: ['user'] + name: type: string + + ChatCompletionUserMessageContentString: + type: string + description: A plain text message. + + ChatCompletionUserMessageContentMultimodal: + type: array + description: A structured message with mixed content types. 
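+        # Illustrative content array mixing the part types defined below (URL is a placeholder):
+        #   [ { "type": "text", "text": "What is shown in this image?" },
+        #     { "type": "image_url", "image_url": { "url": "https://example.com/photo.png" } } ]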
+ items: + type: object + oneOf: + - type: object + properties: + type: + type: string + enum: + - text + text: + type: string + required: + - type + - text + - type: object + properties: + type: + type: string + enum: + - image_url + image_url: + type: object + properties: + url: + type: string + description: The URL of the image + required: + - url + - type: object + title: Video + properties: + type: + type: string + enum: + - video_url + video_url: + type: object + properties: + url: + type: string + description: The URL of the video + required: + - url + required: + - type + - video_url + - type: object + title: Audio + properties: + type: + type: string + enum: + - audio_url + audio_url: + type: object + properties: + url: + type: string + description: The URL of the audio + required: + - url + required: + - type + - audio_url + - type: object + title: Input Audio + properties: + type: + type: string + enum: + - input_audio + input_audio: + type: object + properties: + data: + type: string + description: The base64 encoded audio data + format: + type: string + description: The format of the audio data + enum: + - wav + required: + - data + - format + required: + - type + - input_audio + + ChatCompletionUserMessageContent: + description: The content of the message, which can either be a simple string or a structured format. + oneOf: + - $ref: '#/components/schemas/ChatCompletionUserMessageContentString' + - $ref: '#/components/schemas/ChatCompletionUserMessageContentMultimodal' + + ChatCompletionAssistantMessageParam: type: object - RepositoryResponseItem: + required: [role] properties: - id: - description: ID is the unique identifier for this repository (repository name - with slashes replaced by "___") + content: type: string - object: - description: Object is the type identifier for this response (always - "image-repository") + nullable: true + role: type: string - url: - description: URL is the full registry URL for this repository (e.g., - "registry.together.ai/project-id/repository-name") + enum: ['assistant'] + name: type: string - type: object - SecretResponseItem: - properties: - created_at: - description: CreatedAt is the ISO8601 timestamp when this secret was created - type: string - created_by: - description: CreatedBy is the identifier of the user who created this secret - type: string - description: - description: Description is a human-readable description of the secret's purpose - type: string - id: - description: ID is the unique identifier for this secret - type: string - last_updated_by: - description: LastUpdatedBy is the identifier of the user who last updated this - secret - type: string - name: - description: Name is the name/key of the secret - type: string - object: - description: Object is the type identifier for this response (always "secret") - type: string - updated_at: - description: UpdatedAt is the ISO8601 timestamp when this secret was last updated - type: string - type: object - UpdateDeploymentRequest: - properties: - args: - description: Args overrides the container's CMD. Provide as an array of - arguments (e.g., ["python", "app.py"]) - items: - type: string - type: array - autoscaling: - additionalProperties: - type: string - description: 'Autoscaling configuration as key-value pairs. Example: {"metric": - "QueueBacklogPerWorker", "target": "10"} to scale based on queue - backlog' - type: object - command: - description: Command overrides the container's ENTRYPOINT. 
Provide as an array - (e.g., ["/bin/sh", "-c"]) - items: - type: string + tool_calls: type: array - cpu: - description: CPU is the number of CPU cores to allocate per container instance - (e.g., 0.1 = 100 milli cores) - minimum: 0.1 - type: number - description: - description: Description is an optional human-readable description of your - deployment - type: string - environment_variables: - description: EnvironmentVariables is a list of environment variables to set in - the container. This will replace all existing environment variables items: - $ref: "#/components/schemas/v1.EnvironmentVariable" - type: array - gpu_count: - description: GPUCount is the number of GPUs to allocate per container instance - type: integer - gpu_type: - description: GPUType specifies the GPU hardware to use (e.g., "h100-80gb") - enum: - - h100-80gb - - " a100-80gb" - type: string - health_check_path: - description: HealthCheckPath is the HTTP path for health checks (e.g., - "/health"). Set to empty string to disable health checks + $ref: '#/components/schemas/ToolChoice' + function_call: + type: object + deprecated: true + properties: + arguments: + type: string + name: + type: string + required: [arguments, name] + + ChatCompletionFunctionMessageParam: + type: object + deprecated: true + required: [content, role, name] + properties: + role: type: string - image: - description: Image is the container image to deploy from registry.together.ai. + enum: ['function'] + content: type: string - max_replicas: - description: MaxReplicas is the maximum number of replicas that can be scaled up - to. - type: integer - memory: - description: Memory is the amount of RAM to allocate per container instance in - GiB (e.g., 0.5 = 512MiB) - minimum: 0.1 - type: number - min_replicas: - description: MinReplicas is the minimum number of replicas to run - type: integer name: - description: Name is the new unique identifier for your deployment. Must contain - only alphanumeric characters, underscores, or hyphens (1-100 - characters) - maxLength: 100 - minLength: 1 type: string - port: - description: Port is the container port your application listens on (e.g., 8080 - for web servers) - type: integer - storage: - description: Storage is the amount of ephemeral disk storage to allocate per - container instance (e.g., 10 = 10GiB) - type: integer - termination_grace_period_seconds: - description: TerminationGracePeriodSeconds is the time in seconds to wait for - graceful shutdown before forcefully terminating the replica - type: integer - volumes: - description: Volumes is a list of volume mounts to attach to the container. This - will replace all existing volumes - items: - $ref: "#/components/schemas/v1.VolumeMount" - type: array + + ChatCompletionToolMessageParam: type: object - UpdateSecretRequest: properties: - description: - description: Description is an optional human-readable description of the - secret's purpose (max 500 characters) - maxLength: 500 - type: string name: - description: Name is the new unique identifier for the secret. Can contain - alphanumeric characters, underscores, hyphens, forward slashes, and - periods (1-100 characters) - maxLength: 100 - minLength: 1 - type: string - project_id: - description: ProjectID is ignored - the project is automatically determined from - your authentication type: string - value: - description: Value is the new sensitive data to store securely. 
Updating this - will replace the existing secret value - minLength: 1 + role: type: string - type: object - UpdateVolumeRequest: - properties: + enum: ['tool'] content: - allOf: - - $ref: "#/components/schemas/volumes.VolumeContent" - description: Content specifies the new content that will be preloaded to this - volume - name: - description: Name is the new unique identifier for the volume within the project - type: string - type: - allOf: - - $ref: "#/components/schemas/volumes.VolumeType" - description: Type is the new volume type (currently only "readOnly" is supported) - type: object - VolumeMount: - properties: - mount_path: - description: MountPath is the path in the container where the volume will be - mounted (e.g., "/data") type: string - name: - description: Name is the name of the volume to mount. Must reference an existing - volume by name or ID + tool_call_id: type: string - required: - - mount_path - - name + required: [role, content, tool_call_id] + + # End Message Params + + ChatCompletionResponse: type: object - VolumeResponseItem: properties: - content: - allOf: - - $ref: "#/components/schemas/volumes.VolumeContent" - description: Content specifies the content that will be preloaded to this volume - created_at: - description: CreatedAt is the ISO8601 timestamp when this volume was created - type: string id: - description: ID is the unique identifier for this volume type: string - name: - description: Name is the name of the volume + choices: + $ref: '#/components/schemas/ChatCompletionChoicesData' + usage: + $ref: '#/components/schemas/UsageData' + created: + type: integer + model: type: string object: - description: Object is the type identifier for this response (always "volume") - type: string - type: - allOf: - - $ref: "#/components/schemas/volumes.VolumeType" - description: Type is the volume type (e.g., "readOnly") - updated_at: - description: UpdatedAt is the ISO8601 timestamp when this volume was last updated type: string - type: object - volumes.VolumeContent: - properties: - source_prefix: - description: SourcePrefix is the file path prefix for the content to be - preloaded into the volume - example: models/ - type: string - type: - description: Type is the content type (currently only "files" is supported which - allows preloading files uploaded via Files API into the volume) enum: - - files - example: files - type: string - type: object - volumes.VolumeType: - enum: - - readOnly - type: string - x-enum-varnames: - - VolumeTypeReadOnly - ListVoicesResponse: - description: Response containing a list of models and their available voices. + - chat.completion + warnings: + type: array + items: + $ref: '#/components/schemas/InferenceWarning' + required: [choices, id, created, model, object] + + ChatCompletionStream: + oneOf: + - $ref: '#/components/schemas/ChatCompletionEvent' + - $ref: '#/components/schemas/StreamSentinel' + + ChatCompletionEvent: type: object - required: ['data'] + required: [data] properties: data: - type: array - items: - $ref: '#/components/schemas/ModelVoices' + $ref: '#/components/schemas/ChatCompletionChunk' - ModelVoices: - description: Represents a model with its available voices. 
+ ChatCompletionChunk: type: object - required: ['model', 'voices'] + required: [id, object, created, choices, model] properties: - model: + id: type: string - - voices: + object: + type: string + enum: + - chat.completion.chunk + created: + type: integer + system_fingerprint: + type: string + model: + type: string + example: mistralai/Mixtral-8x7B-Instruct-v0.1 + choices: + title: ChatCompletionChoices type: array items: type: object - required: ['id', 'name'] + required: [index, delta, finish_reason] properties: - id: - type: string - name: - type: string - - ListAvailibilityZonesResponse: - description: List of unique availability zones - type: object - required: ['avzones'] - properties: - avzones: + index: + type: integer + finish_reason: + $ref: '#/components/schemas/FinishReason' + nullable: true + logprobs: + type: number + nullable: true + seed: + type: integer + nullable: true + delta: + title: ChatCompletionChoiceDelta + type: object + required: [role] + properties: + token_id: + type: integer + role: + type: string + enum: ['system', 'user', 'assistant', 'function', 'tool'] + content: + type: string + nullable: true + reasoning: + type: string + nullable: true + tool_calls: + type: array + items: + $ref: '#/components/schemas/ToolChoice' + function_call: + type: object + deprecated: true + nullable: true + properties: + arguments: + type: string + name: + type: string + required: + - arguments + - name + usage: + allOf: + - $ref: '#/components/schemas/UsageData' + - nullable: true + warnings: type: array items: - type: string - - RerankRequest: + $ref: '#/components/schemas/InferenceWarning' + AudioSpeechRequest: type: object + required: + - model + - input + - voice properties: model: - type: string description: > - The model to be used for the rerank request.
+ The name of the model to query.

- [See all of Together AI's rerank models](https://docs.together.ai/docs/serverless-models#rerank-models) - example: Salesforce/Llama-Rank-V1 + [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models) + The current supported tts models are: + - cartesia/sonic + - hexgrad/Kokoro-82M + - canopylabs/orpheus-3b-0.1-ft + example: canopylabs/orpheus-3b-0.1-ft anyOf: - type: string enum: - - Salesforce/Llama-Rank-v1 + - cartesia/sonic + - hexgrad/Kokoro-82M + - canopylabs/orpheus-3b-0.1-ft - type: string - - query: + input: type: string - description: The search query to be used for ranking. - example: What animals can I find near Peru? - documents: - description: List of documents, which can be either strings or objects. - oneOf: - - type: array - items: - type: object - additionalProperties: true - - type: array - items: - type: string - example: Our solar system orbits the Milky Way galaxy at about 515,000 mph - example: - - { - 'title': 'Llama', - 'text': 'The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era.', - } - - { - 'title': 'Panda', - 'text': 'The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China.', - } - - { - 'title': 'Guanaco', - 'text': 'The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations.', - } - - { - 'title': 'Wild Bactrian camel', - 'text': 'The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia.', - } - top_n: + description: Input text to generate the audio for + voice: + description: > + The voice to use for generating the audio. + The voices supported are different for each model. For eg - for canopylabs/orpheus-3b-0.1-ft, one of the voices supported is tara, for hexgrad/Kokoro-82M, one of the voices supported is af_alloy and for cartesia/sonic, one of the voices supported is "friendly sidekick". +
+
+ You can view the voices supported for each model using the /v1/voices endpoint sending the model name as the query parameter. + [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available). + type: string + response_format: + type: string + description: The format of audio output. Supported formats are mp3, wav, raw if streaming is false. If streaming is true, the only supported format is raw. + default: wav + enum: + - mp3 + - wav + - raw + language: + type: string + description: Language of input text. + default: en + enum: + - en + - de + - fr + - es + - hi + - it + - ja + - ko + - nl + - pl + - pt + - ru + - sv + - tr + - zh + response_encoding: + type: string + description: Audio encoding of response + default: pcm_f32le + enum: + - pcm_f32le + - pcm_s16le + - pcm_mulaw + - pcm_alaw + sample_rate: type: integer - description: The number of top results to return. - example: 2 - return_documents: + default: 44100 + description: Sampling rate to use for the output audio. The default sampling rate for canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for cartesia/sonic is 44100. + stream: type: boolean - description: Whether to return supplied documents with the response. - example: true - rank_fields: - type: array - items: - type: string - description: List of keys in the JSON Object document to rank by. Defaults to use all supplied keys for ranking. - example: ['title', 'text'] - required: - - model - - query - - documents - additionalProperties: false + default: false + description: 'If true, output is streamed for several characters at a time instead of waiting for the full response. The stream terminates with `data: [DONE]`. If false, return the encoded audio as octet stream' - RerankResponse: + AudioTranscriptionRequest: type: object required: - - object - - model - - results + - file properties: - object: + file: + oneOf: + - $ref: '#/components/schemas/AudioFileBinary' + - $ref: '#/components/schemas/AudioFileUrl' + description: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, .webm, .flac. + model: type: string - description: Object type + description: Model to use for transcription + default: openai/whisper-large-v3 enum: - - rerank - example: rerank - id: + - openai/whisper-large-v3 + language: type: string - description: Request ID - example: 9dfa1a09-5ebc-4a40-970f-586cb8f4ae47 - model: + description: Optional ISO 639-1 language code. If `auto` is provided, language is auto-detected. + default: en + example: en + prompt: type: string - description: The model to be used for the rerank request. - example: salesforce/turboranker-0.8-3778-6328 - results: - type: array - items: - type: object - required: [index, relevance_score, document] - properties: - index: - type: integer - relevance_score: - type: number - document: - type: object - properties: - text: - type: string - nullable: true - example: - - { - 'index': 0, - 'relevance_score': 0.29980177813003117, - 'document': - { - 'text': '{"title":"Llama","text":"The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."}', - }, - } - - { - 'index': 2, - 'relevance_score': 0.2752447527354349, - 'document': - { - 'text': '{"title":"Guanaco","text":"The guanaco is a camelid native to South America, closely related to the llama. 
Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."}', - }, - } - usage: - $ref: '#/components/schemas/UsageData' - example: - { - 'prompt_tokens': 1837, - 'completion_tokens': 0, - 'total_tokens': 1837, - } + description: Optional text to bias decoding. + response_format: + type: string + description: The format of the response + default: json + enum: + - json + - verbose_json + temperature: + type: number + format: float + description: Sampling temperature between 0.0 and 1.0 + default: 0.0 + minimum: 0.0 + maximum: 1.0 + timestamp_granularities: + oneOf: + - type: string + enum: + - segment + - word + - type: array + items: + type: string + enum: + - segment + - word + uniqueItems: true + minItems: 1 + maxItems: 2 + description: Controls level of timestamp detail in verbose_json. Only used when response_format is verbose_json. Can be a single granularity or an array to get multiple levels. + default: segment + example: ['word', 'segment'] + diarize: + type: boolean + description: > + Whether to enable speaker diarization. When enabled, you will get the speaker id for each word in the transcription. + In the response, in the words array, you will get the speaker id for each word. + In addition, we also return the speaker_segments array which contains the speaker id for each speaker segment along with the start and end time of the segment along with all the words in the segment. +
+
+ For eg - + ... + "speaker_segments": [ + "speaker_id": "SPEAKER_00", + "start": 0, + "end": 30.02, + "words": [ + { + "id": 0, + "word": "Tijana", + "start": 0, + "end": 11.475, + "speaker_id": "SPEAKER_00" + }, + ... + default: false + min_speakers: + type: integer + description: Minimum number of speakers expected in the audio. Used to improve diarization accuracy when the approximate number of speakers is known. + max_speakers: + type: integer + description: Maximum number of speakers expected in the audio. Used to improve diarization accuracy when the approximate number of speakers is known. - ErrorData: + AudioTranscriptionResponse: + oneOf: + - $ref: '#/components/schemas/AudioTranscriptionJsonResponse' + - $ref: '#/components/schemas/AudioTranscriptionVerboseJsonResponse' + + AudioTranscriptionJsonResponse: type: object required: - - error + - text properties: - error: - type: object - properties: - message: - type: string - nullable: false - type: - type: string - nullable: false - param: - type: string - nullable: true - default: null - code: - type: string - nullable: true - default: null - required: - - type - - message - - FinishReason: - type: string - enum: - - stop - - eos - - length - - tool_calls - - function_call + text: + type: string + description: The transcribed text + example: Hello, world! - LogprobsPart: + AudioTranscriptionVerboseJsonResponse: type: object + required: + - task + - language + - duration + - text + - segments properties: - token_ids: + task: + type: string + description: The task performed + enum: + - transcribe + - translate + example: transcribe + language: + type: string + description: The language of the audio + example: english + duration: + type: number + format: float + description: The duration of the audio in seconds + example: 3.5 + text: + type: string + description: The transcribed text + example: Hello, world! + segments: type: array items: - type: number - description: List of token IDs corresponding to the logprobs - tokens: + $ref: '#/components/schemas/AudioTranscriptionSegment' + description: Array of transcription segments + words: type: array items: - type: string - description: List of token strings - token_logprobs: + $ref: '#/components/schemas/AudioTranscriptionWord' + description: Array of transcription words (only when timestamp_granularities includes 'word') + speaker_segments: type: array items: - type: number - description: List of token log probabilities - - PromptPart: - type: array - items: - type: object - properties: - text: - type: string - example: [INST] What is the capital of France? [/INST] - logprobs: - $ref: '#/components/schemas/LogprobsPart' + $ref: '#/components/schemas/AudioTranscriptionSpeakerSegment' + description: Array of transcription speaker segments (only when diarize is enabled) - InferenceWarning: + AudioTranscriptionSegment: type: object required: - - message + - id + - start + - end + - text properties: - message: + id: + type: integer + description: Unique identifier for the segment + example: 0 + start: + type: number + format: float + description: Start time of the segment in seconds + example: 0.0 + end: + type: number + format: float + description: End time of the segment in seconds + example: 3.5 + text: type: string + description: The text content of the segment + example: Hello, world! 
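+      # Illustrative excerpt (placeholder values) of a verbose_json transcription with diarize
+      # enabled, combining the word and speaker-segment shapes defined below:
+      #   "speaker_segments": [
+      #     { "id": 0, "speaker_id": "SPEAKER_00", "start": 0.0, "end": 4.2,
+      #       "text": "Hello there.",
+      #       "words": [ { "word": "Hello", "start": 0.0, "end": 0.5, "speaker_id": "SPEAKER_00" } ] }
+      #   ]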
- UsageData: + AudioTranscriptionWord: type: object - properties: - prompt_tokens: - type: integer - completion_tokens: - type: integer - total_tokens: - type: integer required: - - prompt_tokens - - completion_tokens - - total_tokens - nullable: true - - CompletionChoicesData: - type: array - items: - type: object - properties: - text: - type: string - example: The capital of France is Paris. It's located in the north-central part of the country and is one of the most populous and visited cities in the world, known for its iconic landmarks like the Eiffel Tower, Louvre Museum, Notre-Dame Cathedral, and more. Paris is also the capital of the Île-de-France region and is a major global center for art, fashion, gastronomy, and culture. - seed: - type: integer - finish_reason: - $ref: '#/components/schemas/FinishReason' - logprobs: - $ref: '#/components/schemas/LogprobsPart' + - word + - start + - end + properties: + word: + type: string + description: The word + example: Hello + start: + type: number + format: float + description: Start time of the word in seconds + example: 0.0 + end: + type: number + format: float + description: End time of the word in seconds + example: 0.5 + speaker_id: + type: string + description: The speaker id for the word (only when diarize is enabled) + example: SPEAKER_00 - CompletionRequest: + AudioTranscriptionSpeakerSegment: type: object required: - - model - - prompt + - speaker_id + - start + - end + - words + - text + - id properties: - prompt: - type: string - description: A string providing context for the model to complete. - example: [INST] What is the capital of France? [/INST] - model: + speaker_id: type: string - description: > - The name of the model to query.
-
- [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#chat-models) - example: mistralai/Mixtral-8x7B-Instruct-v0.1 - anyOf: - - type: string - enum: - - meta-llama/Llama-2-70b-hf - - mistralai/Mistral-7B-v0.1 - - mistralai/Mixtral-8x7B-v0.1 - - Meta-Llama/Llama-Guard-7b - - type: string - max_tokens: - type: integer - description: The maximum number of tokens to generate. - stop: - type: array - description: A list of string sequences that will truncate (stop) inference text output. For example, "
" will stop generation as soon as the model generates the given token. - items: - type: string - temperature: - type: number - description: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate for question answering or summarization. A value closer to 1 introduces more randomness in the output. - format: float - top_p: + description: The speaker identifier + example: SPEAKER_00 + start: type: number - description: A percentage (also called the nucleus parameter) that's used to dynamically adjust the number of choices for each predicted token based on the cumulative probabilities. It specifies a probability threshold below which all less likely tokens are filtered out. This technique helps maintain diversity and generate more fluent and natural-sounding text. format: float - top_k: - type: integer - description: An integer that's used to limit the number of choices for the next predicted word or token. It specifies the maximum number of tokens to consider at each step, based on their probability of occurrence. This technique helps to speed up the generation process and can improve the quality of the generated text by focusing on the most likely options. - format: int32 - repetition_penalty: + description: Start time of the speaker segment in seconds + example: 0.0 + end: type: number - description: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. format: float - stream: - type: boolean - description: 'If true, stream tokens as Server-Sent Events as the model generates them instead of waiting for the full model response. The stream terminates with `data: [DONE]`. If false, return a single JSON object containing the results.' - logprobs: - type: integer - minimum: 0 - maximum: 20 - description: An integer between 0 and 20 of the top k tokens to return log probabilities for at each generation step, instead of just the sampled token. Log probabilities help assess model confidence in token predictions. - echo: - type: boolean - description: If true, the response will contain the prompt. Can be used with `logprobs` to return prompt logprobs. - n: - type: integer - description: The number of completions to generate for each prompt. - minimum: 1 - maximum: 128 - safety_model: + description: End time of the speaker segment in seconds + example: 30.02 + words: + type: array + items: + $ref: '#/components/schemas/AudioTranscriptionWord' + description: Array of words spoken by this speaker in this segment + text: type: string - description: The name of the moderation model used to validate tokens. Choose from the available moderation models found [here](https://docs.together.ai/docs/inference-models#moderation-models). - example: 'safety_model_name' - anyOf: - - type: string - enum: - - Meta-Llama/Llama-Guard-7b - - type: string - min_p: - type: number - description: A number between 0 and 1 that can be used as an alternative to top-p and top-k. - format: float - presence_penalty: - type: number - description: A number between -2.0 and 2.0 where a positive value increases the likelihood of a model talking about new topics. - format: float - frequency_penalty: - type: number - description: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. 
- format: float - logit_bias: - type: object - additionalProperties: - type: number - format: float - description: Adjusts the likelihood of specific tokens appearing in the generated output. - example: { '1024': -10.5, '105': 21.4 } - seed: + description: The full text spoken by this speaker in this segment + example: "Hello, how are you doing today?" + id: type: integer - description: Seed value for reproducibility. - example: 42 - CompletionResponse: + description: Unique identifier for the speaker segment + example: 1 + + AudioTranslationRequest: type: object + required: + - file properties: - id: + file: + oneOf: + - type: string + format: binary + description: Audio file to translate + - type: string + format: uri + description: Public HTTP/HTTPS URL to audio file + description: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, .webm, .flac. + model: type: string - choices: - $ref: '#/components/schemas/CompletionChoicesData' + description: Model to use for translation + default: openai/whisper-large-v3 + enum: + - openai/whisper-large-v3 + language: + type: string + description: Target output language. Optional ISO 639-1 language code. If omitted, language is set to English. + default: en + example: en prompt: - $ref: '#/components/schemas/PromptPart' - usage: - $ref: '#/components/schemas/UsageData' - created: - type: integer - model: type: string - object: + description: Optional text to bias decoding. + response_format: type: string + description: The format of the response + default: json enum: - - text.completion + - json + - verbose_json + temperature: + type: number + format: float + description: Sampling temperature between 0.0 and 1.0 + default: 0.0 + minimum: 0.0 + maximum: 1.0 + timestamp_granularities: + oneOf: + - type: string + enum: + - segment + - word + - type: array + items: + type: string + enum: + - segment + - word + uniqueItems: true + minItems: 1 + maxItems: 2 + description: Controls level of timestamp detail in verbose_json. Only used when response_format is verbose_json. Can be a single granularity or an array to get multiple levels. + default: segment + example: ['word', 'segment'] + + AudioTranslationResponse: + oneOf: + - $ref: '#/components/schemas/AudioTranslationJsonResponse' + - $ref: '#/components/schemas/AudioTranslationVerboseJsonResponse' + + AudioTranslationJsonResponse: + type: object required: - - id - - choices - - usage - - created - - model - - object + - text + properties: + text: + type: string + description: The translated text + example: Hello, world! - CompletionStream: + AudioTranslationVerboseJsonResponse: + type: object + required: + - task + - language + - duration + - text + - segments + properties: + task: + type: string + description: The task performed + enum: + - transcribe + - translate + example: translate + language: + type: string + description: The target language of the translation + example: english + duration: + type: number + format: float + description: The duration of the audio in seconds + example: 3.5 + text: + type: string + description: The translated text + example: Hello, world! 
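+          # Illustrative verbose_json translation response excerpt (placeholder values):
+          #   { "task": "translate", "language": "english", "duration": 3.5,
+          #     "text": "Hello, world!", "segments": [ ... ] }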
+ segments: + type: array + items: + $ref: '#/components/schemas/AudioTranscriptionSegment' + description: Array of translation segments + words: + type: array + items: + $ref: '#/components/schemas/AudioTranscriptionWord' + description: Array of translation words (only when timestamp_granularities includes 'word') + + AudioSpeechStreamResponse: oneOf: - - $ref: '#/components/schemas/CompletionEvent' + - $ref: '#/components/schemas/AudioSpeechStreamEvent' - $ref: '#/components/schemas/StreamSentinel' - CompletionEvent: + AudioSpeechStreamEvent: type: object required: [data] properties: data: - $ref: '#/components/schemas/CompletionChunk' + $ref: '#/components/schemas/AudioSpeechStreamChunk' - CompletionChunk: + AudioSpeechStreamChunk: type: object - required: [id, token, choices, usage, finish_reason] + required: [object, model, b64] properties: - id: - type: string - token: - $ref: '#/components/schemas/CompletionToken' - created: - type: integer object: type: string enum: - - completion.chunk - choices: - title: CompletionChoices - type: array - items: - $ref: '#/components/schemas/CompletionChoice' - usage: - allOf: - - $ref: '#/components/schemas/UsageData' - - nullable: true - seed: - type: integer - finish_reason: - allOf: - - $ref: '#/components/schemas/FinishReason' - - nullable: true + - audio.tts.chunk + model: + type: string + example: cartesia/sonic + b64: + type: string + description: base64 encoded audio stream - CompletionChoice: + StreamSentinel: type: object - required: [index] + required: [data] + properties: + data: + title: stream_signal + type: string + enum: + - '[DONE]' + + ChatCompletionToken: + type: object + required: [id, text, logprob, special] properties: + id: + type: integer text: type: string + logprob: + type: number + special: + type: boolean + + ChatCompletionChoice: + type: object + required: [index, delta, finish_reason] + properties: index: type: integer + finish_reason: + $ref: '#/components/schemas/FinishReason' + logprobs: + $ref: '#/components/schemas/LogprobsPart' delta: - title: CompletionChoiceDelta + title: ChatCompletionChoiceDelta type: object required: [role] properties: @@ -7239,9 +7512,6 @@ components: content: type: string nullable: true - reasoning: - type: string - nullable: true tool_calls: type: array items: @@ -7258,3396 +7528,2959 @@ components: required: - arguments - name + reasoning: + type: string + nullable: true - CompletionToken: + EmbeddingsRequest: type: object - required: [id, text, logprob, special] + required: + - model + - input properties: - id: - type: integer - text: + model: type: string - logprob: - type: number - special: - type: boolean + description: > + The name of the embedding model to use.
+
+ [See all of Together AI's embedding models](https://docs.together.ai/docs/serverless-models#embedding-models) + example: togethercomputer/m2-bert-80M-8k-retrieval + anyOf: + - type: string + enum: + - WhereIsAI/UAE-Large-V1 + - BAAI/bge-large-en-v1.5 + - BAAI/bge-base-en-v1.5 + - togethercomputer/m2-bert-80M-8k-retrieval + - type: string + input: + oneOf: + - type: string + description: A string providing the text for the model to embed. + example: Our solar system orbits the Milky Way galaxy at about 515,000 mph + - type: array + items: + type: string + description: A string providing the text for the model to embed. + example: Our solar system orbits the Milky Way galaxy at about 515,000 mph + example: Our solar system orbits the Milky Way galaxy at about 515,000 mph - ChatCompletionChoicesData: - type: array - items: - type: object - properties: - text: - type: string - index: - type: integer - seed: - type: integer - finish_reason: - $ref: '#/components/schemas/FinishReason' - message: - $ref: '#/components/schemas/ChatCompletionMessage' - logprobs: - allOf: - - nullable: true - - $ref: '#/components/schemas/LogprobsPart' - ChatCompletionMessage: + EmbeddingsResponse: type: object - required: [role, content] + required: + - object + - model + - data properties: - content: + object: type: string - nullable: true - role: + enum: + - list + model: type: string - enum: [assistant] - tool_calls: + data: type: array items: - $ref: '#/components/schemas/ToolChoice' - function_call: - type: object - deprecated: true - required: [arguments, name] - properties: - arguments: - type: string - name: - type: string - reasoning: - type: string - nullable: true - ChatCompletionTool: + type: object + required: [index, object, embedding] + properties: + object: + type: string + enum: + - embedding + embedding: + type: array + items: + type: number + index: + type: integer + + ModelInfoList: + type: array + items: + $ref: '#/components/schemas/ModelInfo' + ModelInfo: type: object - required: [type, function] + required: [id, object, created, type] properties: + id: + type: string + example: 'Austism/chronos-hermes-13b' + object: + type: string + example: 'model' + created: + type: integer + example: 1692896905 type: + enum: + - chat + - language + - code + - image + - embedding + - moderation + - rerank + example: 'chat' + display_name: type: string - enum: ['function'] - function: - type: object - required: [name] - properties: - description: - type: string - name: - type: string - parameters: - type: object - additionalProperties: true + example: 'Chronos Hermes (13B)' + organization: + type: string + example: 'Austism' + link: + type: string + license: + type: string + example: 'other' + context_length: + type: integer + example: 2048 + pricing: + $ref: '#/components/schemas/Pricing' - ChatCompletionRequest: + ModelUploadRequest: type: object required: - - model - - messages + - model_name + - model_source properties: - messages: - type: array - description: A list of messages comprising the conversation so far. - items: - $ref: '#/components/schemas/ChatCompletionMessageParam' - model: - description: > - The name of the model to query.
-
- [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#chat-models) - example: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - anyOf: - - type: string - enum: - - Qwen/Qwen2.5-72B-Instruct-Turbo - - Qwen/Qwen2.5-7B-Instruct-Turbo - - meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo - - meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo - - meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - - type: string - max_tokens: - type: integer - description: The maximum number of tokens to generate. - stop: - type: array - description: A list of string sequences that will truncate (stop) inference text output. For example, "
" will stop generation as soon as the model generates the given token. - items: - type: string - temperature: - type: number - description: A decimal number from 0-1 that determines the degree of randomness in the response. A temperature less than 1 favors more correctness and is appropriate for question answering or summarization. A value closer to 1 introduces more randomness in the output. - format: float - top_p: - type: number - description: A percentage (also called the nucleus parameter) that's used to dynamically adjust the number of choices for each predicted token based on the cumulative probabilities. It specifies a probability threshold below which all less likely tokens are filtered out. This technique helps maintain diversity and generate more fluent and natural-sounding text. - format: float - top_k: - type: integer - description: An integer that's used to limit the number of choices for the next predicted word or token. It specifies the maximum number of tokens to consider at each step, based on their probability of occurrence. This technique helps to speed up the generation process and can improve the quality of the generated text by focusing on the most likely options. - format: int32 - context_length_exceeded_behavior: + model_name: type: string - enum: ['truncate', 'error'] - default: 'error' - description: Defined the behavior of the API when max_tokens exceed the maximum context length of the model. When set to 'error', API will return 400 with appropriate error message. When set to 'truncate', override the max_tokens with maximum context length of the model. - repetition_penalty: - type: number - description: A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition. - stream: - type: boolean - description: 'If true, stream tokens as Server-Sent Events as the model generates them instead of waiting for the full model response. The stream terminates with `data: [DONE]`. If false, return a single JSON object containing the results.' - logprobs: - type: integer - minimum: 0 - maximum: 20 - description: An integer between 0 and 20 of the top k tokens to return log probabilities for at each generation step, instead of just the sampled token. Log probabilities help assess model confidence in token predictions. - echo: - type: boolean - description: If true, the response will contain the prompt. Can be used with `logprobs` to return prompt logprobs. - n: - type: integer - description: The number of completions to generate for each prompt. - minimum: 1 - maximum: 128 - min_p: - type: number - description: A number between 0 and 1 that can be used as an alternative to top_p and top-k. - format: float - presence_penalty: - type: number - description: A number between -2.0 and 2.0 where a positive value increases the likelihood of a model talking about new topics. - format: float - frequency_penalty: - type: number - description: A number between -2.0 and 2.0 where a positive value decreases the likelihood of repeating tokens that have already been mentioned. - format: float - logit_bias: - type: object - additionalProperties: - type: number - format: float - description: Adjusts the likelihood of specific tokens appearing in the generated output. - example: { '1024': -10.5, '105': 21.4 } - seed: - type: integer - description: Seed value for reproducibility. 
- example: 42 - function_call: - oneOf: - - type: string - enum: [none, auto] - - type: object - required: [name] - properties: - name: - type: string - response_format: - description: | - An object specifying the format that the model must output. - - Setting to `{ "type": "json_schema", "json_schema": {...} }` enables - Structured Outputs which ensures the model will match your supplied JSON - schema. Learn more in the [Structured Outputs - guide](https://docs.together.ai/docs/json-mode). - - Setting to `{ "type": "json_object" }` enables the older JSON mode, which - ensures the message the model generates is valid JSON. Using `json_schema` - is preferred for models that support it. - discriminator: - propertyName: type - anyOf: - - $ref: '#/components/schemas/ResponseFormatText' - - $ref: '#/components/schemas/ResponseFormatJsonSchema' - - $ref: '#/components/schemas/ResponseFormatJsonObject' - tools: - type: array - description: A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. - items: - $ref: '#/components/schemas/ToolsPart' - tool_choice: - description: Controls which (if any) function is called by the model. By default uses `auto`, which lets the model pick between generating a message or calling a function. - oneOf: - - type: string - example: 'tool_name' - - $ref: '#/components/schemas/ToolChoice' - safety_model: + description: The name to give to your uploaded model + example: 'Qwen2.5-72B-Instruct' + model_source: type: string - description: The name of the moderation model used to validate tokens. Choose from the available moderation models found [here](https://docs.together.ai/docs/inference-models#moderation-models). - example: 'safety_model_name' - reasoning_effort: + description: The source location of the model (Hugging Face repo or S3 path) + example: 'unsloth/Qwen2.5-72B-Instruct' + model_type: type: string - enum: ['low', 'medium', 'high'] - description: Controls the level of reasoning effort the model should apply when generating responses. Higher values may result in more thoughtful and detailed responses but may take longer to generate. - example: 'medium' + description: Whether the model is a full model or an adapter + default: 'model' + enum: + - model + - adapter + example: 'model' + hf_token: + type: string + description: Hugging Face token (if uploading from Hugging Face) + example: 'hf_examplehuggingfacetoken' + description: + type: string + description: A description of your model + example: 'Finetuned Qwen2.5-72B-Instruct by Unsloth' + base_model: + type: string + description: The base model to use for an adapter if setting it to run against a serverless pool. Only used for model_type `adapter`. + example: 'Qwen/Qwen2.5-72B-Instruct' + lora_model: + type: string + description: The lora pool to use for an adapter if setting it to run against, say, a dedicated pool. Only used for model_type `adapter`. + example: 'my_username/Qwen2.5-72B-Instruct-lora' - ResponseFormatText: + ModelUploadSuccessResponse: type: object - title: Text - description: | - Default response format. Used to generate text responses. 
+ required: + - data + - message properties: - type: + data: + type: object + required: + - job_id + - model_name + - model_id + - model_source + properties: + job_id: + type: string + example: 'job-a15dad11-8d8e-4007-97c5-a211304de284' + model_name: + type: string + example: 'necolinehubner/Qwen2.5-72B-Instruct' + model_id: + type: string + example: 'model-c0e32dfc-637e-47b2-bf4e-e9b2e58c9da7' + model_source: + type: string + example: 'huggingface' + message: type: string - description: The type of response format being defined. Always `text`. + example: 'Processing model weights. Job created.' + + ImageResponse: + type: object + properties: + id: + type: string + model: + type: string + object: enum: - - text - x-stainless-const: true + - list + example: 'list' + data: + type: array + items: + oneOf: + - $ref: '#/components/schemas/ImageResponseDataB64' + - $ref: '#/components/schemas/ImageResponseDataUrl' + discriminator: + propertyName: type required: - - type - ResponseFormatJsonObject: + - id + - model + - object + - data + + ImageResponseDataB64: type: object - title: JSON object - description: | - JSON object response format. An older method of generating JSON responses. - Using `json_schema` is recommended for models that support it. Note that the - model will not generate JSON without a system or user message instructing it - to do so. + required: [index, b64_json, type] properties: + index: + type: integer + b64_json: + type: string type: type: string - description: The type of response format being defined. Always `json_object`. - enum: - - json_object - x-stainless-const: true - required: - - type - ResponseFormatJsonSchema: + enum: [b64_json] + + ImageResponseDataUrl: type: object - title: JSON schema - description: | - JSON Schema response format. Used to generate structured JSON responses. - Learn more about [Structured Outputs](https://docs.together.ai/docs/json-mode). + required: [index, url, type] properties: + index: + type: integer + url: + type: string type: type: string - description: The type of response format being defined. Always `json_schema`. - enum: - - json_schema - x-stainless-const: true - json_schema: - type: object - title: JSON schema - description: | - Structured Outputs configuration options, including a JSON Schema. - properties: - description: - type: string - description: | - A description of what the response format is for, used by the model to - determine how to respond in the format. - name: - type: string - description: | - The name of the response format. Must be a-z, A-Z, 0-9, or contain - underscores and dashes, with a maximum length of 64. - schema: - $ref: '#/components/schemas/ResponseFormatJsonSchemaSchema' - strict: - anyOf: - - type: boolean - default: false - description: | - Whether to enable strict schema adherence when generating the output. - If set to true, the model will always follow the exact schema defined - in the `schema` field. Only a subset of JSON Schema is supported when - `strict` is `true`. To learn more, read the [Structured Outputs - guide](https://docs.together.ai/docs/json-mode). - - type: 'null' - required: - - name + enum: [url] + + JobInfoSuccessResponse: + type: object required: - type - - json_schema - ResponseFormatJsonSchemaSchema: - type: object - title: JSON schema - description: | - The schema for the response format, described as a JSON Schema object. - Learn how to build JSON schemas [here](https://json-schema.org/). 
- additionalProperties: true - - ChatCompletionMessageParam: - oneOf: - - $ref: '#/components/schemas/ChatCompletionSystemMessageParam' - - $ref: '#/components/schemas/ChatCompletionUserMessageParam' - - $ref: '#/components/schemas/ChatCompletionAssistantMessageParam' - - $ref: '#/components/schemas/ChatCompletionToolMessageParam' - - $ref: '#/components/schemas/ChatCompletionFunctionMessageParam' - - # Start Message Params - - ChatCompletionSystemMessageParam: - type: object - required: [content, role] + - job_id + - status + - status_updates + - args + - created_at + - updated_at properties: - content: - type: string - role: - type: string - enum: ['system'] - name: + type: type: string - - ChatCompletionUserMessageParam: - type: object - required: [content, role] - properties: - content: - $ref: '#/components/schemas/ChatCompletionUserMessageContent' - role: + example: 'model_upload' + job_id: type: string - enum: ['user'] - name: + example: 'job-a15dad11-8d8e-4007-97c5-a211304de284' + status: type: string - - ChatCompletionUserMessageContentString: - type: string - description: A plain text message. - - ChatCompletionUserMessageContentMultimodal: - type: array - description: A structured message with mixed content types. - items: - type: object - oneOf: - - type: object - properties: - type: - type: string - enum: - - text - text: - type: string + enum: ['Queued', 'Running', 'Complete', 'Failed'] + example: 'Complete' + status_updates: + type: array + items: + type: object required: - - type - - text - - type: object - properties: - type: - type: string - enum: - - image_url - image_url: - type: object - properties: - url: - type: string - description: The URL of the image - required: - - url - - type: object - title: Video + - status + - message + - timestamp properties: - type: + status: type: string - enum: - - video_url - video_url: - type: object - properties: - url: - type: string - description: The URL of the video - required: - - url - required: - - type - - video_url - - type: object - title: Audio - properties: - type: + example: 'Complete' + message: type: string - enum: - - audio_url - audio_url: - type: object - properties: - url: - type: string - description: The URL of the audio - required: - - url - required: - - type - - audio_url - - type: object - title: Input Audio - properties: - type: + example: 'Job is Complete' + timestamp: type: string - enum: - - input_audio - input_audio: - type: object - properties: - data: - type: string - description: The base64 encoded audio data - format: - type: string - description: The format of the audio data - enum: - - wav - required: - - data - - format - required: - - type - - input_audio - - ChatCompletionUserMessageContent: - description: The content of the message, which can either be a simple string or a structured format. 
- oneOf: - - $ref: '#/components/schemas/ChatCompletionUserMessageContentString' - - $ref: '#/components/schemas/ChatCompletionUserMessageContentMultimodal' - - ChatCompletionAssistantMessageParam: - type: object - required: [role] - properties: - content: - type: string - nullable: true - role: - type: string - enum: ['assistant'] - name: - type: string - tool_calls: - type: array - items: - $ref: '#/components/schemas/ToolChoice' - function_call: + format: date-time + example: '2025-03-11T22:36:12Z' + args: type: object - deprecated: true properties: - arguments: + description: type: string - name: + example: 'Finetuned Qwen2.5-72B-Instruct by Unsloth' + modelName: type: string - required: [arguments, name] - - ChatCompletionFunctionMessageParam: - type: object - deprecated: true - required: [content, role, name] - properties: - role: - type: string - enum: ['function'] - content: - type: string - name: - type: string - - ChatCompletionToolMessageParam: - type: object - properties: - name: - type: string - role: - type: string - enum: ['tool'] - content: + example: 'necolinehubner/Qwen2.5-72B-Instruct' + modelSource: + type: string + example: 'unsloth/Qwen2.5-72B-Instruct' + created_at: type: string - tool_call_id: + format: date-time + example: '2025-03-11T22:05:43Z' + updated_at: type: string - required: [role, content, tool_call_id] - - # End Message Params + format: date-time + example: '2025-03-11T22:36:12Z' - ChatCompletionResponse: + JobsInfoSuccessResponse: type: object + required: + - data properties: - id: - type: string - choices: - $ref: '#/components/schemas/ChatCompletionChoicesData' - usage: - $ref: '#/components/schemas/UsageData' - created: - type: integer - model: - type: string - object: - type: string - enum: - - chat.completion - warnings: + data: type: array items: - $ref: '#/components/schemas/InferenceWarning' - required: [choices, id, created, model, object] - - ChatCompletionStream: - oneOf: - - $ref: '#/components/schemas/ChatCompletionEvent' - - $ref: '#/components/schemas/StreamSentinel' + $ref: '#/components/schemas/JobInfoSuccessResponse' - ChatCompletionEvent: + Pricing: type: object - required: [data] + required: [hourly, input, output, base, finetune] properties: - data: - $ref: '#/components/schemas/ChatCompletionChunk' + hourly: + type: number + example: 0 + input: + type: number + example: 0.3 + output: + type: number + example: 0.3 + base: + type: number + example: 0 + finetune: + type: number + example: 0 - ChatCompletionChunk: + ToolsPart: type: object - required: [id, object, created, choices, model] properties: - id: - type: string - object: + type: type: string - enum: - - chat.completion.chunk - created: - type: integer - system_fingerprint: + example: 'tool_type' + function: + type: object + properties: + description: + type: string + example: 'A description of the function.' + name: + type: string + example: 'function_name' + parameters: + type: object + additionalProperties: true + description: 'A map of parameter names to their values.' + ToolChoice: + type: object + required: [id, type, function, index] + properties: + # TODO: is this the right place for index? 
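# Illustrative usage (not part of the schema): a rough sketch of how the `tools` request
# parameter (ToolsPart) and tool-call entries shaped like ToolChoice fit together. The
# endpoint path, the get_weather function and the response handling are assumptions; only
# the field names come from the schemas in this spec.
#
#   import os, requests
#
#   tools = [{
#       "type": "function",
#       "function": {
#           "name": "get_weather",                      # hypothetical function name
#           "description": "Look up the current weather for a city.",
#           "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
#       },
#   }]
#   resp = requests.post(
#       "https://api.together.xyz/v1/chat/completions",
#       headers={"Authorization": f"Bearer {os.environ['TOGETHER_API_KEY']}"},
#       json={
#           "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
#           "messages": [{"role": "user", "content": "What's the weather in Paris?"}],
#           "tools": tools,
#       },
#   )
#   # Each returned tool call is expected to follow the ToolChoice shape:
#   # {"index": 0, "id": "...", "type": "function", "function": {"name": "...", "arguments": "..."}}
#   print(resp.json())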
+ index: + type: number + id: type: string - model: + type: type: string - example: mistralai/Mixtral-8x7B-Instruct-v0.1 - choices: - title: ChatCompletionChoices - type: array - items: - type: object - required: [index, delta, finish_reason] - properties: - index: - type: integer - finish_reason: - $ref: '#/components/schemas/FinishReason' - nullable: true - logprobs: - type: number - nullable: true - seed: - type: integer - nullable: true - delta: - title: ChatCompletionChoiceDelta - type: object - required: [role] - properties: - token_id: - type: integer - role: - type: string - enum: ['system', 'user', 'assistant', 'function', 'tool'] - content: - type: string - nullable: true - reasoning: - type: string - nullable: true - tool_calls: - type: array - items: - $ref: '#/components/schemas/ToolChoice' - function_call: - type: object - deprecated: true - nullable: true - properties: - arguments: - type: string - name: - type: string - required: - - arguments - - name - usage: - allOf: - - $ref: '#/components/schemas/UsageData' - - nullable: true - warnings: - type: array - items: - $ref: '#/components/schemas/InferenceWarning' - AudioSpeechRequest: + enum: ['function'] + function: + type: object + required: [name, arguments] + properties: + name: + type: string + example: 'function_name' + arguments: + type: string + + FileResponse: type: object required: - - model - - input - - voice + - id + - object + - created_at + - filename + - bytes + - purpose + - FileType + - Processed + - LineCount properties: - model: - description: > - The name of the model to query.
-
- [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#audio-models) - The current supported tts models are: - - cartesia/sonic - - hexgrad/Kokoro-82M - - canopylabs/orpheus-3b-0.1-ft - example: canopylabs/orpheus-3b-0.1-ft - anyOf: - - type: string - enum: - - cartesia/sonic - - hexgrad/Kokoro-82M - - canopylabs/orpheus-3b-0.1-ft - - type: string - input: - type: string - description: Input text to generate the audio for - voice: - description: > - The voice to use for generating the audio. - The voices supported are different for each model. For eg - for canopylabs/orpheus-3b-0.1-ft, one of the voices supported is tara, for hexgrad/Kokoro-82M, one of the voices supported is af_alloy and for cartesia/sonic, one of the voices supported is "friendly sidekick". -
-
- You can view the voices supported for each model using the /v1/voices endpoint sending the model name as the query parameter. - [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available). - type: string - response_format: + id: type: string - description: The format of audio output. Supported formats are mp3, wav, raw if streaming is false. If streaming is true, the only supported format is raw. - default: wav - enum: - - mp3 - - wav - - raw - language: + object: type: string - description: Language of input text. - default: en - enum: - - en - - de - - fr - - es - - hi - - it - - ja - - ko - - nl - - pl - - pt - - ru - - sv - - tr - - zh - response_encoding: + example: 'file' + created_at: + type: integer + example: 1715021438 + filename: type: string - description: Audio encoding of response - default: pcm_f32le - enum: - - pcm_f32le - - pcm_s16le - - pcm_mulaw - - pcm_alaw - sample_rate: + example: 'my_file.jsonl' + bytes: type: integer - default: 44100 - description: Sampling rate to use for the output audio. The default sampling rate for canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for cartesia/sonic is 44100. - stream: + example: 2664 + purpose: + $ref: '#/components/schemas/FilePurpose' + Processed: type: boolean - default: false - description: 'If true, output is streamed for several characters at a time instead of waiting for the full response. The stream terminates with `data: [DONE]`. If false, return the encoded audio as octet stream' - - AudioTranscriptionRequest: - type: object + FileType: + $ref: '#/components/schemas/FileType' + LineCount: + type: integer + FileList: required: - - file + - data + type: object properties: - file: - oneOf: - - $ref: '#/components/schemas/AudioFileBinary' - - $ref: '#/components/schemas/AudioFileUrl' - description: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, .webm, .flac. - model: + data: + type: array + items: + $ref: '#/components/schemas/FileResponse' + FileObject: + type: object + properties: + object: type: string - description: Model to use for transcription - default: openai/whisper-large-v3 - enum: - - openai/whisper-large-v3 - language: + id: type: string - description: Optional ISO 639-1 language code. If `auto` is provided, language is auto-detected. - default: en - example: en - prompt: + filename: type: string - description: Optional text to bias decoding. - response_format: - type: string - description: The format of the response - default: json - enum: - - json - - verbose_json - temperature: - type: number - format: float - description: Sampling temperature between 0.0 and 1.0 - default: 0.0 - minimum: 0.0 - maximum: 1.0 - timestamp_granularities: - oneOf: - - type: string - enum: - - segment - - word - - type: array - items: - type: string - enum: - - segment - - word - uniqueItems: true - minItems: 1 - maxItems: 2 - description: Controls level of timestamp detail in verbose_json. Only used when response_format is verbose_json. Can be a single granularity or an array to get multiple levels. - default: segment - example: ['word', 'segment'] - diarize: - type: boolean - description: > - Whether to enable speaker diarization. When enabled, you will get the speaker id for each word in the transcription. - In the response, in the words array, you will get the speaker id for each word. 
- In addition, we also return the speaker_segments array which contains the speaker id for each speaker segment along with the start and end time of the segment along with all the words in the segment. -
-
- For eg - - ... - "speaker_segments": [ - "speaker_id": "SPEAKER_00", - "start": 0, - "end": 30.02, - "words": [ - { - "id": 0, - "word": "Tijana", - "start": 0, - "end": 11.475, - "speaker_id": "SPEAKER_00" - }, - ... - default: false - min_speakers: - type: integer - description: Minimum number of speakers expected in the audio. Used to improve diarization accuracy when the approximate number of speakers is known. - max_speakers: + size: type: integer - description: Maximum number of speakers expected in the audio. Used to improve diarization accuracy when the approximate number of speakers is known. - - AudioTranscriptionResponse: - oneOf: - - $ref: '#/components/schemas/AudioTranscriptionJsonResponse' - - $ref: '#/components/schemas/AudioTranscriptionVerboseJsonResponse' - - AudioTranscriptionJsonResponse: + FilePurpose: + type: string + description: The purpose of the file + example: 'fine-tune' + enum: + - fine-tune + - eval + - eval-sample + - eval-output + - eval-summary + - batch-generated + - batch-api + FileType: + type: string + description: The type of the file + default: 'jsonl' + example: 'jsonl' + enum: + - 'csv' + - 'jsonl' + - 'parquet' + FileDeleteResponse: type: object - required: - - text properties: - text: + id: type: string - description: The transcribed text - example: Hello, world! - - AudioTranscriptionVerboseJsonResponse: + deleted: + type: boolean + FinetuneResponse: type: object required: - - task - - language - - duration - - text - - segments + - id + - status properties: - task: + id: type: string - description: The task performed - enum: - - transcribe - - translate - example: transcribe - language: + format: uuid + training_file: type: string - description: The language of the audio - example: english - duration: + validation_file: + type: string + model: + type: string + model_output_name: + type: string + model_output_path: + type: string + trainingfile_numlines: + type: integer + trainingfile_size: + type: integer + created_at: + type: string + format: date-time + updated_at: + type: string + format: date-time + n_epochs: + type: integer + n_checkpoints: + type: integer + n_evals: + type: integer + batch_size: + oneOf: + - type: integer + - type: string + enum: + - max + default: 'max' + learning_rate: + type: number + lr_scheduler: + type: object + $ref: '#/components/schemas/LRScheduler' + warmup_ratio: + type: number + max_grad_norm: type: number format: float - description: The duration of the audio in seconds - example: 3.5 - text: + weight_decay: + type: number + format: float + eval_steps: + type: integer + train_on_inputs: + oneOf: + - type: boolean + - type: string + enum: + - auto + default: auto + training_method: + type: object + oneOf: + - $ref: '#/components/schemas/TrainingMethodSFT' + - $ref: '#/components/schemas/TrainingMethodDPO' + training_type: + type: object + oneOf: + - $ref: '#/components/schemas/FullTrainingType' + - $ref: '#/components/schemas/LoRATrainingType' + status: + $ref: '#/components/schemas/FinetuneJobStatus' + job_id: type: string - description: The transcribed text - example: Hello, world! 
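# Illustrative usage (not part of the schema): a minimal sketch of requesting a diarized
# verbose_json transcription and reading the fields described here (text, segments, words,
# speaker_segments). The /v1/audio/transcriptions path is an assumption; the request and
# response field names are taken from this spec.
#
#   import os, requests
#
#   with open("meeting.wav", "rb") as f:
#       resp = requests.post(
#           "https://api.together.xyz/v1/audio/transcriptions",   # assumed path
#           headers={"Authorization": f"Bearer {os.environ['TOGETHER_API_KEY']}"},
#           files={"file": f},
#           data={
#               "model": "openai/whisper-large-v3",
#               "response_format": "verbose_json",
#               "timestamp_granularities": "word",
#               "diarize": "true",
#           },
#       )
#   body = resp.json()
#   print(body["text"])
#   for seg in body.get("speaker_segments", []):
#       print(seg["speaker_id"], seg["start"], seg["end"], seg["text"])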
- segments: - type: array - items: - $ref: '#/components/schemas/AudioTranscriptionSegment' - description: Array of transcription segments - words: - type: array - items: - $ref: '#/components/schemas/AudioTranscriptionWord' - description: Array of transcription words (only when timestamp_granularities includes 'word') - speaker_segments: + events: type: array items: - $ref: '#/components/schemas/AudioTranscriptionSpeakerSegment' - description: Array of transcription speaker segments (only when diarize is enabled) + $ref: '#/components/schemas/FineTuneEvent' + token_count: + type: integer + param_count: + type: integer + total_price: + type: integer + epochs_completed: + type: integer + queue_depth: + type: integer + wandb_project_name: + type: string + wandb_url: + type: string + from_checkpoint: + type: string + from_hf_model: + type: string + hf_model_revision: + type: string + progress: + $ref: '#/components/schemas/FineTuneProgress' - AudioTranscriptionSegment: + FinetuneResponseTruncated: type: object + description: A truncated version of the fine-tune response, used for POST /fine-tunes, GET /fine-tunes and POST /fine-tunes/{id}/cancel endpoints required: - id - - start - - end - - text + - status + - created_at + - updated_at + example: + id: ft-01234567890123456789 + status: completed + created_at: '2023-05-17T17:35:45.123Z' + updated_at: '2023-05-17T18:46:23.456Z' + user_id: 'user_01234567890123456789' + owner_address: 'user@example.com' + total_price: 1500 + token_count: 850000 + events: [] # FineTuneTruncated object has no events + model: 'meta-llama/Llama-2-7b-hf' + model_output_name: 'mynamespace/meta-llama/Llama-2-7b-hf-32162631' + n_epochs: 3 + training_file: 'file-01234567890123456789' + wandb_project_name: 'my-finetune-project' properties: id: - type: integer - description: Unique identifier for the segment - example: 0 - start: - type: number - format: float - description: Start time of the segment in seconds - example: 0.0 - end: - type: number - format: float - description: End time of the segment in seconds - example: 3.5 - text: type: string - description: The text content of the segment - example: Hello, world! 
- - AudioTranscriptionWord: - type: object - required: - - word - - start - - end - properties: - word: + description: Unique identifier for the fine-tune job + status: + $ref: '#/components/schemas/FinetuneJobStatus' + created_at: type: string - description: The word - example: Hello - start: + format: date-time + description: Creation timestamp of the fine-tune job + updated_at: + type: string + format: date-time + description: Last update timestamp of the fine-tune job + user_id: + type: string + description: Identifier for the user who created the job + owner_address: + type: string + description: Owner address information + total_price: + type: integer + description: Total price for the fine-tuning job + token_count: + type: integer + description: Count of tokens processed + events: + type: array + items: + $ref: '#/components/schemas/FineTuneEvent' + description: Events related to this fine-tune job + # FineTuneUserParams fields + training_file: + type: string + description: File-ID of the training file + validation_file: + type: string + description: File-ID of the validation file + model: + type: string + description: Base model used for fine-tuning + model_output_name: + type: string + suffix: + type: string + description: Suffix added to the fine-tuned model name + n_epochs: + type: integer + description: Number of training epochs + n_evals: + type: integer + description: Number of evaluations during training + n_checkpoints: + type: integer + description: Number of checkpoints saved during training + batch_size: + type: integer + description: Batch size used for training + training_type: + oneOf: + - $ref: '#/components/schemas/FullTrainingType' + - $ref: '#/components/schemas/LoRATrainingType' + description: Type of training used (full or LoRA) + training_method: + oneOf: + - $ref: '#/components/schemas/TrainingMethodSFT' + - $ref: '#/components/schemas/TrainingMethodDPO' + description: Method of training used + learning_rate: type: number format: float - description: Start time of the word in seconds - example: 0.0 - end: + description: Learning rate used for training + lr_scheduler: + $ref: '#/components/schemas/LRScheduler' + description: Learning rate scheduler configuration + warmup_ratio: type: number format: float - description: End time of the word in seconds - example: 0.5 - speaker_id: - type: string - description: The speaker id for the word (only when diarize is enabled) - example: SPEAKER_00 - - AudioTranscriptionSpeakerSegment: - type: object - required: - - speaker_id - - start - - end - - words - - text - - id - properties: - speaker_id: - type: string - description: The speaker identifier - example: SPEAKER_00 - start: + description: Ratio of warmup steps + max_grad_norm: type: number format: float - description: Start time of the speaker segment in seconds - example: 0.0 - end: + description: Maximum gradient norm for clipping + weight_decay: type: number format: float - description: End time of the speaker segment in seconds - example: 30.02 - words: - type: array - items: - $ref: '#/components/schemas/AudioTranscriptionWord' - description: Array of words spoken by this speaker in this segment - text: + description: Weight decay value used + wandb_project_name: type: string - description: The full text spoken by this speaker in this segment - example: "Hello, how are you doing today?" 
- id: - type: integer - description: Unique identifier for the speaker segment - example: 1 - - AudioTranslationRequest: - type: object - required: - - file - properties: - file: - oneOf: - - type: string - format: binary - description: Audio file to translate - - type: string - format: uri - description: Public HTTP/HTTPS URL to audio file - description: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, .webm, .flac. - model: + description: Weights & Biases project name + wandb_name: type: string - description: Model to use for translation - default: openai/whisper-large-v3 - enum: - - openai/whisper-large-v3 - language: + description: Weights & Biases run name + from_checkpoint: type: string - description: Target output language. Optional ISO 639-1 language code. If omitted, language is set to English. - default: en - example: en - prompt: + description: Checkpoint used to continue training + from_hf_model: type: string - description: Optional text to bias decoding. - response_format: + description: Hugging Face Hub repo to start training from + hf_model_revision: type: string - description: The format of the response - default: json - enum: - - json - - verbose_json - temperature: - type: number - format: float - description: Sampling temperature between 0.0 and 1.0 - default: 0.0 - minimum: 0.0 - maximum: 1.0 - timestamp_granularities: - oneOf: - - type: string - enum: - - segment - - word - - type: array - items: - type: string - enum: - - segment - - word - uniqueItems: true - minItems: 1 - maxItems: 2 - description: Controls level of timestamp detail in verbose_json. Only used when response_format is verbose_json. Can be a single granularity or an array to get multiple levels. - default: segment - example: ['word', 'segment'] - - AudioTranslationResponse: - oneOf: - - $ref: '#/components/schemas/AudioTranslationJsonResponse' - - $ref: '#/components/schemas/AudioTranslationVerboseJsonResponse' - - AudioTranslationJsonResponse: + description: The revision of the Hugging Face Hub model to continue training from + progress: + $ref: '#/components/schemas/FineTuneProgress' + description: Progress information for the fine-tuning job + FinetuneDeleteResponse: type: object - required: - - text properties: - text: + message: type: string - description: The translated text - example: Hello, world! + description: Message indicating the result of the deletion + FinetuneJobStatus: + type: string + enum: + - pending + - queued + - running + - compressing + - uploading + - cancel_requested + - cancelled + - error + - completed - AudioTranslationVerboseJsonResponse: - type: object - required: - - task - - language - - duration - - text - - segments - properties: - task: - type: string - description: The task performed - enum: - - transcribe - - translate - example: translate - language: - type: string - description: The target language of the translation - example: english - duration: - type: number - format: float - description: The duration of the audio in seconds - example: 3.5 - text: - type: string - description: The translated text - example: Hello, world! 
- segments: - type: array - items: - $ref: '#/components/schemas/AudioTranscriptionSegment' - description: Array of translation segments - words: + FinetuneEventLevels: + type: string + enum: + - null + - info + - warning + - error + - legacy_info + - legacy_iwarning + - legacy_ierror + FinetuneEventType: + type: string + enum: + - job_pending + - job_start + - job_stopped + - model_downloading + - model_download_complete + - training_data_downloading + - training_data_download_complete + - validation_data_downloading + - validation_data_download_complete + - wandb_init + - training_start + - checkpoint_save + - billing_limit + - epoch_complete + - training_complete + - model_compressing + - model_compression_complete + - model_uploading + - model_upload_complete + - job_complete + - job_error + - cancel_requested + - job_restarted + - refund + - warning + + FinetuneTruncatedList: + type: object + required: + - data + properties: + data: type: array items: - $ref: '#/components/schemas/AudioTranscriptionWord' - description: Array of translation words (only when timestamp_granularities includes 'word') - - AudioSpeechStreamResponse: - oneOf: - - $ref: '#/components/schemas/AudioSpeechStreamEvent' - - $ref: '#/components/schemas/StreamSentinel' - - AudioSpeechStreamEvent: + $ref: '#/components/schemas/FinetuneResponseTruncated' + FinetuneListEvents: type: object - required: [data] + required: + - data properties: data: - $ref: '#/components/schemas/AudioSpeechStreamChunk' - - AudioSpeechStreamChunk: + type: array + items: + $ref: '#/components/schemas/FineTuneEvent' + FineTuneEvent: type: object - required: [object, model, b64] + required: + - object + - created_at + - message + - type + - param_count + - token_count + - total_steps + - wandb_url + - step + - checkpoint_path + - model_path + - training_offset + - hash properties: object: type: string - enum: - - audio.tts.chunk - model: + enum: [fine-tune-event] + created_at: type: string - example: cartesia/sonic - b64: + level: + anyOf: + - $ref: '#/components/schemas/FinetuneEventLevels' + message: type: string - description: base64 encoded audio stream - - StreamSentinel: + type: + $ref: '#/components/schemas/FinetuneEventType' + param_count: + type: integer + token_count: + type: integer + total_steps: + type: integer + wandb_url: + type: string + step: + type: integer + checkpoint_path: + type: string + model_path: + type: string + training_offset: + type: integer + hash: + type: string + FineTuneProgress: type: object - required: [data] + description: Progress information for a fine-tuning job + required: + - estimate_available + - seconds_remaining + properties: + estimate_available: + type: boolean + description: Whether time estimate is available + seconds_remaining: + type: integer + description: Estimated time remaining in seconds for the fine-tuning job to next state + FinetuneListCheckpoints: + type: object + required: + - data properties: data: - title: stream_signal - type: string - enum: - - '[DONE]' - - ChatCompletionToken: + type: array + items: + $ref: '#/components/schemas/FineTuneCheckpoint' + FineTuneCheckpoint: type: object - required: [id, text, logprob, special] + required: + - step + - path + - created_at + - checkpoint_type properties: - id: + step: type: integer - text: + created_at: + type: string + path: + type: string + checkpoint_type: type: string - logprob: - type: number - special: - type: boolean - ChatCompletionChoice: + FullTrainingType: type: object - required: [index, delta, finish_reason] properties: 
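# Illustrative usage (not part of the schema): a rough sketch of consuming the streaming
# text-to-speech events described by the AudioSpeechStreamChunk and StreamSentinel schemas
# in this spec, where each `data:` line carries a base64-encoded audio chunk and the stream
# ends with `data: [DONE]`. The /v1/audio/speech path is an assumption.
#
#   import base64, json, os, requests
#
#   resp = requests.post(
#       "https://api.together.xyz/v1/audio/speech",               # assumed path
#       headers={"Authorization": f"Bearer {os.environ['TOGETHER_API_KEY']}"},
#       json={"model": "cartesia/sonic", "input": "Hello there",
#             "voice": "friendly sidekick", "stream": True, "response_format": "raw"},
#       stream=True,
#   )
#   with open("out.pcm", "wb") as out:
#       for line in resp.iter_lines():
#           if not line or not line.startswith(b"data: "):
#               continue
#           payload = line[len(b"data: "):]
#           if payload == b"[DONE]":
#               break
#           out.write(base64.b64decode(json.loads(payload)["b64"]))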
- index: + type: + type: string + enum: ['Full'] + required: + - type + LoRATrainingType: + type: object + properties: + type: + type: string + enum: ['Lora'] + lora_r: type: integer - finish_reason: - $ref: '#/components/schemas/FinishReason' - logprobs: - $ref: '#/components/schemas/LogprobsPart' - delta: - title: ChatCompletionChoiceDelta - type: object - required: [role] - properties: - token_id: - type: integer - role: - type: string - enum: ['system', 'user', 'assistant', 'function', 'tool'] - content: - type: string - nullable: true - tool_calls: - type: array - items: - $ref: '#/components/schemas/ToolChoice' - function_call: - type: object - deprecated: true - nullable: true - properties: - arguments: - type: string - name: - type: string - required: - - arguments - - name - reasoning: - type: string - nullable: true + lora_alpha: + type: integer + lora_dropout: + type: number + format: float + default: 0.0 + lora_trainable_modules: + type: string + default: 'all-linear' + required: + - type + - lora_r + - lora_alpha - EmbeddingsRequest: + TrainingMethodSFT: type: object - required: - - model - - input properties: - model: + method: type: string - description: > - The name of the embedding model to use.
-
- [See all of Together AI's embedding models](https://docs.together.ai/docs/serverless-models#embedding-models) - example: togethercomputer/m2-bert-80M-8k-retrieval - anyOf: - - type: string - enum: - - WhereIsAI/UAE-Large-V1 - - BAAI/bge-large-en-v1.5 - - BAAI/bge-base-en-v1.5 - - togethercomputer/m2-bert-80M-8k-retrieval - - type: string - input: + enum: ['sft'] + train_on_inputs: oneOf: + - type: boolean - type: string - description: A string providing the text for the model to embed. - example: Our solar system orbits the Milky Way galaxy at about 515,000 mph - - type: array - items: - type: string - description: A string providing the text for the model to embed. - example: Our solar system orbits the Milky Way galaxy at about 515,000 mph - example: Our solar system orbits the Milky Way galaxy at about 515,000 mph - - EmbeddingsResponse: + enum: + - auto + type: boolean + default: auto + description: Whether to mask the user messages in conversational data or prompts in instruction data. + required: + - method + - train_on_inputs + TrainingMethodDPO: type: object + properties: + method: + type: string + enum: ['dpo'] + dpo_beta: + type: number + format: float + default: 0.1 + rpo_alpha: + type: number + format: float + default: 0.0 + dpo_normalize_logratios_by_length: + type: boolean + default: false + dpo_reference_free: + type: boolean + default: false + simpo_gamma: + type: number + format: float + default: 0.0 required: - - object - - model - - data + - method + + LRScheduler: + type: object properties: - object: + lr_scheduler_type: type: string enum: - - list - model: - type: string - data: - type: array - items: - type: object - required: [index, object, embedding] - properties: - object: - type: string - enum: - - embedding - embedding: - type: array - items: - type: number - index: - type: integer + - linear + - cosine + lr_scheduler_args: + oneOf: + - $ref: '#/components/schemas/LinearLRSchedulerArgs' + - $ref: '#/components/schemas/CosineLRSchedulerArgs' + required: + - lr_scheduler_type + CosineLRSchedulerArgs: + type: object + properties: + min_lr_ratio: + type: number + format: float + default: 0.0 + description: The ratio of the final learning rate to the peak learning rate + num_cycles: + type: number + format: float + default: 0.5 + description: Number or fraction of cycles for the cosine learning rate scheduler + required: + - min_lr_ratio + - num_cycles + LinearLRSchedulerArgs: + type: object + properties: + min_lr_ratio: + type: number + format: float + default: 0.0 + description: The ratio of the final learning rate to the peak learning rate - ModelInfoList: - type: array - items: - $ref: '#/components/schemas/ModelInfo' - ModelInfo: + Autoscaling: type: object - required: [id, object, created, type] + description: Configuration for automatic scaling of replicas based on demand. 
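# Illustrative usage (not part of the schema): the autoscaling object defined here is
# supplied when creating a dedicated endpoint (see CreateEndpointRequest in this spec).
# A minimal, assumed payload fragment:
#
#   autoscaling = {"min_replicas": 1, "max_replicas": 3}
#   # e.g. json={"model": "...", "hardware": "...", "autoscaling": autoscaling, ...}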
+ required: + - min_replicas + - max_replicas properties: - id: - type: string - example: 'Austism/chronos-hermes-13b' - object: - type: string - example: 'model' - created: + min_replicas: type: integer - example: 1692896905 - type: - enum: - - chat - - language - - code - - image - - embedding - - moderation - - rerank - example: 'chat' - display_name: - type: string - example: 'Chronos Hermes (13B)' - organization: - type: string - example: 'Austism' - link: - type: string - license: - type: string - example: 'other' - context_length: + format: int32 + description: The minimum number of replicas to maintain, even when there is no load + examples: + - 2 + max_replicas: type: integer - example: 2048 - pricing: - $ref: '#/components/schemas/Pricing' + format: int32 + description: The maximum number of replicas to scale up to under load + examples: + - 5 - ModelUploadRequest: + HardwareSpec: type: object + description: Detailed specifications of a hardware configuration required: - - model_name - - model_source + - gpu_type + - gpu_link + - gpu_memory + - gpu_count properties: - model_name: + gpu_type: type: string - description: The name to give to your uploaded model - example: 'Qwen2.5-72B-Instruct' - model_source: + description: The type/model of GPU + examples: + - a100-80gb + gpu_link: type: string - description: The source location of the model (Hugging Face repo or S3 path) - example: 'unsloth/Qwen2.5-72B-Instruct' - model_type: + description: The GPU interconnect technology + examples: + - sxm + gpu_memory: + type: number + format: float + description: Amount of GPU memory in GB + examples: + - 80 + gpu_count: + type: integer + format: int32 + description: Number of GPUs in this configuration + examples: + - 2 + + EndpointPricing: + type: object + description: Pricing details for using an endpoint + required: + - cents_per_minute + properties: + cents_per_minute: + type: number + format: float + description: Cost per minute of endpoint uptime in cents + examples: + - 5.42 + + HardwareAvailability: + type: object + description: Indicates the current availability status of a hardware configuration + required: + - status + properties: + status: type: string - description: Whether the model is a full model or an adapter - default: 'model' + description: The availability status of the hardware configuration enum: - - model - - adapter - example: 'model' - hf_token: - type: string - description: Hugging Face token (if uploading from Hugging Face) - example: 'hf_examplehuggingfacetoken' - description: - type: string - description: A description of your model - example: 'Finetuned Qwen2.5-72B-Instruct by Unsloth' - base_model: - type: string - description: The base model to use for an adapter if setting it to run against a serverless pool. Only used for model_type `adapter`. - example: 'Qwen/Qwen2.5-72B-Instruct' - lora_model: - type: string - description: The lora pool to use for an adapter if setting it to run against, say, a dedicated pool. Only used for model_type `adapter`. 
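# Illustrative usage (not part of the schema): a rough sketch of uploading a model with the
# fields described by ModelUploadRequest and polling the returned job, whose shape follows
# ModelUploadSuccessResponse / JobInfoSuccessResponse. The endpoint paths below are
# assumptions; the field names, example values and job status values come from this spec.
#
#   import os, time, requests
#
#   headers = {"Authorization": f"Bearer {os.environ['TOGETHER_API_KEY']}"}
#   upload = requests.post(
#       "https://api.together.xyz/v1/models",                     # assumed path
#       headers=headers,
#       json={
#           "model_name": "Qwen2.5-72B-Instruct",
#           "model_source": "unsloth/Qwen2.5-72B-Instruct",
#           "model_type": "model",
#           "hf_token": os.environ.get("HF_TOKEN"),
#           "description": "Finetuned Qwen2.5-72B-Instruct by Unsloth",
#       },
#   ).json()
#   job_id = upload["data"]["job_id"]
#
#   while True:                                                   # poll until the job settles
#       job = requests.get(f"https://api.together.xyz/v1/jobs/{job_id}",   # assumed path
#                          headers=headers).json()
#       if job["status"] in ("Complete", "Failed"):
#           break
#       time.sleep(30)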
- example: 'my_username/Qwen2.5-72B-Instruct-lora' + - available + - unavailable + - insufficient - ModelUploadSuccessResponse: + HardwareWithStatus: type: object + description: Hardware configuration details with optional availability status required: - - data - - message + - object + - id + - pricing + - specs + - updated_at properties: - data: - type: object - required: - - job_id - - model_name - - model_id - - model_source - properties: - job_id: - type: string - example: 'job-a15dad11-8d8e-4007-97c5-a211304de284' - model_name: - type: string - example: 'necolinehubner/Qwen2.5-72B-Instruct' - model_id: - type: string - example: 'model-c0e32dfc-637e-47b2-bf4e-e9b2e58c9da7' - model_source: - type: string - example: 'huggingface' - message: + object: type: string - example: 'Processing model weights. Job created.' - - ImageResponse: - type: object - properties: + enum: + - hardware id: type: string - model: + description: Unique identifier for the hardware configuration + examples: + - 2x_nvidia_a100_80gb_sxm + pricing: + $ref: '#/components/schemas/EndpointPricing' + specs: + $ref: '#/components/schemas/HardwareSpec' + availability: + $ref: '#/components/schemas/HardwareAvailability' + updated_at: type: string - object: - enum: - - list - example: 'list' - data: - type: array - items: - oneOf: - - $ref: '#/components/schemas/ImageResponseDataB64' - - $ref: '#/components/schemas/ImageResponseDataUrl' - discriminator: - propertyName: type - required: - - id - - model - - object - - data + format: date-time + description: Timestamp of when the hardware status was last updated - ImageResponseDataB64: + CreateEndpointRequest: type: object - required: [index, b64_json, type] + required: + - model + - hardware + - autoscaling properties: - index: - type: integer - b64_json: + display_name: type: string - type: + description: A human-readable name for the endpoint + examples: + - My Llama3 70b endpoint + model: type: string - enum: [b64_json] - - ImageResponseDataUrl: - type: object - required: [index, url, type] - properties: - index: - type: integer - url: + description: The model to deploy on this endpoint + examples: + - meta-llama/Llama-3-8b-chat-hf + hardware: type: string - type: + description: The hardware configuration to use for this endpoint + examples: + - 1x_nvidia_a100_80gb_sxm + autoscaling: + $ref: '#/components/schemas/Autoscaling' + description: Configuration for automatic scaling of the endpoint + disable_prompt_cache: + type: boolean + description: Whether to disable the prompt cache for this endpoint + default: false + disable_speculative_decoding: + type: boolean + description: Whether to disable speculative decoding for this endpoint + default: false + state: type: string - enum: [url] + description: The desired state of the endpoint + enum: + - STARTED + - STOPPED + default: STARTED + example: STARTED + inactive_timeout: + type: integer + description: The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to null, omit or set to 0 to disable automatic timeout. 
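# Illustrative usage (not part of the schema): a minimal sketch of a CreateEndpointRequest
# payload using the fields defined in this schema (display_name, model, hardware,
# autoscaling, state, inactive_timeout). The endpoint path is an assumption; the values
# mirror the examples given in this spec.
#
#   import os, requests
#
#   resp = requests.post(
#       "https://api.together.xyz/v1/endpoints",                  # assumed path
#       headers={"Authorization": f"Bearer {os.environ['TOGETHER_API_KEY']}"},
#       json={
#           "display_name": "My Llama3 endpoint",
#           "model": "meta-llama/Llama-3-8b-chat-hf",
#           "hardware": "1x_nvidia_a100_80gb_sxm",
#           "autoscaling": {"min_replicas": 1, "max_replicas": 3},
#           "state": "STARTED",
#           "inactive_timeout": 60,
#       },
#   )
#   print(resp.json())        # expected to resemble the DedicatedEndpoint schema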
+ nullable: true + example: 60 + availability_zone: + type: string + description: Create the endpoint in a specified availability zone (e.g., us-central-4b) - JobInfoSuccessResponse: + DedicatedEndpoint: type: object + description: Details about a dedicated endpoint deployment required: + - object + - id + - name + - display_name + - model + - hardware - type - - job_id - - status - - status_updates - - args + - owner + - state + - autoscaling - created_at - - updated_at properties: + object: + type: string + enum: + - endpoint + description: The type of object + example: endpoint + id: + type: string + description: Unique identifier for the endpoint + example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 + name: + type: string + description: System name for the endpoint + example: devuser/meta-llama/Llama-3-8b-chat-hf-a32b82a1 + display_name: + type: string + description: Human-readable name for the endpoint + example: My Llama3 70b endpoint + model: + type: string + description: The model deployed on this endpoint + example: meta-llama/Llama-3-8b-chat-hf + hardware: + type: string + description: The hardware configuration used for this endpoint + example: 1x_nvidia_a100_80gb_sxm type: type: string - example: 'model_upload' - job_id: + enum: + - dedicated + description: The type of endpoint + example: dedicated + owner: type: string - example: 'job-a15dad11-8d8e-4007-97c5-a211304de284' - status: + description: The owner of this endpoint + example: devuser + state: type: string - enum: ['Queued', 'Running', 'Complete', 'Failed'] - example: 'Complete' - status_updates: - type: array - items: - type: object - required: - - status - - message - - timestamp - properties: - status: - type: string - example: 'Complete' - message: - type: string - example: 'Job is Complete' - timestamp: - type: string - format: date-time - example: '2025-03-11T22:36:12Z' - args: - type: object - properties: - description: - type: string - example: 'Finetuned Qwen2.5-72B-Instruct by Unsloth' - modelName: - type: string - example: 'necolinehubner/Qwen2.5-72B-Instruct' - modelSource: - type: string - example: 'unsloth/Qwen2.5-72B-Instruct' + enum: + - PENDING + - STARTING + - STARTED + - STOPPING + - STOPPED + - ERROR + description: Current state of the endpoint + example: STARTED + autoscaling: + $ref: '#/components/schemas/Autoscaling' + description: Configuration for automatic scaling of the endpoint created_at: type: string format: date-time - example: '2025-03-11T22:05:43Z' - updated_at: - type: string - format: date-time - example: '2025-03-11T22:36:12Z' + description: Timestamp when the endpoint was created + example: 2025-02-04T10:43:55.405Z - JobsInfoSuccessResponse: + ListEndpoint: type: object + description: Details about an endpoint when listed via the list endpoint required: - - data - properties: - data: - type: array - items: - $ref: '#/components/schemas/JobInfoSuccessResponse' - - Pricing: - type: object - required: [hourly, input, output, base, finetune] - properties: - hourly: - type: number - example: 0 - input: - type: number - example: 0.3 - output: - type: number - example: 0.3 - base: - type: number - example: 0 - finetune: - type: number - example: 0 - - ToolsPart: - type: object + - id + - object + - name + - model + - type + - owner + - state + - created_at properties: + object: + type: string + enum: + - endpoint + description: The type of object + example: endpoint + id: + type: string + description: Unique identifier for the endpoint + example: 
endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 + name: + type: string + description: System name for the endpoint + example: allenai/OLMo-7B + model: + type: string + description: The model deployed on this endpoint + example: allenai/OLMo-7B type: type: string - example: 'tool_type' - function: - type: object + enum: + - serverless + - dedicated + description: The type of endpoint + example: serverless + owner: + type: string + description: The owner of this endpoint + example: together + state: + type: string + enum: + - PENDING + - STARTING + - STARTED + - STOPPING + - STOPPED + - ERROR + description: Current state of the endpoint + example: STARTED + created_at: + type: string + format: date-time + description: Timestamp when the endpoint was created + example: 2024-02-28T21:34:35.444Z + + DisplayorExecuteOutput: + properties: + data: properties: - description: + application/geo+json: + type: object + application/javascript: type: string - example: 'A description of the function.' - name: + application/json: + type: object + application/pdf: + format: byte type: string - example: 'function_name' - parameters: + application/vnd.vega.v5+json: type: object - additionalProperties: true - description: 'A map of parameter names to their values.' - ToolChoice: - type: object - required: [id, type, function, index] - properties: - # TODO: is this the right place for index? - index: - type: number - id: - type: string + application/vnd.vegalite.v4+json: + type: object + image/gif: + format: byte + type: string + image/jpeg: + format: byte + type: string + image/png: + format: byte + type: string + image/svg+xml: + type: string + text/html: + type: string + text/latex: + type: string + text/markdown: + type: string + text/plain: + type: string + type: object type: + enum: + - display_data + - execute_result type: string - enum: ['function'] - function: + required: + - type + - data + title: DisplayorExecuteOutput + + Error: + oneOf: + - type: string + - additionalProperties: true type: object - required: [name, arguments] - properties: - name: - type: string - example: 'function_name' - arguments: - type: string + title: Error - FileResponse: - type: object - required: - - id - - object - - created_at - - filename - - bytes - - purpose - - FileType - - Processed - - LineCount + ErrorOutput: + title: ErrorOutput + description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully. properties: - id: - type: string - object: + data: type: string - example: 'file' - created_at: - type: integer - example: 1715021438 - filename: + type: + enum: + - error type: string - example: 'my_file.jsonl' - bytes: - type: integer - example: 2664 - purpose: - $ref: '#/components/schemas/FilePurpose' - Processed: - type: boolean - FileType: - $ref: '#/components/schemas/FileType' - LineCount: - type: integer - FileList: required: + - type - data - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/FileResponse' - FileObject: - type: object + + ExecuteRequest: + title: ExecuteRequest + required: + - language + - code properties: - object: - type: string - id: - type: string - filename: + code: + description: 'Code snippet to execute.' 
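# Illustrative usage (not part of the schema): a rough sketch of an ExecuteRequest and of
# reading the ExecuteResponse outputs (stdout / stderr / error / display_data) plus the
# session_id used for follow-up calls. The execute path is an assumption; the field names
# come from the schemas in this spec.
#
#   import os, requests
#
#   headers = {"Authorization": f"Bearer {os.environ['TOGETHER_API_KEY']}"}
#   first = requests.post(
#       "https://api.together.xyz/tci/execute",                   # assumed path
#       headers=headers,
#       json={"language": "python", "code": "x = 21\nprint(x * 2)"},
#   ).json()
#   session_id = first["data"]["session_id"]
#   for output in first["data"]["outputs"]:
#       if output["type"] in ("stdout", "stderr"):
#           print(output["data"])
#
#   # A follow-up call in the same session reuses the interpreter state:
#   requests.post(
#       "https://api.together.xyz/tci/execute",                   # assumed path
#       headers=headers,
#       json={"language": "python", "code": "print(x + 1)", "session_id": session_id},
#   )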
+ example: "print('Hello, world!')" type: string - size: - type: integer - FilePurpose: - type: string - description: The purpose of the file - example: 'fine-tune' - enum: - - fine-tune - - eval - - eval-sample - - eval-output - - eval-summary - - batch-generated - - batch-api - FileType: - type: string - description: The type of the file - default: 'jsonl' - example: 'jsonl' - enum: - - 'csv' - - 'jsonl' - - 'parquet' - FileDeleteResponse: - type: object - properties: - id: + files: + description: Files to upload to the session. If present, files will be uploaded before executing the given code. + items: + properties: + content: + type: string + encoding: + description: Encoding of the file content. Use `string` for text files such as code, and `base64` for binary files, such as images. + enum: + - string + - base64 + type: string + name: + type: string + required: + - name + - encoding + - content + type: object + type: array + language: + default: python + description: Programming language for the code to execute. Currently only supports Python, but more will be added. + enum: + - python + session_id: + description: Identifier of the current session. Used to make follow-up calls. Requests will return an error if the session does not belong to the caller or has expired. + example: ses_abcDEF123 + nullable: false type: string - deleted: - type: boolean - FinetuneResponse: + + ExecuteResponse: + title: ExecuteResponse type: object - required: - - id - - status - properties: - id: - type: string - format: uuid - training_file: - type: string - validation_file: - type: string - model: - type: string - model_output_name: - type: string - model_output_path: - type: string - trainingfile_numlines: - type: integer - trainingfile_size: - type: integer - created_at: - type: string - format: date-time - updated_at: - type: string - format: date-time - n_epochs: - type: integer - n_checkpoints: - type: integer - n_evals: - type: integer - batch_size: - oneOf: - - type: integer - - type: string - enum: - - max - default: 'max' - learning_rate: - type: number - lr_scheduler: - type: object - $ref: '#/components/schemas/LRScheduler' - warmup_ratio: - type: number - max_grad_norm: - type: number - format: float - weight_decay: - type: number - format: float - eval_steps: - type: integer - train_on_inputs: - oneOf: - - type: boolean - - type: string - enum: - - auto - default: auto - training_method: + description: 'The result of the execution. If successful, `data` contains the result and `errors` will be null. If unsuccessful, `data` will be null and `errors` will contain the errors.' + oneOf: + - title: SuccessfulExecution type: object - oneOf: - - $ref: '#/components/schemas/TrainingMethodSFT' - - $ref: '#/components/schemas/TrainingMethodDPO' - training_type: + required: [data, errors] + properties: + errors: + type: 'null' + data: + type: object + nullable: false + required: [session_id, outputs] + properties: + outputs: + type: array + items: + discriminator: + propertyName: type + oneOf: + - title: StreamOutput + description: Outputs that were printed to stdout or stderr + type: object + required: [type, data] + properties: + type: + enum: + - stdout + - stderr + type: string + data: + type: string + - description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully. 
+ properties: + data: + type: string + type: + enum: + - error + type: string + required: + - type + - data + title: ErrorOutput + - properties: + data: + properties: + application/geo+json: + type: object + additionalProperties: true + application/javascript: + type: string + application/json: + type: object + additionalProperties: true + application/pdf: + format: byte + type: string + application/vnd.vega.v5+json: + type: object + additionalProperties: true + application/vnd.vegalite.v4+json: + type: object + additionalProperties: true + image/gif: + format: byte + type: string + image/jpeg: + format: byte + type: string + image/png: + format: byte + type: string + image/svg+xml: + type: string + text/html: + type: string + text/latex: + type: string + text/markdown: + type: string + text/plain: + type: string + type: object + type: + enum: + - display_data + - execute_result + type: string + required: + - type + - data + title: DisplayorExecuteOutput + title: InterpreterOutput + session_id: + type: string + description: Identifier of the current session. Used to make follow-up calls. + example: ses_abcDEF123 + nullable: false + status: + type: string + enum: + - success + description: Status of the execution. Currently only supports success. + - title: FailedExecution type: object - oneOf: - - $ref: '#/components/schemas/FullTrainingType' - - $ref: '#/components/schemas/LoRATrainingType' - status: - $ref: '#/components/schemas/FinetuneJobStatus' - job_id: - type: string - events: - type: array - items: - $ref: '#/components/schemas/FineTuneEvent' - token_count: - type: integer - param_count: - type: integer - total_price: - type: integer - epochs_completed: - type: integer - queue_depth: - type: integer - wandb_project_name: - type: string - wandb_url: - type: string - from_checkpoint: - type: string - from_hf_model: - type: string - hf_model_revision: - type: string - progress: - $ref: '#/components/schemas/FineTuneProgress' - - FinetuneResponseTruncated: - type: object - description: A truncated version of the fine-tune response, used for POST /fine-tunes, GET /fine-tunes and POST /fine-tunes/{id}/cancel endpoints - required: - - id - - status - - created_at - - updated_at - example: - id: ft-01234567890123456789 - status: completed - created_at: '2023-05-17T17:35:45.123Z' - updated_at: '2023-05-17T18:46:23.456Z' - user_id: 'user_01234567890123456789' - owner_address: 'user@example.com' - total_price: 1500 - token_count: 850000 - events: [] # FineTuneTruncated object has no events - model: 'meta-llama/Llama-2-7b-hf' - model_output_name: 'mynamespace/meta-llama/Llama-2-7b-hf-32162631' - n_epochs: 3 - training_file: 'file-01234567890123456789' - wandb_project_name: 'my-finetune-project' - properties: - id: - type: string - description: Unique identifier for the fine-tune job - status: - $ref: '#/components/schemas/FinetuneJobStatus' - created_at: - type: string - format: date-time - description: Creation timestamp of the fine-tune job - updated_at: - type: string - format: date-time - description: Last update timestamp of the fine-tune job - user_id: - type: string - description: Identifier for the user who created the job - owner_address: - type: string - description: Owner address information - total_price: - type: integer - description: Total price for the fine-tuning job - token_count: - type: integer - description: Count of tokens processed - events: - type: array - items: - $ref: '#/components/schemas/FineTuneEvent' - description: Events related to this fine-tune job - # 
FineTuneUserParams fields - training_file: - type: string - description: File-ID of the training file - validation_file: - type: string - description: File-ID of the validation file - model: - type: string - description: Base model used for fine-tuning - model_output_name: - type: string - suffix: - type: string - description: Suffix added to the fine-tuned model name - n_epochs: - type: integer - description: Number of training epochs - n_evals: - type: integer - description: Number of evaluations during training - n_checkpoints: - type: integer - description: Number of checkpoints saved during training - batch_size: - type: integer - description: Batch size used for training - training_type: - oneOf: - - $ref: '#/components/schemas/FullTrainingType' - - $ref: '#/components/schemas/LoRATrainingType' - description: Type of training used (full or LoRA) - training_method: - oneOf: - - $ref: '#/components/schemas/TrainingMethodSFT' - - $ref: '#/components/schemas/TrainingMethodDPO' - description: Method of training used - learning_rate: - type: number - format: float - description: Learning rate used for training - lr_scheduler: - $ref: '#/components/schemas/LRScheduler' - description: Learning rate scheduler configuration - warmup_ratio: - type: number - format: float - description: Ratio of warmup steps - max_grad_norm: - type: number - format: float - description: Maximum gradient norm for clipping - weight_decay: - type: number - format: float - description: Weight decay value used - wandb_project_name: - type: string - description: Weights & Biases project name - wandb_name: - type: string - description: Weights & Biases run name - from_checkpoint: - type: string - description: Checkpoint used to continue training - from_hf_model: - type: string - description: Hugging Face Hub repo to start training from - hf_model_revision: - type: string - description: The revision of the Hugging Face Hub model to continue training from - progress: - $ref: '#/components/schemas/FineTuneProgress' - description: Progress information for the fine-tuning job - FinetuneDeleteResponse: - type: object - properties: - message: - type: string - description: Message indicating the result of the deletion - FinetuneJobStatus: - type: string - enum: - - pending - - queued - - running - - compressing - - uploading - - cancel_requested - - cancelled - - error - - completed + required: [data, errors] + properties: + data: + type: 'null' + errors: + type: array + items: + title: Error + oneOf: + - type: string + - type: object + additionalProperties: true - FinetuneEventLevels: - type: string - enum: - - null - - info - - warning - - error - - legacy_info - - legacy_iwarning - - legacy_ierror - FinetuneEventType: - type: string - enum: - - job_pending - - job_start - - job_stopped - - model_downloading - - model_download_complete - - training_data_downloading - - training_data_download_complete - - validation_data_downloading - - validation_data_download_complete - - wandb_init - - training_start - - checkpoint_save - - billing_limit - - epoch_complete - - training_complete - - model_compressing - - model_compression_complete - - model_uploading - - model_upload_complete - - job_complete - - job_error - - cancel_requested - - job_restarted - - refund - - warning + InterpreterOutput: + discriminator: + propertyName: type + oneOf: + - description: Outputs that were printed to stdout or stderr + properties: + data: + type: string + type: + enum: + - stdout + - stderr + type: string + required: + - type + - data + title: 
StreamOutput + - description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully. + properties: + data: + type: string + type: + enum: + - error + type: string + required: + - type + - data + title: ErrorOutput + - properties: + data: + properties: + application/geo+json: + type: object + application/javascript: + type: string + application/json: + type: object + application/pdf: + format: byte + type: string + application/vnd.vega.v5+json: + type: object + application/vnd.vegalite.v4+json: + type: object + image/gif: + format: byte + type: string + image/jpeg: + format: byte + type: string + image/png: + format: byte + type: string + image/svg+xml: + type: string + text/html: + type: string + text/latex: + type: string + text/markdown: + type: string + text/plain: + type: string + type: object + type: + enum: + - display_data + - execute_result + type: string + required: + - type + - data + title: DisplayorExecuteOutput + title: InterpreterOutput - FinetuneTruncatedList: - type: object - required: - - data + Response: properties: - data: - type: array + errors: items: - $ref: '#/components/schemas/FinetuneResponseTruncated' - FinetuneListEvents: + oneOf: + - type: string + - additionalProperties: true + type: object + title: Error + type: array + title: Response type: object - required: - - data + + SessionListResponse: + allOf: + - properties: + errors: + items: + oneOf: + - type: string + - additionalProperties: true + type: object + title: Error + type: array + title: Response + type: object + - properties: + data: + properties: + sessions: + items: + properties: + execute_count: + type: integer + expires_at: + format: date-time + type: string + id: + description: Session Identifier. Used to make follow-up calls. 
+ example: ses_abcDEF123 + type: string + last_execute_at: + format: date-time + type: string + started_at: + format: date-time + type: string + required: + - execute_count + - expires_at + - id + - last_execute_at + - started_at + type: object + type: array + required: + - sessions + type: object + title: SessionListResponse + type: object + + StreamOutput: + description: Outputs that were printed to stdout or stderr properties: data: - type: array - items: - $ref: '#/components/schemas/FineTuneEvent' - FineTuneEvent: - type: object + type: string + type: + enum: + - stdout + - stderr + type: string required: - - object - - created_at - - message - type - - param_count - - token_count - - total_steps - - wandb_url - - step - - checkpoint_path - - model_path - - training_offset - - hash + - data + title: StreamOutput + + CreateBatchRequest: + type: object + required: [endpoint, input_file_id] properties: - object: - type: string - enum: [fine-tune-event] - created_at: - type: string - level: - anyOf: - - $ref: '#/components/schemas/FinetuneEventLevels' - message: - type: string - type: - $ref: '#/components/schemas/FinetuneEventType' - param_count: - type: integer - token_count: - type: integer - total_steps: - type: integer - wandb_url: + endpoint: type: string - step: - type: integer - checkpoint_path: + description: The endpoint to use for batch processing + example: '/v1/chat/completions' + input_file_id: type: string - model_path: + description: ID of the uploaded input file containing batch requests + example: 'file-abc123def456ghi789' + completion_window: type: string - training_offset: + description: Time window for batch completion (optional) + example: '24h' + priority: type: integer - hash: + description: Priority for batch processing (optional) + example: 1 + model_id: type: string - FineTuneProgress: + description: 'Model to use for processing batch requests' + example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo' + BatchErrorResponse: type: object - description: Progress information for a fine-tuning job - required: - - estimate_available - - seconds_remaining properties: - estimate_available: - type: boolean - description: Whether time estimate is available - seconds_remaining: - type: integer - description: Estimated time remaining in seconds for the fine-tuning job to next state - FinetuneListCheckpoints: + error: + type: string + BatchJobWithWarning: type: object - required: - - data properties: - data: - type: array - items: - $ref: '#/components/schemas/FineTuneCheckpoint' - FineTuneCheckpoint: + job: + $ref: '#/components/schemas/BatchJob' + warning: + type: string + BatchJob: type: object - required: - - step - - path - - created_at - - checkpoint_type properties: - step: + id: + type: string + format: uuid + example: '01234567-8901-2345-6789-012345678901' + user_id: + type: string + example: 'user_789xyz012' + input_file_id: + type: string + example: 'file-input123abc456def' + file_size_bytes: type: integer + format: int64 + example: 1048576 + description: 'Size of input file in bytes' + status: + $ref: '#/components/schemas/BatchJobStatus' + job_deadline: + type: string + format: date-time + example: '2024-01-15T15:30:00Z' created_at: type: string - path: + format: date-time + example: '2024-01-15T14:30:00Z' + endpoint: type: string - checkpoint_type: + example: '/v1/chat/completions' + progress: + type: number + format: float64 + example: 75.0 + description: 'Completion progress (0.0 to 100)' + model_id: + type: string + example: 
'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo' + description: 'Model used for processing requests' + output_file_id: + type: string + example: 'file-output789xyz012ghi' + error_file_id: + type: string + example: 'file-errors456def789jkl' + error: type: string + completed_at: + type: string + format: date-time + example: '2024-01-15T15:45:30Z' + BatchJobStatus: + type: string + enum: + - VALIDATING + - IN_PROGRESS + - COMPLETED + - FAILED + - EXPIRED + - CANCELLED + example: 'IN_PROGRESS' + description: 'Current status of the batch job' - FullTrainingType: + EvaluationTypedRequest: type: object - properties: - type: - type: string - enum: ['Full'] required: - type - LoRATrainingType: - type: object + - parameters properties: type: type: string - enum: ['Lora'] - lora_r: - type: integer - lora_alpha: - type: integer - lora_dropout: - type: number - format: float - default: 0.0 - lora_trainable_modules: - type: string - default: 'all-linear' - required: - - type - - lora_r - - lora_alpha + enum: [classify, score, compare] + description: The type of evaluation to perform + example: 'classify' + parameters: + oneOf: + - $ref: '#/components/schemas/EvaluationClassifyParameters' + - $ref: '#/components/schemas/EvaluationScoreParameters' + - $ref: '#/components/schemas/EvaluationCompareParameters' + description: Type-specific parameters for the evaluation - TrainingMethodSFT: + EvaluationClassifyParameters: type: object - properties: - method: - type: string - enum: ['sft'] - train_on_inputs: - oneOf: - - type: boolean - - type: string - enum: - - auto - type: boolean - default: auto - description: Whether to mask the user messages in conversational data or prompts in instruction data. required: - - method - - train_on_inputs - TrainingMethodDPO: - type: object + - judge + - labels + - pass_labels + - input_data_file_path properties: - method: + judge: + $ref: '#/components/schemas/EvaluationJudgeModelConfig' + labels: + type: array + items: + type: string + minItems: 2 + description: List of possible classification labels + example: ['yes', 'no'] + pass_labels: + type: array + items: + type: string + minItems: 1 + description: List of labels that are considered passing + example: ['yes'] + model_to_evaluate: + $ref: '#/components/schemas/EvaluationModelOrString' + input_data_file_path: type: string - enum: ['dpo'] - dpo_beta: - type: number - format: float - default: 0.1 - rpo_alpha: - type: number - format: float - default: 0.0 - dpo_normalize_logratios_by_length: - type: boolean - default: false - dpo_reference_free: - type: boolean - default: false - simpo_gamma: - type: number - format: float - default: 0.0 - required: - - method + description: Data file ID + example: 'file-1234-aefd' - LRScheduler: + EvaluationScoreParameters: type: object - properties: - lr_scheduler_type: - type: string - enum: - - linear - - cosine - lr_scheduler_args: - oneOf: - - $ref: '#/components/schemas/LinearLRSchedulerArgs' - - $ref: '#/components/schemas/CosineLRSchedulerArgs' required: - - lr_scheduler_type - CosineLRSchedulerArgs: - type: object + - judge + - min_score + - max_score + - pass_threshold + - input_data_file_path properties: - min_lr_ratio: + judge: + $ref: '#/components/schemas/EvaluationJudgeModelConfig' + min_score: type: number format: float - default: 0.0 - description: The ratio of the final learning rate to the peak learning rate - num_cycles: + example: 0.0 + description: Minimum possible score + max_score: type: number format: float - default: 0.5 - description: Number or fraction of cycles 
for the cosine learning rate scheduler - required: - - min_lr_ratio - - num_cycles - LinearLRSchedulerArgs: - type: object - properties: - min_lr_ratio: + example: 10.0 + description: Maximum possible score + pass_threshold: type: number format: float - default: 0.0 - description: The ratio of the final learning rate to the peak learning rate + example: 7.0 + description: Score threshold for passing + model_to_evaluate: + $ref: '#/components/schemas/EvaluationModelOrString' + input_data_file_path: + type: string + example: 'file-01234567890123456789' + description: Data file ID - Autoscaling: + EvaluationCompareParameters: type: object - description: Configuration for automatic scaling of replicas based on demand. required: - - min_replicas - - max_replicas + - judge + - input_data_file_path properties: - min_replicas: - type: integer - format: int32 - description: The minimum number of replicas to maintain, even when there is no load - examples: - - 2 - max_replicas: - type: integer - format: int32 - description: The maximum number of replicas to scale up to under load - examples: - - 5 + judge: + $ref: '#/components/schemas/EvaluationJudgeModelConfig' + model_a: + $ref: '#/components/schemas/EvaluationModelOrString' + model_b: + $ref: '#/components/schemas/EvaluationModelOrString' + input_data_file_path: + type: string + description: Data file name - HardwareSpec: + EvaluationJudgeModelConfig: type: object - description: Detailed specifications of a hardware configuration required: - - gpu_type - - gpu_link - - gpu_memory - - gpu_count + - model + - system_template + - model_source properties: - gpu_type: + model: type: string - description: The type/model of GPU - examples: - - a100-80gb - gpu_link: + description: Name of the judge model + example: 'meta-llama/Llama-3-70B-Instruct-Turbo' + system_template: + type: string + description: System prompt template for the judge + example: 'Imagine you are a helpful assistant' + model_source: + type: string + description: "Source of the judge model." + enum: [serverless, dedicated, external] + external_api_token: + type: string + description: "Bearer/API token for external judge models." + external_base_url: type: string - description: The GPU interconnect technology - examples: - - sxm - gpu_memory: - type: number - format: float - description: Amount of GPU memory in GB - examples: - - 80 - gpu_count: - type: integer - format: int32 - description: Number of GPUs in this configuration - examples: - - 2 + description: "Base URL for external judge models. Must be OpenAI-compatible base URL." 
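A minimal sketch of submitting a classify evaluation built from the EvaluationTypedRequest, EvaluationClassifyParameters, and EvaluationJudgeModelConfig schemas above. The `/v1/evaluation` path and the `model_to_evaluate` field name are assumptions; the rest mirrors the schema fields and their documented examples.

```python
# Sketch of an EvaluationTypedRequest for a "classify" evaluation (path assumed).
import os
import requests

request_body = {
    "type": "classify",
    "parameters": {
        "judge": {
            "model": "meta-llama/Llama-3-70B-Instruct-Turbo",
            "system_template": "Imagine you are a helpful assistant",
            "model_source": "serverless",
        },
        "labels": ["yes", "no"],     # at least two possible labels
        "pass_labels": ["yes"],      # labels counted as passing
        "input_data_file_path": "file-1234-aefd",  # uploaded data file ID
        # model_to_evaluate may be a field name in the input data (string)
        # or a full EvaluationModelRequest object; a field name is assumed here.
        "model_to_evaluate": "model_output",
    },
}

resp = requests.post(
    "https://api.together.xyz/v1/evaluation",  # assumed path
    headers={"Authorization": f"Bearer {os.environ['TOGETHER_API_KEY']}"},
    json=request_body,
)
print(resp.json())  # expected shape: {"workflow_id": "...", "status": "pending"}
```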
- EndpointPricing: + EvaluationModelOrString: + oneOf: + - type: string + description: Field name in the input data + - $ref: '#/components/schemas/EvaluationModelRequest' + + EvaluationModelRequest: type: object - description: Pricing details for using an endpoint required: - - cents_per_minute + - model + - max_tokens + - temperature + - system_template + - input_template + - model_source properties: - cents_per_minute: + model: + type: string + description: Name of the model to evaluate + example: 'meta-llama/Llama-3-70B-Instruct-Turbo' + max_tokens: + type: integer + minimum: 1 + description: Maximum number of tokens to generate + example: 512 + temperature: type: number format: float - description: Cost per minute of endpoint uptime in cents - examples: - - 5.42 + minimum: 0 + maximum: 2 + description: Sampling temperature + example: 0.7 + system_template: + type: string + description: System prompt template + example: 'Imagine you are helpful assistant' + input_template: + type: string + description: Input prompt template + example: 'Please classify {{prompt}} based on the labels below' + model_source: + type: string + description: "Source of the model." + enum: [serverless, dedicated, external] + external_api_token: + type: string + description: "Bearer/API token for external models." + external_base_url: + type: string + description: "Base URL for external models. Must be OpenAI-compatible base URL" - HardwareAvailability: + EvaluationResponse: type: object - description: Indicates the current availability status of a hardware configuration - required: - - status properties: + workflow_id: + type: string + description: The ID of the created evaluation job + example: 'eval-1234-1244513' status: type: string - description: The availability status of the hardware configuration - enum: - - available - - unavailable - - insufficient + enum: [pending] + description: Initial status of the job - HardwareWithStatus: + EvaluationJob: type: object - description: Hardware configuration details with optional availability status - required: - - object - - id - - pricing - - specs - - updated_at properties: - object: + workflow_id: type: string - enum: - - hardware - id: + description: The evaluation job ID + example: 'eval-1234aedf' + type: type: string - description: Unique identifier for the hardware configuration - examples: - - 2x_nvidia_a100_80gb_sxm - pricing: - $ref: '#/components/schemas/EndpointPricing' - specs: - $ref: '#/components/schemas/HardwareSpec' - availability: - $ref: '#/components/schemas/HardwareAvailability' + enum: [classify, score, compare] + description: The type of evaluation + example: classify + owner_id: + type: string + description: ID of the job owner (admin only) + status: + type: string + enum: [pending, queued, running, completed, error, user_error] + description: Current status of the job + example: completed + status_updates: + type: array + items: + $ref: '#/components/schemas/EvaluationJobStatusUpdate' + description: History of status updates (admin only) + parameters: + type: object + description: The parameters used for this evaluation + additionalProperties: true + created_at: + type: string + format: date-time + description: When the job was created + example: '2025-07-23T17:10:04.837888Z' updated_at: type: string format: date-time - description: Timestamp of when the hardware status was last updated + description: When the job was last updated + example: '2025-07-23T17:10:04.837888Z' + results: + oneOf: + - $ref: 
'#/components/schemas/EvaluationClassifyResults' + - $ref: '#/components/schemas/EvaluationScoreResults' + - $ref: '#/components/schemas/EvaluationCompareResults' + - type: object + properties: + error: + type: string + nullable: true + description: Results of the evaluation (when completed) - CreateEndpointRequest: + EvaluationJobStatusUpdate: type: object - required: - - model - - hardware - - autoscaling properties: - display_name: + status: type: string - description: A human-readable name for the endpoint - examples: - - My Llama3 70b endpoint - model: + description: The status at this update + example: pending + message: type: string - description: The model to deploy on this endpoint - examples: - - meta-llama/Llama-3-8b-chat-hf - hardware: + description: Additional message for this update + example: Job is pending evaluation + timestamp: type: string - description: The hardware configuration to use for this endpoint - examples: - - 1x_nvidia_a100_80gb_sxm - autoscaling: - $ref: '#/components/schemas/Autoscaling' - description: Configuration for automatic scaling of the endpoint - disable_prompt_cache: - type: boolean - description: Whether to disable the prompt cache for this endpoint - default: false - disable_speculative_decoding: - type: boolean - description: Whether to disable speculative decoding for this endpoint - default: false - state: + format: date-time + description: When this update occurred + example: '2025-07-23T17:10:04.837888Z' + + EvaluationClassifyResults: + type: object + properties: + generation_fail_count: + type: number + format: integer + nullable: true + description: Number of failed generations. + example: 0 + judge_fail_count: + type: number + format: integer + nullable: true + description: Number of failed judge generations + example: 0 + invalid_label_count: + type: number + format: float + nullable: true + description: Number of invalid labels + example: 0 + result_file_id: type: string - description: The desired state of the endpoint - enum: - - STARTED - - STOPPED - default: STARTED - example: STARTED - inactive_timeout: - type: integer - description: The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to null, omit or set to 0 to disable automatic timeout. + description: Data File ID + example: file-1234-aefd + pass_percentage: + type: number + format: integer + nullable: true + description: Pecentage of pass labels. + example: 10 + label_counts: + type: string + description: JSON string representing label counts + example: '{"yes": 10, "no": 0}' + + EvaluationScoreResults: + type: object + properties: + aggregated_scores: + type: object + properties: + mean_score: + type: number + format: float + std_score: + type: number + format: float + pass_percentage: + type: number + format: float + generation_fail_count: + type: number + format: integer + nullable: true + description: Number of failed generations. 
+ example: 0 + judge_fail_count: + type: number + format: integer nullable: true - example: 60 - availability_zone: + description: Number of failed judge generations + example: 0 + invalid_score_count: + type: number + format: integer + description: number of invalid scores generated from model + failed_samples: + type: number + format: integer + description: number of failed samples generated from model + result_file_id: type: string - description: Create the endpoint in a specified availability zone (e.g., us-central-4b) + description: Data File ID + example: file-1234-aefd - DedicatedEndpoint: + EvaluationCompareResults: type: object - description: Details about a dedicated endpoint deployment - required: - - object - - id - - name - - display_name - - model - - hardware - - type - - owner - - state - - autoscaling - - created_at properties: - object: - type: string - enum: - - endpoint - description: The type of object - example: endpoint - id: - type: string - description: Unique identifier for the endpoint - example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 - name: - type: string - description: System name for the endpoint - example: devuser/meta-llama/Llama-3-8b-chat-hf-a32b82a1 - display_name: - type: string - description: Human-readable name for the endpoint - example: My Llama3 70b endpoint - model: - type: string - description: The model deployed on this endpoint - example: meta-llama/Llama-3-8b-chat-hf - hardware: - type: string - description: The hardware configuration used for this endpoint - example: 1x_nvidia_a100_80gb_sxm - type: - type: string - enum: - - dedicated - description: The type of endpoint - example: dedicated - owner: - type: string - description: The owner of this endpoint - example: devuser - state: - type: string - enum: - - PENDING - - STARTING - - STARTED - - STOPPING - - STOPPED - - ERROR - description: Current state of the endpoint - example: STARTED - autoscaling: - $ref: '#/components/schemas/Autoscaling' - description: Configuration for automatic scaling of the endpoint - created_at: + num_samples: + type: integer + description: Total number of samples compared + A_wins: + type: integer + description: Number of times model A won + B_wins: + type: integer + description: Number of times model B won + Ties: + type: integer + description: Number of ties + generation_fail_count: + type: number + format: integer + nullable: true + description: Number of failed generations. + example: 0 + judge_fail_count: + type: number + format: integer + nullable: true + description: Number of failed judge generations + example: 0 + result_file_id: type: string - format: date-time - description: Timestamp when the endpoint was created - example: 2025-02-04T10:43:55.405Z + description: Data File ID - ListEndpoint: + AudioFileBinary: + type: string + format: binary + description: Audio file to transcribe + + AudioFileUrl: + type: string + format: uri + description: Public HTTPS URL to audio file + + CreateVideoBody: + title: Create video request + description: Parameters for creating a new video generation job. 
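A sketch of polling an evaluation job and reading its type-specific results once it finishes. The GET path is an assumption; the status values and result fields (`label_counts` as a JSON string, `pass_percentage`, `A_wins`/`B_wins`/`Ties`) come from the EvaluationJob and results schemas above.

```python
# Poll an evaluation job until it reaches a terminal status, then read results.
import json
import os
import time
import requests

headers = {"Authorization": f"Bearer {os.environ['TOGETHER_API_KEY']}"}
workflow_id = "eval-1234aedf"  # returned by the create-evaluation call

while True:
    job = requests.get(
        f"https://api.together.xyz/v1/evaluation/{workflow_id}",  # assumed path
        headers=headers,
    ).json()
    if job["status"] in ("completed", "error", "user_error"):
        break
    time.sleep(10)

results = job.get("results") or {}
if job["type"] == "classify":
    # label_counts is a JSON *string*, e.g. '{"yes": 10, "no": 0}'
    counts = json.loads(results.get("label_counts", "{}"))
    print("pass %:", results.get("pass_percentage"), "counts:", counts)
elif job["type"] == "compare":
    print("A wins:", results.get("A_wins"),
          "B wins:", results.get("B_wins"),
          "ties:", results.get("Ties"))
```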
type: object - description: Details about an endpoint when listed via the list endpoint required: - - id - - object - - name - model - - type - - owner - - state - - created_at properties: - object: - type: string - enum: - - endpoint - description: The type of object - example: endpoint - id: - type: string - description: Unique identifier for the endpoint - example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7 - name: - type: string - description: System name for the endpoint - example: allenai/OLMo-7B model: type: string - description: The model deployed on this endpoint - example: allenai/OLMo-7B - type: - type: string - enum: - - serverless - - dedicated - description: The type of endpoint - example: serverless - owner: - type: string - description: The owner of this endpoint - example: together - state: - type: string - enum: - - PENDING - - STARTING - - STARTED - - STOPPING - - STOPPED - - ERROR - description: Current state of the endpoint - example: STARTED - created_at: + description: The model to be used for the video creation request. + prompt: type: string - format: date-time - description: Timestamp when the endpoint was created - example: 2024-02-28T21:34:35.444Z - - DisplayorExecuteOutput: - properties: - data: - properties: - application/geo+json: - type: object - application/javascript: - type: string - application/json: - type: object - application/pdf: - format: byte - type: string - application/vnd.vega.v5+json: - type: object - application/vnd.vegalite.v4+json: - type: object - image/gif: - format: byte - type: string - image/jpeg: - format: byte - type: string - image/png: - format: byte - type: string - image/svg+xml: - type: string - text/html: - type: string - text/latex: - type: string - text/markdown: - type: string - text/plain: - type: string - type: object - type: - enum: - - display_data - - execute_result + maxLength: 32000 + minLength: 1 + description: Text prompt that describes the video to generate. + height: + type: integer + width: + type: integer + seconds: type: string - required: - - type - - data - title: DisplayorExecuteOutput - - Error: - oneOf: - - type: string - - additionalProperties: true - type: object - title: Error + description: Clip duration in seconds. + fps: + type: integer + description: Frames per second. Defaults to 24. + steps: + type: integer + minimum: 10 + maximum: 50 + description: The number of denoising steps the model performs during video generation. More steps typically result in higher quality output but require longer processing time. + seed: + type: integer + description: Seed to use in initializing the video generation. Using the same seed allows deterministic video generation. If not provided a random seed is generated for each request. + guidance_scale: + type: integer + description: Controls how closely the video generation follows your prompt. Higher values make the model adhere more strictly to your text description, while lower values allow more creative freedom. guidence_scale affects both visual content and temporal consistency.Recommended range is 6.0-10.0 for most video models. Values above 12 may cause over-guidance artifacts or unnatural motion patterns. + output_format: + $ref: '#/components/schemas/VideoOutputFormat' + description: Specifies the format of the output video. Defaults to MP4. + output_quality: + type: integer + description: Compression quality. Defaults to 20. 
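A sketch of a create-video request that exercises the optional generation controls defined above (steps, seed, guidance_scale, output format). The `/v2/videos` path and base URL are assumptions here; the field names, types, and recommended ranges come from the CreateVideoBody schema.

```python
# Create a video job with explicit generation controls (endpoint assumed).
import os
import requests

body = {
    "model": "together/video-model",
    "prompt": "A cartoon of an astronaut riding a horse on the moon",
    "width": 1280,
    "height": 720,
    "seconds": "5",        # clip duration, passed as a string
    "fps": 24,
    "steps": 30,           # 10-50; more steps = higher quality, longer processing
    "seed": 42,            # fixed seed for deterministic generation
    "guidance_scale": 8,   # recommended 6-10; values above 12 risk artifacts
    "output_format": "MP4",
    "output_quality": 20,
}

resp = requests.post(
    "https://api.together.xyz/v2/videos",  # assumed path
    headers={"Authorization": f"Bearer {os.environ['TOGETHER_API_KEY']}"},
    json=body,
)
print(resp.json()["id"])  # job id to poll via the retrieve endpoint
```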
+ negative_prompt: + type: string + description: Similar to prompt, but specifies what to avoid instead of what to include + frame_images: + description: Array of images to guide video generation, similar to keyframes. + example: + - [ + { + "input_image": "aac49721-1964-481a-ae78-8a4e29b91402", + "frame": 0 + }, + { + "input_image": "c00abf5f-6cdb-4642-a01d-1bfff7bc3cf7", + "frame": 48 + }, + { + "input_image": "3ad204c3-a9de-4963-8a1a-c3911e3afafe", + "frame": "last" + } + ] + type: array + items: + $ref: '#/components/schemas/VideoFrameImageInput' + reference_images: + description: Unlike frame_images which constrain specific timeline positions, reference images guide the general appearance that should appear consistently across the video. + type: array + items: + type: string + VideoStatus: + description: Current lifecycle status of the video job. + type: string + enum: + - in_progress + - completed + - failed - ErrorOutput: - title: ErrorOutput - description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully. + VideoFrameImageInput: + type: object + required: ['input_image'] properties: - data: - type: string - type: - enum: - - error + input_image: type: string - required: - - type - - data + description: URL path to hosted image that is used for a frame + frame: + description: | + Optional param to specify where to insert the frame. If this is omitted, the following heuristics are applied: + - frame_images size is one, frame is first. + - If size is two, frames are first and last. + - If size is larger, frames are first, last and evenly spaced between. + anyOf: + - type: number + - type: string + enum: + - first + - last - ExecuteRequest: - title: ExecuteRequest - required: - - language - - code + VideoOutputFormat: + type: string + enum: + - MP4 + - WEBM + + VideoJob: properties: - code: - description: 'Code snippet to execute.' - example: "print('Hello, world!')" + id: + type: string + description: Unique identifier for the video job. + object: + description: The object type, which is always video. type: string - files: - description: Files to upload to the session. If present, files will be uploaded before executing the given code. - items: - properties: - content: - type: string - encoding: - description: Encoding of the file content. Use `string` for text files such as code, and `base64` for binary files, such as images. - enum: - - string - - base64 - type: string - name: - type: string - required: - - name - - encoding - - content - type: object - type: array - language: - default: python - description: Programming language for the code to execute. Currently only supports Python, but more will be added. enum: - - python - session_id: - description: Identifier of the current session. Used to make follow-up calls. Requests will return an error if the session does not belong to the caller or has expired. - example: ses_abcDEF123 - nullable: false + - video + model: type: string - - ExecuteResponse: - title: ExecuteResponse - type: object - description: 'The result of the execution. If successful, `data` contains the result and `errors` will be null. If unsuccessful, `data` will be null and `errors` will contain the errors.' 
- oneOf: - - title: SuccessfulExecution - type: object - required: [data, errors] - properties: - errors: - type: 'null' - data: - type: object - nullable: false - required: [session_id, outputs] - properties: - outputs: - type: array - items: - discriminator: - propertyName: type - oneOf: - - title: StreamOutput - description: Outputs that were printed to stdout or stderr - type: object - required: [type, data] - properties: - type: - enum: - - stdout - - stderr - type: string - data: - type: string - - description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully. - properties: - data: - type: string - type: - enum: - - error - type: string - required: - - type - - data - title: ErrorOutput - - properties: - data: - properties: - application/geo+json: - type: object - additionalProperties: true - application/javascript: - type: string - application/json: - type: object - additionalProperties: true - application/pdf: - format: byte - type: string - application/vnd.vega.v5+json: - type: object - additionalProperties: true - application/vnd.vegalite.v4+json: - type: object - additionalProperties: true - image/gif: - format: byte - type: string - image/jpeg: - format: byte - type: string - image/png: - format: byte - type: string - image/svg+xml: - type: string - text/html: - type: string - text/latex: - type: string - text/markdown: - type: string - text/plain: - type: string - type: object - type: - enum: - - display_data - - execute_result - type: string - required: - - type - - data - title: DisplayorExecuteOutput - title: InterpreterOutput - session_id: - type: string - description: Identifier of the current session. Used to make follow-up calls. - example: ses_abcDEF123 - nullable: false - status: - type: string - enum: - - success - description: Status of the execution. Currently only supports success. - - title: FailedExecution - type: object - required: [data, errors] - properties: - data: - type: 'null' - errors: - type: array - items: - title: Error - oneOf: - - type: string - - type: object - additionalProperties: true - - InterpreterOutput: - discriminator: - propertyName: type - oneOf: - - description: Outputs that were printed to stdout or stderr + description: The video generation model that produced the job. + status: + $ref: '#/components/schemas/VideoStatus' + description: Current lifecycle status of the video job. + created_at: + type: number + description: Unix timestamp (seconds) for when the job was created. + completed_at: + type: number + description: Unix timestamp (seconds) for when the job completed, if finished. + size: + type: string + description: The resolution of the generated video. + seconds: + type: string + description: Duration of the generated clip in seconds. + error: + description: Error payload that explains why generation failed, if applicable. + type: object properties: - data: + code: type: string - type: - enum: - - stdout - - stderr + message: type: string required: - - type - - data - title: StreamOutput - - description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully. 
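A sketch of polling a video job until it leaves `in_progress` and then reading either the hosted result or the error payload, per the VideoJob schema. The GET path is an assumption; `status`, `outputs.video_url`, and `error.message` are the fields documented in this schema.

```python
# Poll a video job and handle the completed/failed outcomes (endpoint assumed).
import os
import time
import requests

headers = {"Authorization": f"Bearer {os.environ['TOGETHER_API_KEY']}"}
video_id = "video-job-id"  # from the create-video response

while True:
    job = requests.get(
        f"https://api.together.xyz/v2/videos/{video_id}",  # assumed path
        headers=headers,
    ).json()
    if job["status"] != "in_progress":
        break
    time.sleep(5)

if job["status"] == "completed":
    # outputs carries the billed cost and the hosted URL of the result
    print(job["outputs"]["video_url"])
else:  # "failed"
    print(job.get("error", {}).get("message"))
```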
+ - message + outputs: + description: Available upon completion, the outputs provides the cost charged and the hosted url to access the video + type: object properties: - data: - type: string - type: - enum: - - error - type: string - required: - - type - - data - title: ErrorOutput - - properties: - data: - properties: - application/geo+json: - type: object - application/javascript: - type: string - application/json: - type: object - application/pdf: - format: byte - type: string - application/vnd.vega.v5+json: - type: object - application/vnd.vegalite.v4+json: - type: object - image/gif: - format: byte - type: string - image/jpeg: - format: byte - type: string - image/png: - format: byte - type: string - image/svg+xml: - type: string - text/html: - type: string - text/latex: - type: string - text/markdown: - type: string - text/plain: - type: string - type: object - type: - enum: - - display_data - - execute_result + cost: + type: integer + description: The cost of generated video charged to the owners account. + video_url: type: string + description: URL hosting the generated video required: - - type - - data - title: DisplayorExecuteOutput - title: InterpreterOutput - - Response: + - cost + - video_url + type: object + required: + - id + - model + - status + - size + - seconds + - created_at + title: Video job + description: Structured information describing a generated video job. + ContainerStatus: properties: - errors: + finishedAt: + description: FinishedAt is the timestamp when the container finished execution + (if terminated) + type: string + message: + description: Message provides a human-readable message with details about the + container's status + type: string + name: + description: Name is the name of the container + type: string + reason: + description: Reason provides a brief machine-readable reason for the container's + current status + type: string + startedAt: + description: StartedAt is the timestamp when the container started execution + type: string + status: + description: Status is the current state of the container (e.g., "Running", + "Terminated", "Waiting") + type: string + type: object + CreateDeploymentRequest: + properties: + args: + description: Args overrides the container's CMD. Provide as an array of + arguments (e.g., ["python", "app.py"]) items: - oneOf: - - type: string - - additionalProperties: true - type: object - title: Error + type: string type: array - title: Response - type: object - - SessionListResponse: - allOf: - - properties: - errors: - items: - oneOf: - - type: string - - additionalProperties: true - type: object - title: Error - type: array - title: Response - type: object - - properties: - data: - properties: - sessions: - items: - properties: - execute_count: - type: integer - expires_at: - format: date-time - type: string - id: - description: Session Identifier. Used to make follow-up calls. - example: ses_abcDEF123 - type: string - last_execute_at: - format: date-time - type: string - started_at: - format: date-time - type: string - required: - - execute_count - - expires_at - - id - - last_execute_at - - started_at - type: object - type: array - required: - - sessions + autoscaling: + additionalProperties: + type: string + description: 'Autoscaling configuration as key-value pairs. 
Example: {"metric": + "QueueBacklogPerWorker", "target": "10"} to scale based on queue + backlog' type: object - title: SessionListResponse - type: object - - StreamOutput: - description: Outputs that were printed to stdout or stderr - properties: - data: + command: + description: Command overrides the container's ENTRYPOINT. Provide as an array + (e.g., ["/bin/sh", "-c"]) + items: + type: string + type: array + cpu: + description: CPU is the number of CPU cores to allocate per container instance + (e.g., 0.1 = 100 milli cores) + minimum: 0.1 + type: number + description: + description: Description is an optional human-readable description of your + deployment type: string - type: + environment_variables: + description: EnvironmentVariables is a list of environment variables to set in + the container. Each must have a name and either a value or + value_from_secret + items: + $ref: "#/components/schemas/EnvironmentVariable" + type: array + gpu_count: + description: GPUCount is the number of GPUs to allocate per container instance. + Defaults to 0 if not specified + type: integer + gpu_type: + description: GPUType specifies the GPU hardware to use (e.g., "h100-80gb"). enum: - - stdout - - stderr + - h100-80gb + - " a100-80gb" type: string + health_check_path: + description: HealthCheckPath is the HTTP path for health checks (e.g., + "/health"). If set, the platform will check this endpoint to + determine container health + type: string + image: + description: Image is the container image to deploy from registry.together.ai. + type: string + max_replicas: + description: MaxReplicas is the maximum number of container instances that can + be scaled up to. If not set, will be set to MinReplicas + type: integer + memory: + description: Memory is the amount of RAM to allocate per container instance in + GiB (e.g., 0.5 = 512MiB) + minimum: 0.1 + type: number + min_replicas: + description: MinReplicas is the minimum number of container instances to run. + Defaults to 1 if not specified + type: integer + name: + description: Name is the unique identifier for your deployment. Must contain + only alphanumeric characters, underscores, or hyphens (1-100 + characters) + maxLength: 100 + minLength: 1 + type: string + port: + description: Port is the container port your application listens on (e.g., 8080 + for web servers). Required if your application serves traffic + type: integer + storage: + description: Storage is the amount of ephemeral disk storage to allocate per + container instance (e.g., 10 = 10GiB) + type: integer + termination_grace_period_seconds: + description: TerminationGracePeriodSeconds is the time in seconds to wait for + graceful shutdown before forcefully terminating the replica + type: integer + volumes: + description: Volumes is a list of volume mounts to attach to the container. Each + mount must reference an existing volume by name + items: + $ref: "#/components/schemas/VolumeMount" + type: array required: - - type - - data - title: StreamOutput - - CreateBatchRequest: + - gpu_type + - image + - name type: object - required: [endpoint, input_file_id] + CreateSecretRequest: properties: - endpoint: + description: + description: Description is an optional human-readable description of the + secret's purpose (max 500 characters) + maxLength: 500 type: string - description: The endpoint to use for batch processing - example: '/v1/chat/completions' - input_file_id: + name: + description: Name is the unique identifier for the secret. 
Can contain + alphanumeric characters, underscores, hyphens, forward slashes, and + periods (1-100 characters) + maxLength: 100 + minLength: 1 type: string - description: ID of the uploaded input file containing batch requests - example: 'file-abc123def456ghi789' - completion_window: + project_id: + description: ProjectID is ignored - the project is automatically determined from + your authentication type: string - description: Time window for batch completion (optional) - example: '24h' - priority: - type: integer - description: Priority for batch processing (optional) - example: 1 - model_id: + value: + description: Value is the sensitive data to store securely (e.g., API keys, + passwords, tokens). This value will be encrypted at rest + minLength: 1 type: string - description: 'Model to use for processing batch requests' - example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo' - BatchErrorResponse: + required: + - name + - value type: object + CreateVolumeRequest: properties: - error: + content: + allOf: + - $ref: "#/components/schemas/volumes.VolumeContent" + description: Content specifies the content configuration for this volume + name: + description: Name is the unique identifier for the volume within the project type: string - BatchJobWithWarning: + type: + allOf: + - $ref: "#/components/schemas/volumes.VolumeType" + description: Type is the volume type (currently only "readOnly" is supported) + required: + - content + - name + - type type: object + DeploymentListResponse: properties: - job: - $ref: '#/components/schemas/BatchJob' - warning: + data: + description: Data is the array of deployment items + items: + $ref: "#/components/schemas/DeploymentResponseItem" + type: array + object: + description: Object is the type identifier for this response (always "list") type: string - BatchJob: type: object + DeploymentLogs: properties: - id: - type: string - format: uuid - example: '01234567-8901-2345-6789-012345678901' - user_id: + lines: + items: + type: string + type: array + type: object + DeploymentResponseItem: + properties: + args: + description: Args are the arguments passed to the container's command + items: + type: string + type: array + autoscaling: + additionalProperties: + type: string + description: Autoscaling contains autoscaling configuration parameters for this + deployment + type: object + command: + description: Command is the entrypoint command run in the container + items: + type: string + type: array + cpu: + description: CPU is the amount of CPU resource allocated to each replica in + cores (fractional value is allowed) + type: number + created_at: + description: CreatedAt is the ISO8601 timestamp when this deployment was created type: string - example: 'user_789xyz012' - input_file_id: + description: + description: Description provides a human-readable explanation of the + deployment's purpose or content type: string - example: 'file-input123abc456def' - file_size_bytes: + desired_replicas: + description: DesiredReplicas is the number of replicas that the orchestrator is + targeting type: integer - format: int64 - example: 1048576 - description: 'Size of input file in bytes' - status: - $ref: '#/components/schemas/BatchJobStatus' - job_deadline: - type: string - format: date-time - example: '2024-01-15T15:30:00Z' - created_at: + environment_variables: + description: EnvironmentVariables is a list of environment variables set in the + container + items: + $ref: "#/components/schemas/EnvironmentVariable" + type: array + gpu_count: + description: GPUCount is the 
number of GPUs allocated to each replica in this + deployment + type: integer + gpu_type: + description: GPUType specifies the type of GPU requested (if any) for this + deployment + enum: + - h100-80gb + - " a100-80gb" type: string - format: date-time - example: '2024-01-15T14:30:00Z' - endpoint: + health_check_path: + description: HealthCheckPath is the HTTP path used for health checks of the + application type: string - example: '/v1/chat/completions' - progress: - type: number - format: float64 - example: 75.0 - description: 'Completion progress (0.0 to 100)' - model_id: + id: + description: ID is the unique identifier of the deployment type: string - example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo' - description: 'Model used for processing requests' - output_file_id: + image: + description: Image specifies the container image used for this deployment type: string - example: 'file-output789xyz012ghi' - error_file_id: + max_replicas: + description: MaxReplicas is the maximum number of replicas to run for this + deployment + type: integer + memory: + description: Memory is the amount of memory allocated to each replica in GiB + (fractional value is allowed) + type: number + min_replicas: + description: MinReplicas is the minimum number of replicas to run for this + deployment + type: integer + name: + description: Name is the name of the deployment type: string - example: 'file-errors456def789jkl' - error: + object: + description: Object is the type identifier for this response (always "deployment") type: string - completed_at: + port: + description: Port is the container port that the deployment exposes + type: integer + ready_replicas: + description: ReadyReplicas is the current number of replicas that are in the + Ready state + type: integer + replica_events: + additionalProperties: + $ref: "#/components/schemas/ReplicaEvent" + description: ReplicaEvents is a mapping of replica names or IDs to their status + events + type: object + status: + allOf: + - $ref: "#/components/schemas/DeploymentStatus" + description: Status represents the overall status of the deployment (e.g., + Updating, Scaling, Ready, Failed) + enum: + - Updating + - Scaling + - Ready + - Failed + storage: + description: Storage is the amount of storage (in MB or units as defined by the + platform) allocated to each replica + type: integer + updated_at: + description: UpdatedAt is the ISO8601 timestamp when this deployment was last + updated type: string - format: date-time - example: '2024-01-15T15:45:30Z' - BatchJobStatus: - type: string - enum: - - VALIDATING - - IN_PROGRESS - - COMPLETED - - FAILED - - EXPIRED - - CANCELLED - example: 'IN_PROGRESS' - description: 'Current status of the batch job' - - EvaluationTypedRequest: + volumes: + description: Volumes is a list of volume mounts for this deployment + items: + $ref: "#/components/schemas/VolumeMount" + type: array type: object - required: - - type - - parameters + DeploymentStatus: + enum: + - Updating + - Scaling + - Ready + - Failed + type: string + x-enum-varnames: + - DeploymentStatusUpdating + - DeploymentStatusScaling + - DeploymentStatusReady + - DeploymentStatusFailed + EnvironmentVariable: properties: - type: + name: + description: Name is the environment variable name (e.g., "DATABASE_URL"). Must + start with a letter or underscore, followed by letters, numbers, or + underscores + type: string + value: + description: Value is the plain text value for the environment variable. Use + this for non-sensitive values. 
Either Value or ValueFromSecret must + be set, but not both + type: string + value_from_secret: + description: ValueFromSecret references a secret by name or ID to use as the + value. Use this for sensitive values like API keys or passwords. + Either Value or ValueFromSecret must be set, but not both type: string - enum: [classify, score, compare] - description: The type of evaluation to perform - example: 'classify' - parameters: - oneOf: - - $ref: '#/components/schemas/EvaluationClassifyParameters' - - $ref: '#/components/schemas/EvaluationScoreParameters' - - $ref: '#/components/schemas/EvaluationCompareParameters' - description: Type-specific parameters for the evaluation - - EvaluationClassifyParameters: - type: object required: - - judge - - labels - - pass_labels - - input_data_file_path + - name + type: object + ImageListResponse: properties: - judge: - $ref: '#/components/schemas/EvaluationJudgeModelConfig' - labels: - type: array + data: + description: Data is the array of image items items: - type: string - minItems: 2 - description: List of possible classification labels - example: ['yes', 'no'] - pass_labels: + $ref: "#/components/schemas/ImageResponseItem" type: array - items: - type: string - minItems: 1 - description: List of labels that are considered passing - example: ['yes'] - model_to_evaluate: - $ref: '#/components/schemas/EvaluationModelOrString' - input_data_file_path: + object: + description: Object is the type identifier for this response (always "list") type: string - description: Data file ID - example: 'file-1234-aefd' - - EvaluationScoreParameters: type: object - required: - - judge - - min_score - - max_score - - pass_threshold - - input_data_file_path + ImageResponseItem: properties: - judge: - $ref: '#/components/schemas/EvaluationJudgeModelConfig' - min_score: - type: number - format: float - example: 0.0 - description: Minimum possible score - max_score: - type: number - format: float - example: 10.0 - description: Maximum possible score - pass_threshold: - type: number - format: float - example: 7.0 - description: Score threshold for passing - model_to_evaluate: - $ref: '#/components/schemas/EvaluationModelOrString' - input_data_file_path: + object: + description: Object is the type identifier for this response (always "image") type: string - example: 'file-01234567890123456789' - description: Data file ID - - EvaluationCompareParameters: - type: object - required: - - judge - - input_data_file_path - properties: - judge: - $ref: '#/components/schemas/EvaluationJudgeModelConfig' - model_a: - $ref: '#/components/schemas/EvaluationModelOrString' - model_b: - $ref: '#/components/schemas/EvaluationModelOrString' - input_data_file_path: + tag: + description: Tag is the image tag/version identifier (e.g., "latest", "v1.0.0") + type: string + url: + description: URL is the full registry URL for this image including tag (e.g., + "registry.together.ai/project-id/repository:tag") type: string - description: Data file name - - EvaluationJudgeModelConfig: type: object - required: - - model - - system_template - - model_source + KubernetesEvent: properties: - model: + action: + description: Action is the action taken or reported by this event type: string - description: Name of the judge model - example: 'meta-llama/Llama-3-70B-Instruct-Turbo' - system_template: + count: + description: Count is the number of times this event has occurred + type: integer + first_seen: + description: FirstSeen is the timestamp when this event was first observed type: string - 
description: System prompt template for the judge - example: 'Imagine you are a helpful assistant' - model_source: + last_seen: + description: LastSeen is the timestamp when this event was last observed type: string - description: "Source of the judge model." - enum: [serverless, dedicated, external] - external_api_token: + message: + description: Message is a human-readable description of the event type: string - description: "Bearer/API token for external judge models." - external_base_url: + reason: + description: Reason is a brief machine-readable reason for this event (e.g., + "Pulling", "Started", "Failed") type: string - description: "Base URL for external judge models. Must be OpenAI-compatible base URL." - - EvaluationModelOrString: - oneOf: - - type: string - description: Field name in the input data - - $ref: '#/components/schemas/EvaluationModelRequest' - - EvaluationModelRequest: type: object - required: - - model - - max_tokens - - temperature - - system_template - - input_template - - model_source + ListSecretsResponse: properties: - model: + data: + description: Data is the array of secret items + items: + $ref: "#/components/schemas/SecretResponseItem" + type: array + object: + description: Object is the type identifier for this response (always "list") type: string - description: Name of the model to evaluate - example: 'meta-llama/Llama-3-70B-Instruct-Turbo' - max_tokens: - type: integer - minimum: 1 - description: Maximum number of tokens to generate - example: 512 - temperature: - type: number - format: float - minimum: 0 - maximum: 2 - description: Sampling temperature - example: 0.7 - system_template: + type: object + ListVolumesResponse: + properties: + data: + description: Data is the array of volume items + items: + $ref: "#/components/schemas/VolumeResponseItem" + type: array + object: + description: Object is the type identifier for this response (always "list") type: string - description: System prompt template - example: 'Imagine you are helpful assistant' - input_template: + type: object + ReplicaEvent: + properties: + container_status: + allOf: + - $ref: "#/components/schemas/ContainerStatus" + description: ContainerStatus provides detailed status information about the + container within this replica + events: + description: Events is a list of Kubernetes events related to this replica for + troubleshooting + items: + $ref: "#/components/schemas/KubernetesEvent" + type: array + replica_completed_at: + description: ReplicaCompletedAt is the timestamp when the replica finished + execution type: string - description: Input prompt template - example: 'Please classify {{prompt}} based on the labels below' - model_source: + replica_marked_for_termination_at: + description: ReplicaMarkedForTerminationAt is the timestamp when the replica was + marked for termination type: string - description: "Source of the model." - enum: [serverless, dedicated, external] - external_api_token: + replica_ready_since: + description: ReplicaReadySince is the timestamp when the replica became ready to + serve traffic type: string - description: "Bearer/API token for external models." - external_base_url: + replica_running_since: + description: ReplicaRunningSince is the timestamp when the replica entered the + running state type: string - description: "Base URL for external models. 
Must be OpenAI-compatible base URL" - - EvaluationResponse: - type: object - properties: - workflow_id: + replica_started_at: + description: ReplicaStartedAt is the timestamp when the replica was created type: string - description: The ID of the created evaluation job - example: 'eval-1234-1244513' - status: + replica_status: + description: ReplicaStatus is the current status of the replica (e.g., + "Running", "Pending", "Failed") + type: string + replica_status_message: + description: ReplicaStatusMessage provides a human-readable message explaining + the replica's status + type: string + replica_status_reason: + description: ReplicaStatusReason provides a brief machine-readable reason for + the replica's status + type: string + scheduled_on_cluster: + description: ScheduledOnCluster identifies which cluster this replica is + scheduled on type: string - enum: [pending] - description: Initial status of the job - - EvaluationJob: type: object + RepositoryListResponse: properties: - workflow_id: + data: + description: Data is the array of repository items + items: + $ref: "#/components/schemas/RepositoryResponseItem" + type: array + object: + description: Object is the type identifier for this response (always "list") type: string - description: The evaluation job ID - example: 'eval-1234aedf' - type: + type: object + RepositoryResponseItem: + properties: + id: + description: ID is the unique identifier for this repository (repository name + with slashes replaced by "___") type: string - enum: [classify, score, compare] - description: The type of evaluation - example: classify - owner_id: + object: + description: Object is the type identifier for this response (always + "image-repository") type: string - description: ID of the job owner (admin only) - status: + url: + description: URL is the full registry URL for this repository (e.g., + "registry.together.ai/project-id/repository-name") type: string - enum: [pending, queued, running, completed, error, user_error] - description: Current status of the job - example: completed - status_updates: - type: array - items: - $ref: '#/components/schemas/EvaluationJobStatusUpdate' - description: History of status updates (admin only) - parameters: - type: object - description: The parameters used for this evaluation - additionalProperties: true + type: object + SecretResponseItem: + properties: created_at: + description: CreatedAt is the ISO8601 timestamp when this secret was created type: string - format: date-time - description: When the job was created - example: '2025-07-23T17:10:04.837888Z' - updated_at: + created_by: + description: CreatedBy is the identifier of the user who created this secret type: string - format: date-time - description: When the job was last updated - example: '2025-07-23T17:10:04.837888Z' - results: - oneOf: - - $ref: '#/components/schemas/EvaluationClassifyResults' - - $ref: '#/components/schemas/EvaluationScoreResults' - - $ref: '#/components/schemas/EvaluationCompareResults' - - type: object - properties: - error: - type: string - nullable: true - description: Results of the evaluation (when completed) - - EvaluationJobStatusUpdate: - type: object - properties: - status: + description: + description: Description is a human-readable description of the secret's purpose type: string - description: The status at this update - example: pending - message: + id: + description: ID is the unique identifier for this secret type: string - description: Additional message for this update - example: Job is pending evaluation - 
timestamp: + last_updated_by: + description: LastUpdatedBy is the identifier of the user who last updated this + secret type: string - format: date-time - description: When this update occurred - example: '2025-07-23T17:10:04.837888Z' - - EvaluationClassifyResults: - type: object - properties: - generation_fail_count: - type: number - format: integer - nullable: true - description: Number of failed generations. - example: 0 - judge_fail_count: - type: number - format: integer - nullable: true - description: Number of failed judge generations - example: 0 - invalid_label_count: - type: number - format: float - nullable: true - description: Number of invalid labels - example: 0 - result_file_id: + name: + description: Name is the name/key of the secret type: string - description: Data File ID - example: file-1234-aefd - pass_percentage: - type: number - format: integer - nullable: true - description: Pecentage of pass labels. - example: 10 - label_counts: + object: + description: Object is the type identifier for this response (always "secret") + type: string + updated_at: + description: UpdatedAt is the ISO8601 timestamp when this secret was last updated type: string - description: JSON string representing label counts - example: '{"yes": 10, "no": 0}' - - EvaluationScoreResults: type: object + UpdateDeploymentRequest: properties: - aggregated_scores: + args: + description: Args overrides the container's CMD. Provide as an array of + arguments (e.g., ["python", "app.py"]) + items: + type: string + type: array + autoscaling: + additionalProperties: + type: string + description: 'Autoscaling configuration as key-value pairs. Example: {"metric": + "QueueBacklogPerWorker", "target": "10"} to scale based on queue + backlog' type: object - properties: - mean_score: - type: number - format: float - std_score: - type: number - format: float - pass_percentage: - type: number - format: float - generation_fail_count: - type: number - format: integer - nullable: true - description: Number of failed generations. - example: 0 - judge_fail_count: - type: number - format: integer - nullable: true - description: Number of failed judge generations - example: 0 - invalid_score_count: - type: number - format: integer - description: number of invalid scores generated from model - failed_samples: + command: + description: Command overrides the container's ENTRYPOINT. Provide as an array + (e.g., ["/bin/sh", "-c"]) + items: + type: string + type: array + cpu: + description: CPU is the number of CPU cores to allocate per container instance + (e.g., 0.1 = 100 milli cores) + minimum: 0.1 type: number - format: integer - description: number of failed samples generated from model - result_file_id: + description: + description: Description is an optional human-readable description of your + deployment type: string - description: Data File ID - example: file-1234-aefd - - EvaluationCompareResults: - type: object - properties: - num_samples: - type: integer - description: Total number of samples compared - A_wins: - type: integer - description: Number of times model A won - B_wins: - type: integer - description: Number of times model B won - Ties: + environment_variables: + description: EnvironmentVariables is a list of environment variables to set in + the container. 
This will replace all existing environment variables + items: + $ref: "#/components/schemas/EnvironmentVariable" + type: array + gpu_count: + description: GPUCount is the number of GPUs to allocate per container instance type: integer - description: Number of ties - generation_fail_count: - type: number - format: integer - nullable: true - description: Number of failed generations. - example: 0 - judge_fail_count: - type: number - format: integer - nullable: true - description: Number of failed judge generations - example: 0 - result_file_id: - type: string - description: Data File ID - - AudioFileBinary: - type: string - format: binary - description: Audio file to transcribe - - AudioFileUrl: - type: string - format: uri - description: Public HTTPS URL to audio file - - CreateVideoBody: - title: Create video request - description: Parameters for creating a new video generation job. - type: object - required: - - model - properties: - model: + gpu_type: + description: GPUType specifies the GPU hardware to use (e.g., "h100-80gb") + enum: + - h100-80gb + - " a100-80gb" type: string - description: The model to be used for the video creation request. - prompt: + health_check_path: + description: HealthCheckPath is the HTTP path for health checks (e.g., + "/health"). Set to empty string to disable health checks type: string - maxLength: 32000 - minLength: 1 - description: Text prompt that describes the video to generate. - height: - type: integer - width: - type: integer - seconds: + image: + description: Image is the container image to deploy from registry.together.ai. type: string - description: Clip duration in seconds. - fps: + max_replicas: + description: MaxReplicas is the maximum number of replicas that can be scaled up + to. type: integer - description: Frames per second. Defaults to 24. - steps: + memory: + description: Memory is the amount of RAM to allocate per container instance in + GiB (e.g., 0.5 = 512MiB) + minimum: 0.1 + type: number + min_replicas: + description: MinReplicas is the minimum number of replicas to run type: integer - minimum: 10 - maximum: 50 - description: The number of denoising steps the model performs during video generation. More steps typically result in higher quality output but require longer processing time. - seed: + name: + description: Name is the new unique identifier for your deployment. Must contain + only alphanumeric characters, underscores, or hyphens (1-100 + characters) + maxLength: 100 + minLength: 1 + type: string + port: + description: Port is the container port your application listens on (e.g., 8080 + for web servers) type: integer - description: Seed to use in initializing the video generation. Using the same seed allows deterministic video generation. If not provided a random seed is generated for each request. - guidance_scale: + storage: + description: Storage is the amount of ephemeral disk storage to allocate per + container instance (e.g., 10 = 10GiB) type: integer - description: Controls how closely the video generation follows your prompt. Higher values make the model adhere more strictly to your text description, while lower values allow more creative freedom. guidence_scale affects both visual content and temporal consistency.Recommended range is 6.0-10.0 for most video models. Values above 12 may cause over-guidance artifacts or unnatural motion patterns. - output_format: - $ref: '#/components/schemas/VideoOutputFormat' - description: Specifies the format of the output video. Defaults to MP4. 
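+    # Illustrative only: a minimal request body assembled from the UpdateDeploymentRequest
+    # fields added in this hunk. The image tag, the volume name "model-cache", and every
+    # concrete value below are hypothetical placeholders, and the endpoint that accepts
+    # this body is not shown here.
+    #
+    #   description: "inference API"
+    #   image: "registry.together.ai/my-project/my-api:latest"
+    #   cpu: 2
+    #   memory: 8
+    #   gpu_type: "h100-80gb"
+    #   gpu_count: 1
+    #   min_replicas: 1
+    #   max_replicas: 4
+    #   autoscaling:
+    #     metric: "QueueBacklogPerWorker"
+    #     target: "10"
+    #   port: 8080
+    #   health_check_path: "/health"
+    #   volumes:
+    #     - name: "model-cache"
+    #       mount_path: "/data"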
- output_quality: + termination_grace_period_seconds: + description: TerminationGracePeriodSeconds is the time in seconds to wait for + graceful shutdown before forcefully terminating the replica type: integer - description: Compression quality. Defaults to 20. - negative_prompt: - type: string - description: Similar to prompt, but specifies what to avoid instead of what to include - frame_images: - description: Array of images to guide video generation, similar to keyframes. - example: - - [ - { - "input_image": "aac49721-1964-481a-ae78-8a4e29b91402", - "frame": 0 - }, - { - "input_image": "c00abf5f-6cdb-4642-a01d-1bfff7bc3cf7", - "frame": 48 - }, - { - "input_image": "3ad204c3-a9de-4963-8a1a-c3911e3afafe", - "frame": "last" - } - ] - type: array + volumes: + description: Volumes is a list of volume mounts to attach to the container. This + will replace all existing volumes items: - $ref: '#/components/schemas/VideoFrameImageInput' - reference_images: - description: Unlike frame_images which constrain specific timeline positions, reference images guide the general appearance that should appear consistently across the video. + $ref: "#/components/schemas/VolumeMount" type: array - items: - type: string - VideoStatus: - description: Current lifecycle status of the video job. - type: string - enum: - - in_progress - - completed - - failed - - VideoFrameImageInput: type: object - required: ['input_image'] + UpdateSecretRequest: properties: - input_image: + description: + description: Description is an optional human-readable description of the + secret's purpose (max 500 characters) + maxLength: 500 type: string - description: URL path to hosted image that is used for a frame - frame: - description: | - Optional param to specify where to insert the frame. If this is omitted, the following heuristics are applied: - - frame_images size is one, frame is first. - - If size is two, frames are first and last. - - If size is larger, frames are first, last and evenly spaced between. - anyOf: - - type: number - - type: string - enum: - - first - - last - - VideoOutputFormat: - type: string - enum: - - MP4 - - WEBM - - VideoJob: + name: + description: Name is the new unique identifier for the secret. Can contain + alphanumeric characters, underscores, hyphens, forward slashes, and + periods (1-100 characters) + maxLength: 100 + minLength: 1 + type: string + project_id: + description: ProjectID is ignored - the project is automatically determined from + your authentication + type: string + value: + description: Value is the new sensitive data to store securely. Updating this + will replace the existing secret value + minLength: 1 + type: string + type: object + UpdateVolumeRequest: + properties: + content: + allOf: + - $ref: "#/components/schemas/volumes.VolumeContent" + description: Content specifies the new content that will be preloaded to this + volume + name: + description: Name is the new unique identifier for the volume within the project + type: string + type: + allOf: + - $ref: "#/components/schemas/volumes.VolumeType" + description: Type is the new volume type (currently only "readOnly" is supported) + type: object + VolumeMount: + properties: + mount_path: + description: MountPath is the path in the container where the volume will be + mounted (e.g., "/data") + type: string + name: + description: Name is the name of the volume to mount. 
Must reference an existing + volume by name or ID + type: string + required: + - mount_path + - name + type: object + VolumeResponseItem: properties: + content: + allOf: + - $ref: "#/components/schemas/volumes.VolumeContent" + description: Content specifies the content that will be preloaded to this volume + created_at: + description: CreatedAt is the ISO8601 timestamp when this volume was created + type: string id: + description: ID is the unique identifier for this volume + type: string + name: + description: Name is the name of the volume type: string - description: Unique identifier for the video job. object: - description: The object type, which is always video. + description: Object is the type identifier for this response (always "volume") type: string - enum: - - video - model: + type: + allOf: + - $ref: "#/components/schemas/volumes.VolumeType" + description: Type is the volume type (e.g., "readOnly") + updated_at: + description: UpdatedAt is the ISO8601 timestamp when this volume was last updated type: string - description: The video generation model that produced the job. - status: - $ref: '#/components/schemas/VideoStatus' - description: Current lifecycle status of the video job. - created_at: - type: number - description: Unix timestamp (seconds) for when the job was created. - completed_at: - type: number - description: Unix timestamp (seconds) for when the job completed, if finished. - size: + type: object + VolumeContent: + properties: + source_prefix: + description: SourcePrefix is the file path prefix for the content to be + preloaded into the volume + example: models/ type: string - description: The resolution of the generated video. - seconds: + type: + description: Type is the content type (currently only "files" is supported which + allows preloading files uploaded via Files API into the volume) + enum: + - files + example: files type: string - description: Duration of the generated clip in seconds. - error: - description: Error payload that explains why generation failed, if applicable. - type: object - properties: - code: - type: string - message: - type: string - required: - - message - outputs: - description: Available upon completion, the outputs provides the cost charged and the hosted url to access the video - type: object - properties: - cost: - type: integer - description: The cost of generated video charged to the owners account. - video_url: - type: string - description: URL hosting the generated video - required: - - cost - - video_url type: object - required: - - id - - model - - status - - size - - seconds - - created_at - title: Video job - description: Structured information describing a generated video job. 
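+    # Illustrative only: how the volume schemas added in this hunk fit together. A
+    # read-only volume preloads files uploaded under the "models/" prefix (VolumeContent),
+    # and a VolumeMount on a deployment attaches that volume at "/data" by name. The
+    # volume name "model-weights" is a hypothetical placeholder.
+    #
+    #   name: "model-weights"          # volume definition
+    #   type: "readOnly"
+    #   content:
+    #     type: "files"
+    #     source_prefix: "models/"
+    #
+    #   volumes:                       # corresponding VolumeMount on a deployment
+    #     - name: "model-weights"
+    #       mount_path: "/data"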
+ VolumeType: + enum: + - readOnly + type: string + x-enum-varnames: + - VolumeTypeReadOnly From 29ef0bd1799714800c2db791e699ce3e95f94c16 Mon Sep 17 00:00:00 2001 From: Blaine Kasten Date: Wed, 17 Dec 2025 10:56:39 -0600 Subject: [PATCH 4/4] fix --- openapi.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/openapi.yaml b/openapi.yaml index ae7b6e2..3fd8889 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -9944,14 +9944,14 @@ components: properties: content: allOf: - - $ref: "#/components/schemas/volumes.VolumeContent" + - $ref: "#/components/schemas/VolumeContent" description: Content specifies the content configuration for this volume name: description: Name is the unique identifier for the volume within the project type: string type: allOf: - - $ref: "#/components/schemas/volumes.VolumeType" + - $ref: "#/components/schemas/VolumeType" description: Type is the volume type (currently only "readOnly" is supported) required: - content @@ -10412,7 +10412,7 @@ components: properties: content: allOf: - - $ref: "#/components/schemas/volumes.VolumeContent" + - $ref: "#/components/schemas/VolumeContent" description: Content specifies the new content that will be preloaded to this volume name: @@ -10420,7 +10420,7 @@ components: type: string type: allOf: - - $ref: "#/components/schemas/volumes.VolumeType" + - $ref: "#/components/schemas/VolumeType" description: Type is the new volume type (currently only "readOnly" is supported) type: object VolumeMount: @@ -10441,7 +10441,7 @@ components: properties: content: allOf: - - $ref: "#/components/schemas/volumes.VolumeContent" + - $ref: "#/components/schemas/VolumeContent" description: Content specifies the content that will be preloaded to this volume created_at: description: CreatedAt is the ISO8601 timestamp when this volume was created @@ -10457,7 +10457,7 @@ components: type: string type: allOf: - - $ref: "#/components/schemas/volumes.VolumeType" + - $ref: "#/components/schemas/VolumeType" description: Type is the volume type (e.g., "readOnly") updated_at: description: UpdatedAt is the ISO8601 timestamp when this volume was last updated
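+      # Illustrative only: a VolumeResponseItem as it might be returned once the
+      # corrected VolumeContent and VolumeType references above resolve. The id,
+      # name, and timestamps are hypothetical placeholders.
+      #
+      #   id: "vol-1234-abcd"
+      #   object: "volume"
+      #   name: "model-weights"
+      #   type: "readOnly"
+      #   content:
+      #     type: "files"
+      #     source_prefix: "models/"
+      #   created_at: "2025-12-17T10:56:39Z"
+      #   updated_at: "2025-12-17T10:56:39Z"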