From 292f2cc5220d18f69cd9e6f2b58118aa1b39cd70 Mon Sep 17 00:00:00 2001
From: Alexander Ivkin
Date: Tue, 4 Mar 2025 17:04:58 +0100
Subject: [PATCH 1/2] Fixed parsing JSON for messages with tools
---
Sources/Models/ConversationUpdate.swift | 105 +++++++++++-
Tests/MessageParsingTests.swift | 210 ++++++++++++++++++++++++
2 files changed, 314 insertions(+), 1 deletion(-)
create mode 100644 Tests/MessageParsingTests.swift
diff --git a/Sources/Models/ConversationUpdate.swift b/Sources/Models/ConversationUpdate.swift
index 917a3e8..c592ef1 100644
--- a/Sources/Models/ConversationUpdate.swift
+++ b/Sources/Models/ConversationUpdate.swift
@@ -5,12 +5,115 @@ public struct Message: Codable {
case user = "user"
case assistant = "assistant"
case system = "system"
+ case tool = "tool"
+ case toolCalls = "tool_calls"
+ case bot = "bot"
}
public let role: Role
- public let content: String
+ public let content: String?
+ public let tool_calls: [ToolCall]?
+ public let tool_call_id: String?
+
+ enum CodingKeys: String, CodingKey {
+ case role
+ case content
+ case tool_calls
+ case tool_call_id
+ }
+}
+
+/// A timestamped transcript entry from the `messages` array of a conversation update.
+///
+/// The numeric fields (`time`, `endTime`, `secondsFromStart`, `duration`) are decoded
+/// leniently: each may arrive as a JSON number or as a numeric string.
+public struct TimestampedMessage: Codable {
+    /// Author of the entry. Raw values are the role strings used in the JSON payload.
+    public enum Role: String, Codable {
+        case user = "user"
+        case bot = "bot"
+        case system = "system"
+        case tool = "tool"
+        case toolCalls = "tool_calls"
+    }
+
+    public let role: Role
+    /// Text of the entry; may be absent or null.
+    public let message: String?
+    /// Required timestamp (presumably epoch milliseconds — TODO confirm against the API).
+    public let time: Double
+    public let endTime: Double?
+    public let secondsFromStart: Double?
+    public let duration: Double?
+    /// Tool invocations attached to this entry, if any.
+    public let toolCalls: [ToolCall]?
+
+    enum CodingKeys: String, CodingKey {
+        case role, message, time, endTime, secondsFromStart, duration, toolCalls
+    }
+
+    /// Decodes an entry, accepting numbers or numeric strings for the numeric fields.
+    /// - Throws: `DecodingError` if `role` or `time` is missing or malformed, or if an
+    ///   optional numeric field is present but is neither a number nor a numeric string.
+    public init(from decoder: Decoder) throws {
+        let container = try decoder.container(keyedBy: CodingKeys.self)
+        role = try container.decode(Role.self, forKey: .role)
+        message = try container.decodeIfPresent(String.self, forKey: .message)
+        toolCalls = try container.decodeIfPresent([ToolCall].self, forKey: .toolCalls)
+
+        // `time` is required: when absent or null, `decode` raises the proper DecodingError.
+        time = try TimestampedMessage.lenientDouble(in: container, forKey: .time)
+            ?? container.decode(Double.self, forKey: .time)
+        endTime = try TimestampedMessage.lenientDouble(in: container, forKey: .endTime)
+        secondsFromStart = try TimestampedMessage.lenientDouble(in: container, forKey: .secondsFromStart)
+        duration = try TimestampedMessage.lenientDouble(in: container, forKey: .duration)
+    }
+
+    /// Reads `key` as a `Double`, also accepting a string that parses as one.
+    /// Returns `nil` when the key is absent or null.
+    private static func lenientDouble(in container: KeyedDecodingContainer<CodingKeys>,
+                                      forKey key: CodingKeys) throws -> Double? {
+        if let text = try? container.decodeIfPresent(String.self, forKey: key),
+           let parsed = Double(text) {
+            return parsed
+        }
+        return try container.decodeIfPresent(Double.self, forKey: key)
+    }
+}
+
+public struct ToolCall: Codable { // One entry of a message's tool-call list; every field is optional when decoding.
+ public let type: String? // kind of call; "function" in the test fixtures
+ public let id: String? // call identifier; a `tool` message's `tool_call_id` carries the same value in the fixtures
+ public let function: ToolFunction? // function name plus its arguments string
+}
+
+public struct ToolFunction: Codable { // Function reference carried by a `ToolCall`; both fields are required when decoding.
+ public let name: String // name of the function to invoke
+ public let arguments: String // arguments as a string; `Vapi` parses it as a JSON object ("{}" in the fixtures)
}
+/// Payload of a `conversation-update` app message.
public struct ConversationUpdate: Codable {
public let conversation: [Message]
+    /// Timestamped transcript entries, when the payload includes them.
+    public let messages: [TimestampedMessage]?
+    /// NOTE(review): typed as strings, but the name suggests OpenAI-style message
+    /// objects — confirm against the API. Decodes to `nil` if the payload is not `[String]`.
+    public let messagesOpenAIFormatted: [String]?
+
+    // `encode(to:)` is compiler-synthesized; it writes optionals only when non-nil.
+    enum CodingKeys: String, CodingKey {
+        case conversation, messages, messagesOpenAIFormatted
+    }
+
+    /// Decodes the update. Only `conversation` is required.
+    /// - Throws: `DecodingError` if `conversation` is missing or malformed, or if
+    ///   `messages` is present but malformed.
+    public init(from decoder: Decoder) throws {
+        let container = try decoder.container(keyedBy: CodingKeys.self)
+        conversation = try container.decode([Message].self, forKey: .conversation)
+        messages = try container.decodeIfPresent([TimestampedMessage].self, forKey: .messages)
+        // Best effort: an unexpected shape here must not make the whole update undecodable.
+        messagesOpenAIFormatted = try? container.decodeIfPresent([String].self, forKey: .messagesOpenAIFormatted)
+    }
}
diff --git a/Tests/MessageParsingTests.swift b/Tests/MessageParsingTests.swift
new file mode 100644
index 0000000..da6d039
--- /dev/null
+++ b/Tests/MessageParsingTests.swift
@@ -0,0 +1,210 @@
+import XCTest
+import Combine
+@testable import Vapi
+
+final class MessageParsingTests: XCTestCase {
+ // Decoding tests for conversation-update payloads that carry tool calls and timestamped messages.
+ /// Decodes a conversation with `tool` and `tool_calls` roles; checks roles, tool calls and `tool_call_id`.
+ func testParseMessagesWithToolCallRole() throws {
+ let jsonString = """
+ {
+ "conversation": [
+ {
+ "role": "system",
+ "content": "System message"
+ },
+ {
+ "role": "assistant",
+ "content": "Assistant message",
+ "tool_calls": [
+ {
+ "type": "function",
+ "id": "tool123",
+ "function": {
+ "name": "start_exercise",
+ "arguments": "{}"
+ }
+ }
+ ]
+ },
+ {
+ "role": "tool",
+ "tool_call_id": "tool123",
+ "content": "Tool Result"
+ },
+ {
+ "role": "tool_calls",
+ "content": null,
+ "tool_calls": [
+ {
+ "type": "function",
+ "id": "tool456",
+ "function": {
+ "name": "another_function",
+ "arguments": "{}"
+ }
+ }
+ ]
+ }
+ ]
+ }
+ """
+
+ let jsonData = Data(jsonString.utf8)
+ let decoder = JSONDecoder()
+ // This should not throw with our updated Role enum
+ let conversationUpdate = try decoder.decode(ConversationUpdate.self, from: jsonData)
+
+ XCTAssertEqual(conversationUpdate.conversation.count, 4)
+ XCTAssertEqual(conversationUpdate.conversation[0].role, Message.Role.system)
+ XCTAssertEqual(conversationUpdate.conversation[1].role, Message.Role.assistant)
+ XCTAssertEqual(conversationUpdate.conversation[2].role, Message.Role.tool)
+ XCTAssertEqual(conversationUpdate.conversation[3].role, Message.Role.toolCalls)
+
+ // Verify tool calls are properly parsed
+ XCTAssertNotNil(conversationUpdate.conversation[1].tool_calls)
+ XCTAssertEqual(conversationUpdate.conversation[1].tool_calls?.count, 1)
+ XCTAssertEqual(conversationUpdate.conversation[1].tool_calls?[0].function?.name, "start_exercise")
+
+ // Verify tool_call_id is properly parsed
+ XCTAssertEqual(conversationUpdate.conversation[2].tool_call_id, "tool123")
+ }
+
+ /// Decodes one payload as `AppMessage` and as `ConversationUpdate`; checks the trailing `tool_calls` message.
+ func testToolCallsInMessageAreExtractedAsFunctionCalls() throws {
+ let appMessageString = """
+ {
+ "type": "conversation-update",
+ "conversation": [
+ {
+ "role": "system",
+ "content": "System message"
+ },
+ {
+ "role": "tool_calls",
+ "tool_calls": [
+ {
+ "type": "function",
+ "id": "tool456",
+ "function": {
+ "name": "another_function",
+ "arguments": "{}"
+ }
+ }
+ ]
+ }
+ ]
+ }
+ """
+
+ let appMessageData = Data(appMessageString.utf8)
+ let decoder = JSONDecoder()
+ let appMessage = try decoder.decode(AppMessage.self, from: appMessageData)
+
+ XCTAssertEqual(appMessage.type, AppMessage.MessageType.conversationUpdate)
+
+ let conversationUpdate = try decoder.decode(ConversationUpdate.self, from: appMessageData)
+ XCTAssertEqual(conversationUpdate.conversation.count, 2)
+
+ let lastMessage = try XCTUnwrap(conversationUpdate.conversation.last)
+ XCTAssertEqual(lastMessage.role, Message.Role.toolCalls)
+ let toolCalls = try XCTUnwrap(lastMessage.tool_calls)
+ XCTAssertEqual(toolCalls.count, 1)
+
+ let toolCall = try XCTUnwrap(toolCalls.first)
+ XCTAssertEqual(toolCall.function?.name, "another_function")
+ XCTAssertEqual(toolCall.function?.arguments, "{}")
+ }
+
+ /// Decodes a payload with a `messages` array; checks roles, timing fields and the nested tool call.
+ func testParseTimestampedMessagesArray() throws {
+ let appMessageString = """
+ {
+ "type": "conversation-update",
+ "conversation": [
+ {
+ "role": "system",
+ "content": "System message"
+ }
+ ],
+ "messages": [
+ {
+ "role": "system",
+ "message": "System message",
+ "time": 1741093883580,
+ "secondsFromStart": 0
+ },
+ {
+ "role": "bot",
+ "message": "Bot message",
+ "time": 1741093885838,
+ "endTime": 1741093886618,
+ "secondsFromStart": 1.8399999,
+ "duration": 780,
+ "source": ""
+ },
+ {
+ "role": "user",
+ "message": "User message",
+ "time": 1741093897088,
+ "endTime": 1741093898238,
+ "secondsFromStart": 13.09,
+ "duration": 1150
+ },
+ {
+ "toolCalls": [
+ {
+ "type": "function",
+ "id": "tool123",
+ "function": {
+ "name": "test_function",
+ "arguments": "{}"
+ }
+ }
+ ],
+ "role": "tool_calls",
+ "message": "",
+ "time": 1741093903823,
+ "secondsFromStart": 15.179
+ }
+ ],
+ "messagesOpenAIFormatted": []
+ }
+ """
+
+ let appMessageData = Data(appMessageString.utf8)
+ let decoder = JSONDecoder()
+ let conversationUpdate = try decoder.decode(ConversationUpdate.self, from: appMessageData)
+
+ // Check that messages array was parsed correctly
+ XCTAssertNotNil(conversationUpdate.messages)
+ XCTAssertEqual(conversationUpdate.messages?.count, 4)
+
+ // Verify the first message (system)
+ XCTAssertEqual(conversationUpdate.messages?[0].role, TimestampedMessage.Role.system)
+ XCTAssertEqual(conversationUpdate.messages?[0].message, "System message")
+ XCTAssertEqual(conversationUpdate.messages?[0].time, 1741093883580.0)
+
+ // Verify the bot message
+ XCTAssertEqual(conversationUpdate.messages?[1].role, TimestampedMessage.Role.bot)
+ XCTAssertEqual(conversationUpdate.messages?[1].message, "Bot message")
+ XCTAssertEqual(conversationUpdate.messages?[1].time, 1741093885838.0)
+ XCTAssertEqual(conversationUpdate.messages?[1].endTime, 1741093886618.0)
+ XCTAssertEqual(conversationUpdate.messages?[1].duration, 780.0)
+
+ // Verify the user message
+ XCTAssertEqual(conversationUpdate.messages?[2].role, TimestampedMessage.Role.user)
+ XCTAssertEqual(conversationUpdate.messages?[2].message, "User message")
+
+ // Verify the tool_calls message
+ XCTAssertEqual(conversationUpdate.messages?[3].role, TimestampedMessage.Role.toolCalls)
+ XCTAssertNotNil(conversationUpdate.messages?[3].toolCalls)
+ XCTAssertEqual(conversationUpdate.messages?[3].toolCalls?.count, 1)
+
+ let toolCall = conversationUpdate.messages?[3].toolCalls?[0]
+ XCTAssertEqual(toolCall?.function?.name, "test_function")
+ XCTAssertEqual(toolCall?.function?.arguments, "{}")
+ XCTAssertEqual(toolCall?.id, "tool123")
+ }
+}
+
From 4c140ebe4a2b8c3a6b4bccdd42a745438ddcf6c6 Mon Sep 17 00:00:00 2001
From: Alexander Ivkin
Date: Tue, 4 Mar 2025 17:05:26 +0100
Subject: [PATCH 2/2] Workaround: call functions on conversation updates with
tool_calls
---
Sources/Vapi.swift | 33 +++++++++++++++++++++++++++++++--
1 file changed, 31 insertions(+), 2 deletions(-)
diff --git a/Sources/Vapi.swift b/Sources/Vapi.swift
index 69bdaa3..b0e74f8 100644
--- a/Sources/Vapi.swift
+++ b/Sources/Vapi.swift
@@ -61,7 +61,9 @@ public final class Vapi: CallClientDelegate {
private let networkManager = NetworkManager()
private var call: CallClient?
-
+
+ private var lastMessageTimestamp: Double = 0
+
// MARK: - Computed Properties
private var publicKey: String {
@@ -479,6 +481,33 @@ public final class Vapi: CallClientDelegate {
case .conversationUpdate:
let conv = try decoder.decode(ConversationUpdate.self, from: unescapedData)
event = Event.conversationUpdate(conv)
+
+ if let messages = conv.messages, !messages.isEmpty {
+ let newMessages = messages.filter { $0.time > self.lastMessageTimestamp }
+
+ if !newMessages.isEmpty {
+ if let latestTime = newMessages.map({ $0.time }).max() {
+ self.lastMessageTimestamp = latestTime
+ }
+
+ for message in newMessages {
+ if message.role == .toolCalls, let toolCalls = message.toolCalls, !toolCalls.isEmpty {
+ for toolCall in toolCalls {
+ if let functionName = toolCall.function?.name,
+ let functionArgs = toolCall.function?.arguments {
+
+ if let argsData = functionArgs.data(using: .utf8),
+ let parameters = try? JSONSerialization.jsonObject(with: argsData, options: []) as? [String: Any] {
+
+ let functionCall = FunctionCall(name: functionName, parameters: parameters)
+ eventSubject.send(Event.functionCall(functionCall))
+ }
+ }
+ }
+ }
+ }
+ }
+ }
case .statusUpdate:
let statusUpdate = try decoder.decode(StatusUpdate.self, from: unescapedData)
event = Event.statusUpdate(statusUpdate)
@@ -495,7 +524,7 @@ public final class Vapi: CallClientDelegate {
eventSubject.send(event)
} catch {
let messageText = String(data: jsonData, encoding: .utf8)
- print("Error parsing app message \"\(messageText ?? "")\": \(error.localizedDescription)")
+ print("Error parsing app message \"\(messageText ?? "")\": \(String(describing: error))")
}
}
}