python/agents/realtime-conversational-agent/server/example_agent/agent.py
@@ -1,20 +1,13 @@
from google.adk.agents import Agent
from google.genai.types import (
GenerateContentConfig,
HarmBlockThreshold,
HarmCategory,
SafetySetting,
)
from google.genai.types import GenerateContentConfig

from .prompts import AGENT_INSTRUCTION

genai_config = GenerateContentConfig(
temperature=0.5
)
genai_config = GenerateContentConfig(temperature=0.5)

root_agent = Agent(
name="example_agent",
model="gemini-live-2.5-flash-preview-native-audio",
description="A helpful AI assistant.",
instruction=AGENT_INSTRUCTION
)
name="example_agent",
model="gemini-live-2.5-flash-preview-native-audio",
description="A helpful AI assistant.",
instruction=AGENT_INSTRUCTION,
)
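
Reviewer note: this change drops the SafetySetting, HarmCategory, and HarmBlockThreshold imports along with whatever filtering they configured. If explicit safety thresholds are still wanted, GenerateContentConfig accepts a safety_settings list. A minimal sketch; the category and threshold here are illustrative choices, not values taken from this PR:

from google.genai.types import (
    GenerateContentConfig,
    HarmBlockThreshold,
    HarmCategory,
    SafetySetting,
)

# Same temperature as in the diff above, plus one example safety rule.
# The category/threshold pair is illustrative only.
genai_config = GenerateContentConfig(
    temperature=0.5,
    safety_settings=[
        SafetySetting(
            category=HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
            threshold=HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        )
    ],
)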
python/agents/realtime-conversational-agent/server/example_agent/prompts.py
@@ -26,4 +26,4 @@
* *Example:* "I understand you want the answer, but my real job is to help you learn *how* to get it yourself. I promise you'll feel great when you solve it! Let's try this first step..."

8. **Verify Understanding:** Once the student reaches a final answer, ask them to explain their reasoning. "Excellent! Can you tell me how you got that and why you're confident it's correct?" This solidifies their learning.
"""
"""
137 changes: 75 additions & 62 deletions python/agents/realtime-conversational-agent/server/main.py
@@ -1,39 +1,31 @@
import json
import asyncio
import base64
import json
import logging
import os

from dotenv import load_dotenv

from google.genai.types import (
Part,
Content,
Blob,
)

from google.adk.runners import InMemoryRunner
from example_agent.agent import root_agent
from fastapi import FastAPI, WebSocket
from google.adk.agents import LiveRequestQueue
from google.adk.agents.run_config import RunConfig
from google.adk.runners import InMemoryRunner
from google.genai import types

from fastapi import FastAPI, WebSocket


import logging
from google.genai.types import (
Blob,
Content,
Part,
)
from starlette.websockets import WebSocketDisconnect

from example_agent.agent import root_agent

load_dotenv()


async def start_agent_session(user_id: str):
"""Starts an agent session"""

# Create a Runner
runner = InMemoryRunner(
app_name=os.getenv("APP_NAME"),
agent=root_agent
)
runner = InMemoryRunner(app_name=os.getenv("APP_NAME"), agent=root_agent)

# Create a Session
session = await runner.session_service.create_session(
@@ -44,7 +36,7 @@ async def start_agent_session(user_id: str):
# Create a LiveRequestQueue for this session
live_request_queue = LiveRequestQueue()

# Setup RunConfig
# Setup RunConfig
run_config = RunConfig(
streaming_mode="bidi",
session_resumption=types.SessionResumptionConfig(transparent=True),
@@ -56,17 +48,17 @@ async def start_agent_session(user_id: str):
silence_duration_ms=0,
)
),
response_modalities = ["AUDIO"],
response_modalities=["AUDIO"],
speech_config=types.SpeechConfig(
voice_config=types.VoiceConfig(
prebuilt_voice_config=types.PrebuiltVoiceConfig(
voice_name=os.getenv("AGENT_VOICE")
)
),
language_code=os.getenv("AGENT_LANGUAGE")
language_code=os.getenv("AGENT_LANGUAGE"),
),
output_audio_transcription = {},
input_audio_transcription = {},
output_audio_transcription={},
input_audio_transcription={},
)

# Start agent session
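
Configuration note: the runner and RunConfig above read APP_NAME, AGENT_VOICE, and AGENT_LANGUAGE through os.getenv(), populated by the load_dotenv() call at import time. A hypothetical .env for local runs; the variable names come from this file, the values are only examples:

# Example values only; AGENT_VOICE is assumed to be one of the prebuilt
# Live API voices, and AGENT_LANGUAGE a BCP-47 language code.
APP_NAME=realtime-conversational-agent
AGENT_VOICE=Puck
AGENT_LANGUAGE=en-US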
@@ -89,67 +81,84 @@ async def agent_to_client_messaging(websocket: WebSocket, live_events):
"interrupted": event.interrupted or False,
"parts": [],
"input_transcription": None,
"output_transcription": None
"output_transcription": None,
}

if not event.content:
if (message_to_send["turn_complete"] or message_to_send["interrupted"]):
if message_to_send["turn_complete"] or message_to_send["interrupted"]:
await websocket.send_text(json.dumps(message_to_send))
continue
continue

transcription_text = "".join(
part.text for part in event.content.parts if part.text
)

transcription_text = "".join(part.text for part in event.content.parts if part.text)

if hasattr(event.content, "role") and event.content.role == "user":
if transcription_text:
message_to_send["input_transcription"] = {
"text": transcription_text,
"is_final": not event.partial
"is_final": not event.partial,
}

elif hasattr(event.content, "role") and event.content.role == "model":
if transcription_text:
message_to_send["output_transcription"] = {
"text": transcription_text,
"is_final": not event.partial
"is_final": not event.partial,
}
message_to_send["parts"].append({"type": "text", "data": transcription_text})
message_to_send["parts"].append(
{"type": "text", "data": transcription_text}
)

for part in event.content.parts:
if part.inline_data and part.inline_data.mime_type.startswith("audio/pcm"):
if part.inline_data and part.inline_data.mime_type.startswith(
"audio/pcm"
):
audio_data = part.inline_data.data
encoded_audio = base64.b64encode(audio_data).decode("ascii")
message_to_send["parts"].append({"type": "audio/pcm", "data": encoded_audio})

message_to_send["parts"].append(
{"type": "audio/pcm", "data": encoded_audio}
)

elif part.function_call:
message_to_send["parts"].append({
"type": "function_call",
"data": {
"name": part.function_call.name,
"args": part.function_call.args or {}
message_to_send["parts"].append(
{
"type": "function_call",
"data": {
"name": part.function_call.name,
"args": part.function_call.args or {},
},
}
})
)

elif part.function_response:
message_to_send["parts"].append({
"type": "function_response",
"data": {
"name": part.function_response.name,
"response": part.function_response.response or {}
message_to_send["parts"].append(
{
"type": "function_response",
"data": {
"name": part.function_response.name,
"response": part.function_response.response or {},
},
}
})
)

if (message_to_send["parts"] or
message_to_send["turn_complete"] or
message_to_send["interrupted"] or
message_to_send["input_transcription"] or
message_to_send["output_transcription"]):

if (
message_to_send["parts"]
or message_to_send["turn_complete"]
or message_to_send["interrupted"]
or message_to_send["input_transcription"]
or message_to_send["output_transcription"]
):

await websocket.send_text(json.dumps(message_to_send))

except Exception as e:
logging.error(f"Error in agent_to_client_messaging: {e}")

async def client_to_agent_messaging(websocket: WebSocket, live_request_queue: LiveRequestQueue):

async def client_to_agent_messaging(
websocket: WebSocket, live_request_queue: LiveRequestQueue
):
"""Client to agent communication"""
while True:
try:
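
For context on the dispatch that follows: each client frame is a JSON object with "mime_type" and "data" keys, and binary payloads travel base64-encoded. A minimal sketch of building a conforming audio frame on the client side; the payload bytes are a placeholder:

import base64
import json

# Placeholder payload: a short run of 16-bit PCM silence; a real client
# would send microphone capture here.
pcm_bytes = b"\x00\x00" * 160

audio_frame = json.dumps(
    {
        "mime_type": "audio/pcm",
        "data": base64.b64encode(pcm_bytes).decode("ascii"),
    }
)
# image/jpeg frames use the same shape, with JPEG bytes in "data".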
@@ -165,13 +174,17 @@ async def client_to_agent_messaging(websocket: WebSocket, live_request_queue: Li
elif mime_type == "audio/pcm":
data = message["data"]
decoded_data = base64.b64decode(data)
live_request_queue.send_realtime(Blob(data=decoded_data, mime_type=mime_type))
live_request_queue.send_realtime(
Blob(data=decoded_data, mime_type=mime_type)
)

elif mime_type == "image/jpeg":
data = message["data"]
decoded_data = base64.b64decode(data)
live_request_queue.send_realtime(Blob(data=decoded_data, mime_type=mime_type))

live_request_queue.send_realtime(
Blob(data=decoded_data, mime_type=mime_type)
)

else:
logging.warning(f"Mime type not supported: {mime_type}")

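In the other direction, agent_to_client_messaging wraps every event in one JSON shape. A representative frame as the browser would receive it, with illustrative values; the field names mirror message_to_send above:

frame = {
    "turn_complete": False,
    "interrupted": False,
    "parts": [{"type": "text", "data": "Hi there!"}],
    "input_transcription": None,
    "output_transcription": {"text": "Hi there!", "is_final": False},
}
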
@@ -185,6 +198,7 @@ async def client_to_agent_messaging(websocket: WebSocket, live_request_queue: Li

app = FastAPI()


@app.websocket("/ws/{user_id}")
async def websocket_endpoint(websocket: WebSocket, user_id: str):
"""Client websocket endpoint"""
@@ -211,4 +225,3 @@ async def websocket_endpoint(websocket: WebSocket, user_id: str):
# Close LiveRequestQueue
live_request_queue.close()
print(f"Client #{user_id} disconnected")