From 9e01369e647fa6926caf8e8abcf8d1b910dd187d Mon Sep 17 00:00:00 2001 From: Shahin Saadati Date: Mon, 8 Dec 2025 16:24:27 -0800 Subject: [PATCH] Reformatted the realtime-conversational-agent sample by ensuring black, isort, and flake8 pass --- .../server/example_agent/agent.py | 21 +-- .../server/example_agent/prompts.py | 2 +- .../server/main.py | 137 ++++++++++-------- 3 files changed, 83 insertions(+), 77 deletions(-) diff --git a/python/agents/realtime-conversational-agent/server/example_agent/agent.py b/python/agents/realtime-conversational-agent/server/example_agent/agent.py index 49c0bcd6c..a8c36d471 100644 --- a/python/agents/realtime-conversational-agent/server/example_agent/agent.py +++ b/python/agents/realtime-conversational-agent/server/example_agent/agent.py @@ -1,20 +1,13 @@ from google.adk.agents import Agent -from google.genai.types import ( - GenerateContentConfig, - HarmBlockThreshold, - HarmCategory, - SafetySetting, -) +from google.genai.types import GenerateContentConfig from .prompts import AGENT_INSTRUCTION -genai_config = GenerateContentConfig( - temperature=0.5 -) +genai_config = GenerateContentConfig(temperature=0.5) root_agent = Agent( - name="example_agent", - model="gemini-live-2.5-flash-preview-native-audio", - description="A helpful AI assistant.", - instruction=AGENT_INSTRUCTION -) \ No newline at end of file + name="example_agent", + model="gemini-live-2.5-flash-preview-native-audio", + description="A helpful AI assistant.", + instruction=AGENT_INSTRUCTION, +) diff --git a/python/agents/realtime-conversational-agent/server/example_agent/prompts.py b/python/agents/realtime-conversational-agent/server/example_agent/prompts.py index d82641b06..d2725c041 100644 --- a/python/agents/realtime-conversational-agent/server/example_agent/prompts.py +++ b/python/agents/realtime-conversational-agent/server/example_agent/prompts.py @@ -26,4 +26,4 @@ * *Example:* "I understand you want the answer, but my real job is to help you learn *how* to get it yourself. I promise you'll feel great when you solve it! Let's try this first step..." 8. **Verify Understanding:** Once the student reaches a final answer, ask them to explain their reasoning. "Excellent! Can you tell me how you got that and why you're confident it's correct?" This solidifies their learning. -""" \ No newline at end of file +""" diff --git a/python/agents/realtime-conversational-agent/server/main.py b/python/agents/realtime-conversational-agent/server/main.py index 4933811d5..50578d5ec 100644 --- a/python/agents/realtime-conversational-agent/server/main.py +++ b/python/agents/realtime-conversational-agent/server/main.py @@ -1,39 +1,31 @@ -import json import asyncio import base64 +import json +import logging import os from dotenv import load_dotenv - -from google.genai.types import ( - Part, - Content, - Blob, -) - -from google.adk.runners import InMemoryRunner +from example_agent.agent import root_agent +from fastapi import FastAPI, WebSocket from google.adk.agents import LiveRequestQueue from google.adk.agents.run_config import RunConfig +from google.adk.runners import InMemoryRunner from google.genai import types - -from fastapi import FastAPI, WebSocket - - -import logging +from google.genai.types import ( + Blob, + Content, + Part, +) from starlette.websockets import WebSocketDisconnect -from example_agent.agent import root_agent - load_dotenv() + async def start_agent_session(user_id: str): """Starts an agent session""" # Create a Runner - runner = InMemoryRunner( - app_name=os.getenv("APP_NAME"), - agent=root_agent - ) + runner = InMemoryRunner(app_name=os.getenv("APP_NAME"), agent=root_agent) # Create a Session session = await runner.session_service.create_session( @@ -44,7 +36,7 @@ async def start_agent_session(user_id: str): # Create a LiveRequestQueue for this session live_request_queue = LiveRequestQueue() - # Setup RunConfig + # Setup RunConfig run_config = RunConfig( streaming_mode="bidi", session_resumption=types.SessionResumptionConfig(transparent=True), @@ -56,17 +48,17 @@ async def start_agent_session(user_id: str): silence_duration_ms=0, ) ), - response_modalities = ["AUDIO"], + response_modalities=["AUDIO"], speech_config=types.SpeechConfig( voice_config=types.VoiceConfig( prebuilt_voice_config=types.PrebuiltVoiceConfig( voice_name=os.getenv("AGENT_VOICE") ) ), - language_code=os.getenv("AGENT_LANGUAGE") + language_code=os.getenv("AGENT_LANGUAGE"), ), - output_audio_transcription = {}, - input_audio_transcription = {}, + output_audio_transcription={}, + input_audio_transcription={}, ) # Start agent session @@ -89,67 +81,84 @@ async def agent_to_client_messaging(websocket: WebSocket, live_events): "interrupted": event.interrupted or False, "parts": [], "input_transcription": None, - "output_transcription": None + "output_transcription": None, } if not event.content: - if (message_to_send["turn_complete"] or message_to_send["interrupted"]): + if message_to_send["turn_complete"] or message_to_send["interrupted"]: await websocket.send_text(json.dumps(message_to_send)) - continue + continue + + transcription_text = "".join( + part.text for part in event.content.parts if part.text + ) - transcription_text = "".join(part.text for part in event.content.parts if part.text) - if hasattr(event.content, "role") and event.content.role == "user": if transcription_text: message_to_send["input_transcription"] = { "text": transcription_text, - "is_final": not event.partial + "is_final": not event.partial, } - + elif hasattr(event.content, "role") and event.content.role == "model": if transcription_text: message_to_send["output_transcription"] = { "text": transcription_text, - "is_final": not event.partial + "is_final": not event.partial, } - message_to_send["parts"].append({"type": "text", "data": transcription_text}) + message_to_send["parts"].append( + {"type": "text", "data": transcription_text} + ) for part in event.content.parts: - if part.inline_data and part.inline_data.mime_type.startswith("audio/pcm"): + if part.inline_data and part.inline_data.mime_type.startswith( + "audio/pcm" + ): audio_data = part.inline_data.data encoded_audio = base64.b64encode(audio_data).decode("ascii") - message_to_send["parts"].append({"type": "audio/pcm", "data": encoded_audio}) - + message_to_send["parts"].append( + {"type": "audio/pcm", "data": encoded_audio} + ) + elif part.function_call: - message_to_send["parts"].append({ - "type": "function_call", - "data": { - "name": part.function_call.name, - "args": part.function_call.args or {} + message_to_send["parts"].append( + { + "type": "function_call", + "data": { + "name": part.function_call.name, + "args": part.function_call.args or {}, + }, } - }) - + ) + elif part.function_response: - message_to_send["parts"].append({ - "type": "function_response", - "data": { - "name": part.function_response.name, - "response": part.function_response.response or {} + message_to_send["parts"].append( + { + "type": "function_response", + "data": { + "name": part.function_response.name, + "response": part.function_response.response or {}, + }, } - }) - - if (message_to_send["parts"] or - message_to_send["turn_complete"] or - message_to_send["interrupted"] or - message_to_send["input_transcription"] or - message_to_send["output_transcription"]): - + ) + + if ( + message_to_send["parts"] + or message_to_send["turn_complete"] + or message_to_send["interrupted"] + or message_to_send["input_transcription"] + or message_to_send["output_transcription"] + ): + await websocket.send_text(json.dumps(message_to_send)) except Exception as e: logging.error(f"Error in agent_to_client_messaging: {e}") -async def client_to_agent_messaging(websocket: WebSocket, live_request_queue: LiveRequestQueue): + +async def client_to_agent_messaging( + websocket: WebSocket, live_request_queue: LiveRequestQueue +): """Client to agent communication""" while True: try: @@ -165,13 +174,17 @@ async def client_to_agent_messaging(websocket: WebSocket, live_request_queue: Li elif mime_type == "audio/pcm": data = message["data"] decoded_data = base64.b64decode(data) - live_request_queue.send_realtime(Blob(data=decoded_data, mime_type=mime_type)) + live_request_queue.send_realtime( + Blob(data=decoded_data, mime_type=mime_type) + ) elif mime_type == "image/jpeg": data = message["data"] decoded_data = base64.b64decode(data) - live_request_queue.send_realtime(Blob(data=decoded_data, mime_type=mime_type)) - + live_request_queue.send_realtime( + Blob(data=decoded_data, mime_type=mime_type) + ) + else: logging.warning(f"Mime type not supported: {mime_type}") @@ -185,6 +198,7 @@ async def client_to_agent_messaging(websocket: WebSocket, live_request_queue: Li app = FastAPI() + @app.websocket("/ws/{user_id}") async def websocket_endpoint(websocket: WebSocket, user_id: str): """Client websocket endpoint""" @@ -211,4 +225,3 @@ async def websocket_endpoint(websocket: WebSocket, user_id: str): # Close LiveRequestQueue live_request_queue.close() print(f"Client #{user_id} disconnected") -