diff --git a/docs/builtin-tools.md b/docs/builtin-tools.md index 86f41b5c60..85acb7dd37 100644 --- a/docs/builtin-tools.md +++ b/docs/builtin-tools.md @@ -243,7 +243,7 @@ The [`ImageGenerationTool`][pydantic_ai.builtin_tools.ImageGenerationTool] enabl | Provider | Supported | Notes | |----------|-----------|-------| | OpenAI Responses | ✅ | Full feature support. Only supported by models newer than `gpt-5`. Metadata about the generated image, like the [`revised_prompt`](https://platform.openai.com/docs/guides/tools-image-generation#revised-prompt) sent to the underlying image model, is available on the [`BuiltinToolReturnPart`][pydantic_ai.messages.BuiltinToolReturnPart] that's available via [`ModelResponse.builtin_tool_calls`][pydantic_ai.messages.ModelResponse.builtin_tool_calls]. | -| Google | ✅ | No parameter support. Only supported by [image generation models](https://ai.google.dev/gemini-api/docs/image-generation) like `gemini-2.5-flash-image` and `gemini-3-pro-image-preview`. These models do not support [function tools](tools.md). These models will always have the option of generating images, even if this built-in tool is not explicitly specified. | +| Google | ✅ | Limited parameter support. Only supported by [image generation models](https://ai.google.dev/gemini-api/docs/image-generation) like `gemini-2.5-flash-image` and `gemini-3-pro-image-preview`. These models do not support [function tools](tools.md) and will always have the option of generating images, even if this built-in tool is not explicitly specified. | | Anthropic | ❌ | | | Groq | ❌ | | | Bedrock | ❌ | | @@ -332,6 +332,27 @@ assert isinstance(result.output, BinaryImage) _(This example is complete, it can be run "as is")_ +OpenAI Responses models also respect the `aspect_ratio` parameter. Because the OpenAI API only exposes discrete image sizes, +Pydantic AI maps `'1:1'` -> `1024x1024`, `'2:3'` -> `1024x1536`, and `'3:2'` -> `1536x1024`. 
Providing any other aspect ratio +results in an error, and if you also set `size` it must match the computed value. + +To control the aspect ratio when using Gemini image models, include the `ImageGenerationTool` explicitly: + +```py {title="image_generation_google_aspect_ratio.py"} +from pydantic_ai import Agent, BinaryImage, ImageGenerationTool + +agent = Agent( + 'google-gla:gemini-2.5-flash-image', + builtin_tools=[ImageGenerationTool(aspect_ratio='16:9')], + output_type=BinaryImage, +) + +result = agent.run_sync('Generate a wide illustration of an axolotl city skyline.') +assert isinstance(result.output, BinaryImage) +``` + +_(This example is complete, it can be run "as is")_ + For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageGenerationTool]. #### Provider Support @@ -346,6 +367,7 @@ For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageG | `partial_images` | ✅ | ❌ | | `quality` | ✅ | ❌ | | `size` | ✅ | ❌ | +| `aspect_ratio` | ✅ (1:1, 2:3, 3:2) | ✅ | ## Web Fetch Tool diff --git a/pydantic_ai_slim/pydantic_ai/builtin_tools.py b/pydantic_ai_slim/pydantic_ai/builtin_tools.py index 1baaf09f3a..912538b715 100644 --- a/pydantic_ai_slim/pydantic_ai/builtin_tools.py +++ b/pydantic_ai_slim/pydantic_ai/builtin_tools.py @@ -22,6 +22,9 @@ _BUILTIN_TOOL_TYPES: dict[str, type[AbstractBuiltinTool]] = {} +ImageAspectRatio = Literal['21:9', '16:9', '4:3', '3:2', '1:1', '9:16', '3:4', '2:3', '5:4', '4:5'] +"""Supported aspect ratios for image generation tools.""" + @dataclass(kw_only=True) class AbstractBuiltinTool(ABC): @@ -316,6 +319,15 @@ class ImageGenerationTool(AbstractBuiltinTool): * OpenAI Responses """ + aspect_ratio: ImageAspectRatio | None = None + """The aspect ratio to use for generated images. 
+ + Supported by: + + * Google image-generation models (Gemini) + * OpenAI Responses (maps '1:1', '2:3', and '3:2' to supported sizes) + """ + kind: str = 'image_generation' """The kind of tool.""" diff --git a/pydantic_ai_slim/pydantic_ai/models/google.py b/pydantic_ai_slim/pydantic_ai/models/google.py index 8aca93c720..bf3ba0206d 100644 --- a/pydantic_ai_slim/pydantic_ai/models/google.py +++ b/pydantic_ai_slim/pydantic_ai/models/google.py @@ -74,6 +74,7 @@ GoogleSearchDict, GroundingMetadata, HttpOptionsDict, + ImageConfigDict, MediaResolution, Modality, Part, @@ -335,12 +336,16 @@ async def request_stream( response = await self._generate_content(messages, True, model_settings, model_request_parameters) yield await self._process_streamed_response(response, model_request_parameters) # type: ignore - def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[ToolDict] | None: + def _get_tools( + self, model_request_parameters: ModelRequestParameters + ) -> tuple[list[ToolDict] | None, ImageConfigDict | None]: tools: list[ToolDict] = [ ToolDict(function_declarations=[_function_declaration_from_tool(t)]) for t in model_request_parameters.tool_defs.values() ] + image_config: ImageConfigDict | None = None + if model_request_parameters.builtin_tools: if model_request_parameters.function_tools: raise UserError('Google does not support function tools and built-in tools at the same time.') @@ -357,11 +362,13 @@ def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[T raise UserError( "`ImageGenerationTool` is not supported by this model. Use a model with 'image' in the name instead." ) + if tool.aspect_ratio: + image_config = ImageConfigDict(aspect_ratio=tool.aspect_ratio) else: # pragma: no cover raise UserError( f'`{tool.__class__.__name__}` is not supported by `GoogleModel`. If it should be, please file an issue.' 
# Aspect ratios that OpenAI image generation can honour, keyed to the discrete
# `size` string sent to the API. Insertion order is reused when listing the
# supported ratios in error messages.
_OPENAI_ASPECT_RATIO_TO_SIZE: dict[ImageAspectRatio, Literal['1024x1024', '1024x1536', '1536x1024']] = {
    '1:1': '1024x1024',
    '2:3': '1024x1536',
    '3:2': '1536x1024',
}


def _resolve_openai_image_generation_size(
    tool: ImageGenerationTool,
) -> Literal['auto', '1024x1024', '1024x1536', '1536x1024']:
    """Work out the OpenAI `size` value for an `ImageGenerationTool`.

    When `tool.aspect_ratio` is `None`, `tool.size` is returned untouched.
    Otherwise the ratio is looked up in `_OPENAI_ASPECT_RATIO_TO_SIZE` and the
    mapped size is returned, provided `tool.size` is either `'auto'` or already
    equal to that mapped size.

    Args:
        tool: The image generation tool whose `aspect_ratio` and `size` are read.

    Returns:
        The size string to pass to the OpenAI image generation tool.

    Raises:
        UserError: If `tool.aspect_ratio` has no entry in the mapping, or if
            `tool.size` is neither `'auto'` nor the mapped size.
    """
    requested_ratio = tool.aspect_ratio
    if requested_ratio is None:
        # No ratio requested: the explicit size (or its default) wins as-is.
        return tool.size

    if requested_ratio not in _OPENAI_ASPECT_RATIO_TO_SIZE:
        supported = ', '.join(_OPENAI_ASPECT_RATIO_TO_SIZE)
        raise UserError(
            f'OpenAI image generation only supports `aspect_ratio` values: {supported}. '
            'Specify one of those values or omit `aspect_ratio`.'
        )
    size_for_ratio = _OPENAI_ASPECT_RATIO_TO_SIZE[requested_ratio]

    # An explicit size is tolerated only when it agrees with the ratio.
    explicit_size = tool.size
    if explicit_size != 'auto' and explicit_size != size_for_ratio:
        raise UserError(
            '`ImageGenerationTool` cannot combine `aspect_ratio` with a conflicting `size` when using OpenAI.'
        )

    return size_for_ratio
ModelRequestParameters(builtin_tools=[ImageGenerationTool(aspect_ratio='16:9')]) + + tools, image_config = model._get_tools(params) # pyright: ignore[reportPrivateUsage] + assert tools is None + assert image_config == {'aspect_ratio': '16:9'} + + async def test_google_vertexai_image_generation(allow_model_requests: None, vertex_provider: GoogleProvider): model = GoogleModel('gemini-2.5-flash-image', provider=vertex_provider) diff --git a/tests/models/test_model_request_parameters.py b/tests/models/test_model_request_parameters.py index 78a8dffa79..a3f378652f 100644 --- a/tests/models/test_model_request_parameters.py +++ b/tests/models/test_model_request_parameters.py @@ -98,6 +98,7 @@ def test_model_request_parameters_are_serializable(): 'partial_images': 0, 'quality': 'auto', 'size': '1024x1024', + 'aspect_ratio': None, }, {'kind': 'memory'}, { diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py index 47bdeaea3f..17964a2bdf 100644 --- a/tests/models/test_openai_responses.py +++ b/tests/models/test_openai_responses.py @@ -1,7 +1,7 @@ import json import re from dataclasses import replace -from typing import Any, cast +from typing import Any, Literal, cast import pytest from inline_snapshot import snapshot @@ -32,17 +32,19 @@ ToolCallPartDelta, ToolReturnPart, UnexpectedModelBehavior, + UserError, UserPromptPart, capture_run_messages, ) from pydantic_ai.agent import Agent -from pydantic_ai.builtin_tools import CodeExecutionTool, MCPServerTool, WebSearchTool +from pydantic_ai.builtin_tools import CodeExecutionTool, ImageAspectRatio, MCPServerTool, WebSearchTool from pydantic_ai.exceptions import ModelHTTPError, ModelRetry from pydantic_ai.messages import ( BuiltinToolCallEvent, # pyright: ignore[reportDeprecated] BuiltinToolResultEvent, # pyright: ignore[reportDeprecated] ) from pydantic_ai.models import ModelRequestParameters +from pydantic_ai.models.openai import _resolve_openai_image_generation_size # pyright: 
@pytest.mark.parametrize(
    ('aspect_ratio', 'explicit_size', 'expected_size'),
    [
        ('1:1', 'auto', '1024x1024'),
        ('2:3', '1024x1536', '1024x1536'),
        ('3:2', 'auto', '1536x1024'),
    ],
)
def test_openai_responses_image_generation_tool_aspect_ratio_mapping(
    aspect_ratio: ImageAspectRatio,
    explicit_size: Literal['1024x1024', '1024x1536', '1536x1024', 'auto'],
    expected_size: Literal['1024x1024', '1024x1536', '1536x1024'],
) -> None:
    """Each parametrized aspect ratio resolves to its expected size.

    Covers both an `'auto'` size and an explicit size equal to the expected one.
    """
    configured_tool = ImageGenerationTool(size=explicit_size, aspect_ratio=aspect_ratio)
    resolved_size = _resolve_openai_image_generation_size(configured_tool)
    assert resolved_size == expected_size


def test_openai_responses_image_generation_tool_aspect_ratio_invalid() -> None:
    """Resolving a tool built with `aspect_ratio='16:9'` raises a `UserError`."""
    unsupported_tool = ImageGenerationTool(aspect_ratio='16:9')
    expected_message = 'OpenAI image generation only supports `aspect_ratio` values'

    with pytest.raises(UserError, match=expected_message):
        _resolve_openai_image_generation_size(unsupported_tool)


def test_openai_responses_image_generation_tool_aspect_ratio_conflicts_with_size() -> None:
    """Resolving `aspect_ratio='1:1'` together with `size='1536x1024'` raises a `UserError`."""
    conflicting_tool = ImageGenerationTool(size='1536x1024', aspect_ratio='1:1')
    expected_message = 'cannot combine `aspect_ratio` with a conflicting `size`'

    with pytest.raises(UserError, match=expected_message):
        _resolve_openai_image_generation_size(conflicting_tool)
FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='160d47')), ] ) + elif m.content == 'Generate a wide illustration of an axolotl city skyline.': + return ModelResponse( + parts=[ + FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='wide-axolotl-city')), + ] + ) elif m.content == 'Generate a chart of y=x^2 for x=-5 to 5.': return ModelResponse( parts=[