Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 52 additions & 1 deletion src/agents/_run_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -1739,6 +1739,7 @@ async def execute(
shell_output_payload: list[dict[str, Any]] | None = None
provider_meta: dict[str, Any] | None = None
max_output_length: int | None = None
requested_max_output_length = shell_call.action.max_output_length

try:
executor_result = call.shell_tool.executor(request)
Expand All @@ -1748,12 +1749,23 @@ async def execute(

if isinstance(result, ShellResult):
normalized = [_normalize_shell_output(entry) for entry in result.output]
max_output_length = (
result.max_output_length
if result.max_output_length is not None
else requested_max_output_length
)
Comment on lines +1752 to +1756

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve action max_output_length=0

If a tool call sets max_output_length to 0 (e.g., to suppress output), _coerce_shell_call currently uses ... or ... to read the field, so 0 is coerced to None. That means requested_max_output_length is None here and this branch won’t truncate at all, even though the caller explicitly asked for zero-length output. This is a regression for the new enforcement path: a model-supplied max_output_length: 0 still leaks full output. Consider preserving zero via an explicit is not None check when parsing the action payload.

Useful? React with 👍 / 👎.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gustavz This look like an edge case, but can resolve it as well? Also, running codex -m gpt-5.2-codex + /review before pushing again, and resolving reviews if there is anything further would be appreciated

if max_output_length is not None:
normalized = _truncate_shell_outputs(normalized, max_output_length)
output_text = _render_shell_outputs(normalized)
if max_output_length is not None:
output_text = output_text[:max_output_length]
shell_output_payload = [_serialize_shell_output(entry) for entry in normalized]
provider_meta = dict(result.provider_data or {})
max_output_length = result.max_output_length
else:
output_text = str(result)
if requested_max_output_length is not None:
max_output_length = requested_max_output_length
output_text = output_text[:max_output_length]
except Exception as exc:
status = "failed"
output_text = _format_shell_error(exc)
Expand Down Expand Up @@ -2029,6 +2041,45 @@ def _render_shell_outputs(outputs: Sequence[ShellCommandOutput]) -> str:
return "\n\n".join(rendered_chunks)


def _truncate_shell_outputs(
outputs: Sequence[ShellCommandOutput], max_length: int
) -> list[ShellCommandOutput]:
if max_length <= 0:
return [
ShellCommandOutput(
stdout="",
stderr="",
outcome=output.outcome,
command=output.command,
provider_data=output.provider_data,
)
for output in outputs
]

remaining = max_length
truncated: list[ShellCommandOutput] = []
for output in outputs:
stdout = ""
stderr = ""
if remaining > 0 and output.stdout:
stdout = output.stdout[:remaining]
remaining -= len(stdout)
if remaining > 0 and output.stderr:
stderr = output.stderr[:remaining]
remaining -= len(stderr)
truncated.append(
ShellCommandOutput(
stdout=stdout,
stderr=stderr,
outcome=output.outcome,
command=output.command,
provider_data=output.provider_data,
)
)

return truncated


def _format_shell_error(error: Exception | BaseException | Any) -> str:
if isinstance(error, Exception):
message = str(error)
Expand Down
93 changes: 93 additions & 0 deletions tests/test_shell_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,96 @@ def __call__(self, request):
assert "status" not in payload_dict
assert "shell_output" not in payload_dict
assert "provider_data" not in payload_dict


@pytest.mark.asyncio
async def test_shell_tool_output_respects_max_output_length() -> None:
shell_tool = ShellTool(
executor=lambda request: ShellResult(
output=[
ShellCommandOutput(
stdout="0123456789",
stderr="abcdef",
outcome=ShellCallOutcome(type="exit", exit_code=0),
)
],
)
)

tool_call = {
"type": "shell_call",
"id": "shell_call",
"call_id": "call_shell",
"status": "completed",
"action": {
"commands": ["echo hi"],
"timeout_ms": 1000,
"max_output_length": 6,
},
}

tool_run = ToolRunShellCall(tool_call=tool_call, shell_tool=shell_tool)
agent = Agent(name="shell-agent", tools=[shell_tool])
context_wrapper: RunContextWrapper[Any] = RunContextWrapper(context=None)

result = await ShellAction.execute(
agent=agent,
call=tool_run,
hooks=RunHooks[Any](),
context_wrapper=context_wrapper,
config=RunConfig(),
)

assert isinstance(result, ToolCallOutputItem)
assert result.output == "012345"
raw_item = cast(dict[str, Any], result.raw_item)
assert raw_item["max_output_length"] == 6
assert raw_item["output"][0]["stdout"] == "012345"
assert raw_item["output"][0]["stderr"] == ""


@pytest.mark.asyncio
async def test_shell_tool_executor_can_override_max_output_length_to_zero() -> None:
shell_tool = ShellTool(
executor=lambda request: ShellResult(
output=[
ShellCommandOutput(
stdout="0123456789",
stderr="abcdef",
outcome=ShellCallOutcome(type="exit", exit_code=0),
)
],
max_output_length=0,
)
)

tool_call = {
"type": "shell_call",
"id": "shell_call",
"call_id": "call_shell",
"status": "completed",
"action": {
"commands": ["echo hi"],
"timeout_ms": 1000,
"max_output_length": 6,
},
}

tool_run = ToolRunShellCall(tool_call=tool_call, shell_tool=shell_tool)
agent = Agent(name="shell-agent", tools=[shell_tool])
context_wrapper: RunContextWrapper[Any] = RunContextWrapper(context=None)

result = await ShellAction.execute(
agent=agent,
call=tool_run,
hooks=RunHooks[Any](),
context_wrapper=context_wrapper,
config=RunConfig(),
)

assert isinstance(result, ToolCallOutputItem)
assert result.output == ""
raw_item = cast(dict[str, Any], result.raw_item)
assert raw_item["max_output_length"] == 0
assert raw_item["output"][0]["stdout"] == ""
assert raw_item["output"][0]["stderr"] == ""