diff --git a/.codex/skills/examples-auto-run/SKILL.md b/.codex/skills/examples-auto-run/SKILL.md new file mode 100644 index 0000000000..cca492b6a8 --- /dev/null +++ b/.codex/skills/examples-auto-run/SKILL.md @@ -0,0 +1,77 @@ +--- +name: examples-auto-run +description: Run python examples in auto mode with logging, rerun helpers, and background control. +--- + +# examples-auto-run + +## What it does + +- Runs `uv run examples/run_examples.py` with: + - `EXAMPLES_INTERACTIVE_MODE=auto` (auto-input/auto-approve). + - Per-example logs under `.tmp/examples-start-logs/`. + - Main summary log path passed via `--main-log` (also under `.tmp/examples-start-logs/`). + - Generates a rerun list of failures at `.tmp/examples-rerun.txt` when `--write-rerun` is set. +- Provides start/stop/status/logs/tail/collect/rerun helpers via `run.sh`. +- Background option keeps the process running with a pidfile; `stop` cleans it up. + +## Usage + +```bash +# Start (auto mode; interactive included by default) +.codex/skills/examples-auto-run/scripts/run.sh start [extra args to run_examples.py] +# Examples: +.codex/skills/examples-auto-run/scripts/run.sh start --filter basic +.codex/skills/examples-auto-run/scripts/run.sh start --include-server --include-audio + +# Check status +.codex/skills/examples-auto-run/scripts/run.sh status + +# Stop running job +.codex/skills/examples-auto-run/scripts/run.sh stop + +# List logs +.codex/skills/examples-auto-run/scripts/run.sh logs + +# Tail latest log (or specify one) +.codex/skills/examples-auto-run/scripts/run.sh tail +.codex/skills/examples-auto-run/scripts/run.sh tail main_20260113-123000.log + +# Collect rerun list from a main log (defaults to latest main_*.log) +.codex/skills/examples-auto-run/scripts/run.sh collect + +# Rerun only failed entries from rerun file (auto mode) +.codex/skills/examples-auto-run/scripts/run.sh rerun +``` + +## Defaults (overridable via env) + +- `EXAMPLES_INTERACTIVE_MODE=auto` +- `EXAMPLES_INCLUDE_INTERACTIVE=1` +- 
`EXAMPLES_INCLUDE_SERVER=0` +- `EXAMPLES_INCLUDE_AUDIO=0` +- `EXAMPLES_INCLUDE_EXTERNAL=0` +- Auto-approvals in auto mode: `APPLY_PATCH_AUTO_APPROVE=1`, `SHELL_AUTO_APPROVE=1`, `AUTO_APPROVE_MCP=1` + +## Log locations + +- Main logs: `.tmp/examples-start-logs/main_*.log` +- Per-example logs (from `run_examples.py`): `.tmp/examples-start-logs/.log` +- Rerun list: `.tmp/examples-rerun.txt` +- Stdout logs: `.tmp/examples-start-logs/stdout_*.log` + +## Notes + +- The runner delegates to `uv run examples/run_examples.py`, which already writes per-example logs and supports `--collect`, `--rerun-file`, and `--print-auto-skip`. +- `start` uses `--write-rerun` so failures are captured automatically. +- If `.tmp/examples-rerun.txt` exists and is non-empty, invoking the skill with no args runs `rerun` by default. + +## Behavioral validation (Codex/LLM responsibility) + +The runner does not perform any automated behavioral validation. After every foreground `start` or `rerun`, **Codex must manually validate** all exit-0 entries: + +1. Read the example source (and comments) to infer intended flow, tools used, and expected key outputs. +2. Open the matching per-example log under `.tmp/examples-start-logs/`. +3. Confirm the intended actions/results occurred; flag omissions or divergences. +4. Do this for **all passed examples**, not just a sample. +5. Report immediately after the run with concise citations to the exact log lines that justify the validation. diff --git a/.codex/skills/examples-auto-run/scripts/run.sh b/.codex/skills/examples-auto-run/scripts/run.sh new file mode 100755 index 0000000000..74258f8cac --- /dev/null +++ b/.codex/skills/examples-auto-run/scripts/run.sh @@ -0,0 +1,215 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../../.." 
&& pwd)" +PID_FILE="$ROOT/.tmp/examples-auto-run.pid" +LOG_DIR="$ROOT/.tmp/examples-start-logs" +RERUN_FILE="$ROOT/.tmp/examples-rerun.txt" + +ensure_dirs() { + mkdir -p "$LOG_DIR" "$ROOT/.tmp" +} + +is_running() { + local pid="$1" + [[ -n "$pid" ]] && ps -p "$pid" >/dev/null 2>&1 +} + +cmd_start() { + ensure_dirs + local background=0 + if [[ "${1:-}" == "--background" ]]; then + background=1 + shift + fi + + local ts main_log stdout_log + ts="$(date +%Y%m%d-%H%M%S)" + main_log="$LOG_DIR/main_${ts}.log" + stdout_log="$LOG_DIR/stdout_${ts}.log" + + local run_cmd=( + uv run examples/run_examples.py + --auto-mode + --write-rerun + --main-log "$main_log" + --logs-dir "$LOG_DIR" + ) + + if [[ "$background" -eq 1 ]]; then + if [[ -f "$PID_FILE" ]]; then + local pid + pid="$(cat "$PID_FILE" 2>/dev/null || true)" + if is_running "$pid"; then + echo "examples/run_examples.py already running (pid=$pid)." + exit 1 + fi + fi + ( + trap '' HUP + export EXAMPLES_INTERACTIVE_MODE="${EXAMPLES_INTERACTIVE_MODE:-auto}" + export APPLY_PATCH_AUTO_APPROVE="${APPLY_PATCH_AUTO_APPROVE:-1}" + export SHELL_AUTO_APPROVE="${SHELL_AUTO_APPROVE:-1}" + export AUTO_APPROVE_MCP="${AUTO_APPROVE_MCP:-1}" + export EXAMPLES_INCLUDE_INTERACTIVE="${EXAMPLES_INCLUDE_INTERACTIVE:-1}" + export EXAMPLES_INCLUDE_SERVER="${EXAMPLES_INCLUDE_SERVER:-0}" + export EXAMPLES_INCLUDE_AUDIO="${EXAMPLES_INCLUDE_AUDIO:-0}" + export EXAMPLES_INCLUDE_EXTERNAL="${EXAMPLES_INCLUDE_EXTERNAL:-0}" + cd "$ROOT" + exec "${run_cmd[@]}" "$@" > >(tee "$stdout_log") 2>&1 + ) & + local pid=$! + echo "$pid" >"$PID_FILE" + echo "Started run_examples.py (pid=$pid)" + echo "Main log: $main_log" + echo "Stdout log: $stdout_log" + echo "Run '.codex/skills/examples-auto-run/scripts/run.sh validate \"$main_log\"' after it finishes." 
+ return 0 + fi + + export EXAMPLES_INTERACTIVE_MODE="${EXAMPLES_INTERACTIVE_MODE:-auto}" + export APPLY_PATCH_AUTO_APPROVE="${APPLY_PATCH_AUTO_APPROVE:-1}" + export SHELL_AUTO_APPROVE="${SHELL_AUTO_APPROVE:-1}" + export AUTO_APPROVE_MCP="${AUTO_APPROVE_MCP:-1}" + export EXAMPLES_INCLUDE_INTERACTIVE="${EXAMPLES_INCLUDE_INTERACTIVE:-1}" + export EXAMPLES_INCLUDE_SERVER="${EXAMPLES_INCLUDE_SERVER:-0}" + export EXAMPLES_INCLUDE_AUDIO="${EXAMPLES_INCLUDE_AUDIO:-0}" + export EXAMPLES_INCLUDE_EXTERNAL="${EXAMPLES_INCLUDE_EXTERNAL:-0}" + cd "$ROOT" + set +e + "${run_cmd[@]}" "$@" 2>&1 | tee "$stdout_log" + local run_status=${PIPESTATUS[0]} + set -e + return "$run_status" +} + +cmd_stop() { + if [[ ! -f "$PID_FILE" ]]; then + echo "No pid file; nothing to stop." + return 0 + fi + local pid + pid="$(cat "$PID_FILE" 2>/dev/null || true)" + if [[ -z "$pid" ]]; then + rm -f "$PID_FILE" + echo "Pid file empty; cleaned." + return 0 + fi + if ! is_running "$pid"; then + rm -f "$PID_FILE" + echo "Process $pid not running; cleaned pid file." + return 0 + fi + echo "Stopping pid $pid ..." + kill "$pid" 2>/dev/null || true + sleep 1 + if is_running "$pid"; then + echo "Sending SIGKILL to $pid ..." + kill -9 "$pid" 2>/dev/null || true + fi + rm -f "$PID_FILE" + echo "Stopped." +} + +cmd_status() { + if [[ -f "$PID_FILE" ]]; then + local pid + pid="$(cat "$PID_FILE" 2>/dev/null || true)" + if is_running "$pid"; then + echo "Running (pid=$pid)" + return 0 + fi + fi + echo "Not running." +} + +cmd_logs() { + ensure_dirs + ls -1t "$LOG_DIR" +} + +cmd_tail() { + ensure_dirs + local file="${1:-}" + if [[ -z "$file" ]]; then + file="$(ls -1t "$LOG_DIR" | head -n1)" + fi + if [[ -z "$file" ]]; then + echo "No log files yet." + exit 1 + fi + tail -f "$LOG_DIR/$file" +} + +collect_rerun() { + ensure_dirs + local log_file="${1:-}" + if [[ -z "$log_file" ]]; then + log_file="$(ls -1t "$LOG_DIR"/main_*.log 2>/dev/null | head -n1)" + fi + if [[ -z "$log_file" ]] || [[ ! 
-f "$log_file" ]]; then + echo "No main log file found." + exit 1 + fi + cd "$ROOT" + uv run examples/run_examples.py --collect "$log_file" --output "$RERUN_FILE" +} + +cmd_rerun() { + ensure_dirs + local file="${1:-$RERUN_FILE}" + if [[ ! -s "$file" ]]; then + echo "Rerun list is empty: $file" + exit 0 + fi + local ts main_log stdout_log + ts="$(date +%Y%m%d-%H%M%S)" + main_log="$LOG_DIR/main_${ts}.log" + stdout_log="$LOG_DIR/stdout_${ts}.log" + cd "$ROOT" + export EXAMPLES_INTERACTIVE_MODE="${EXAMPLES_INTERACTIVE_MODE:-auto}" + export APPLY_PATCH_AUTO_APPROVE="${APPLY_PATCH_AUTO_APPROVE:-1}" + export SHELL_AUTO_APPROVE="${SHELL_AUTO_APPROVE:-1}" + export AUTO_APPROVE_MCP="${AUTO_APPROVE_MCP:-1}" + set +e + uv run examples/run_examples.py --auto-mode --rerun-file "$file" --write-rerun --main-log "$main_log" --logs-dir "$LOG_DIR" 2>&1 | tee "$stdout_log" + local run_status=${PIPESTATUS[0]} + set -e + return "$run_status" +} + +usage() { + cat <<'EOF' +Usage: run.sh [args...] + +Commands: + start [--filter ... | other args] Run examples in auto mode (foreground). Pass --background to run detached. + stop Kill the running auto-run (if any). + status Show whether it is running. + logs List log files (.tmp/examples-start-logs). + tail [logfile] Tail the latest (or specified) log. + collect [main_log] Parse a main log and write failed examples to .tmp/examples-rerun.txt. + rerun [rerun_file] Run only the examples listed in .tmp/examples-rerun.txt. 
+ +Environment overrides: + EXAMPLES_INTERACTIVE_MODE (default auto) + EXAMPLES_INCLUDE_SERVER/INTERACTIVE/AUDIO/EXTERNAL (defaults: 0/1/0/0) + APPLY_PATCH_AUTO_APPROVE, SHELL_AUTO_APPROVE, AUTO_APPROVE_MCP (default 1 in auto mode) +EOF +} + +default_cmd="start" +if [[ $# -eq 0 && -s "$RERUN_FILE" ]]; then + default_cmd="rerun" +fi + +case "${1:-$default_cmd}" in + start) shift || true; cmd_start "$@" ;; + stop) shift || true; cmd_stop ;; + status) shift || true; cmd_status ;; + logs) shift || true; cmd_logs ;; + tail) shift; cmd_tail "${1:-}" ;; + collect) shift || true; collect_rerun "${1:-}" ;; + rerun) shift || true; cmd_rerun "${1:-}" ;; + *) usage; exit 1 ;; +esac diff --git a/.gitignore b/.gitignore index 60782274e9..ac32a2998d 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ htmlcov/ .coverage .coverage.* .cache +.tmp/ nosetests.xml coverage.xml *.cover diff --git a/examples/agent_patterns/agents_as_tools.py b/examples/agent_patterns/agents_as_tools.py index 9fd118efb3..b670e2fe06 100644 --- a/examples/agent_patterns/agents_as_tools.py +++ b/examples/agent_patterns/agents_as_tools.py @@ -1,6 +1,7 @@ import asyncio from agents import Agent, ItemHelpers, MessageOutputItem, Runner, trace +from examples.auto_mode import input_with_fallback """ This example shows the agents-as-tools pattern. The frontline agent receives a user message and @@ -56,7 +57,10 @@ async def main(): - msg = input("Hi! What would you like translated, and to which languages? ") + msg = input_with_fallback( + "Hi! What would you like translated, and to which languages? ", + "Translate 'Hello, world!' 
to French and Spanish.", + ) # Run the entire orchestration in a single trace with trace("Orchestrator evaluator"): diff --git a/examples/agent_patterns/agents_as_tools_conditional.py b/examples/agent_patterns/agents_as_tools_conditional.py index e00f56d5e3..87533721d3 100644 --- a/examples/agent_patterns/agents_as_tools_conditional.py +++ b/examples/agent_patterns/agents_as_tools_conditional.py @@ -3,6 +3,7 @@ from pydantic import BaseModel from agents import Agent, AgentBase, RunContextWrapper, Runner, trace +from examples.auto_mode import input_with_fallback """ This example demonstrates the agents-as-tools pattern with conditional tool enabling. @@ -81,7 +82,7 @@ async def main(): print("2. French and Spanish (2 tools)") print("3. European languages (3 tools)") - choice = input("\nSelect option (1-3): ").strip() + choice = input_with_fallback("\nSelect option (1-3): ", "2").strip() preference_map = {"1": "spanish_only", "2": "french_spanish", "3": "european"} language_preference = preference_map.get(choice, "spanish_only") @@ -95,7 +96,10 @@ async def main(): print(f"The LLM will only see and can use these {len(available_tools)} tools\n") # Get user request - user_request = input("Ask a question and see responses in available languages:\n") + user_request = input_with_fallback( + "Ask a question and see responses in available languages:\n", + "How do you say good morning?", + ) # Run with LLM interaction print("\nProcessing request...") diff --git a/examples/agent_patterns/deterministic.py b/examples/agent_patterns/deterministic.py index 0c163afe9e..30bef35e25 100644 --- a/examples/agent_patterns/deterministic.py +++ b/examples/agent_patterns/deterministic.py @@ -3,6 +3,7 @@ from pydantic import BaseModel from agents import Agent, Runner, trace +from examples.auto_mode import input_with_fallback """ This example demonstrates a deterministic flow, where each step is performed by an agent. 
@@ -39,7 +40,10 @@ class OutlineCheckerOutput(BaseModel): async def main(): - input_prompt = input("What kind of story do you want? ") + input_prompt = input_with_fallback( + "What kind of story do you want? ", + "Write a short sci-fi story.", + ) # Ensure the entire workflow is a single trace with trace("Deterministic story flow"): diff --git a/examples/agent_patterns/input_guardrails.py b/examples/agent_patterns/input_guardrails.py index 18ab9d2a75..7e4210d6af 100644 --- a/examples/agent_patterns/input_guardrails.py +++ b/examples/agent_patterns/input_guardrails.py @@ -13,6 +13,7 @@ TResponseInputItem, input_guardrail, ) +from examples.auto_mode import input_with_fallback, is_auto_mode """ This example shows how to use guardrails. @@ -68,9 +69,13 @@ async def main(): ) input_data: list[TResponseInputItem] = [] + auto_mode = is_auto_mode() while True: - user_input = input("Enter a message: ") + user_input = input_with_fallback( + "Enter a message: ", + "What's the capital of California?", + ) input_data.append( { "role": "user", @@ -93,6 +98,8 @@ async def main(): "content": message, } ) + if auto_mode: + break # Sample run: # Enter a message: What's the capital of California? diff --git a/examples/agent_patterns/llm_as_a_judge.py b/examples/agent_patterns/llm_as_a_judge.py index 39a55c4630..1ee4915e18 100644 --- a/examples/agent_patterns/llm_as_a_judge.py +++ b/examples/agent_patterns/llm_as_a_judge.py @@ -5,6 +5,7 @@ from typing import Literal from agents import Agent, ItemHelpers, Runner, TResponseInputItem, trace +from examples.auto_mode import input_with_fallback, is_auto_mode """ This example shows the LLM as a judge pattern. The first agent generates an outline for a story. @@ -39,10 +40,16 @@ class EvaluationFeedback: async def main() -> None: - msg = input("What kind of story would you like to hear? ") + msg = input_with_fallback( + "What kind of story would you like to hear? 
", + "A detective story in space.", + ) input_items: list[TResponseInputItem] = [{"content": msg, "role": "user"}] latest_outline: str | None = None + auto_mode = is_auto_mode() + max_rounds = 3 if auto_mode else None + rounds = 0 # We'll run the entire workflow in a single trace with trace("LLM as a judge"): @@ -65,6 +72,12 @@ async def main() -> None: print("Story outline is good enough, exiting.") break + if auto_mode: + rounds += 1 + if max_rounds is not None and rounds >= max_rounds: + print("Auto mode: stopping after limited rounds.") + break + print("Re-running with feedback") input_items.append({"content": f"Feedback: {result.feedback}", "role": "user"}) diff --git a/examples/agent_patterns/parallelization.py b/examples/agent_patterns/parallelization.py index fe2a8ecd0b..60dcfbe07f 100644 --- a/examples/agent_patterns/parallelization.py +++ b/examples/agent_patterns/parallelization.py @@ -1,6 +1,7 @@ import asyncio from agents import Agent, ItemHelpers, Runner, trace +from examples.auto_mode import input_with_fallback """ This example shows the parallelization pattern. We run the agent three times in parallel, and pick @@ -19,7 +20,10 @@ async def main(): - msg = input("Hi! Enter a message, and we'll translate it to Spanish.\n\n") + msg = input_with_fallback( + "Hi! Enter a message, and we'll translate it to Spanish.\n\n", + "Good morning!", + ) # Ensure the entire workflow is a single trace with trace("Parallel translation"): diff --git a/examples/agent_patterns/routing.py b/examples/agent_patterns/routing.py index 3dcaefa980..4d0a49ab74 100644 --- a/examples/agent_patterns/routing.py +++ b/examples/agent_patterns/routing.py @@ -4,6 +4,7 @@ from openai.types.responses import ResponseContentPartDoneEvent, ResponseTextDeltaEvent from agents import Agent, RawResponsesStreamEvent, Runner, TResponseInputItem, trace +from examples.auto_mode import input_with_fallback, is_auto_mode """ This example shows the handoffs/routing pattern. 
The triage agent receives the first message, and @@ -37,9 +38,13 @@ async def main(): # We'll create an ID for this conversation, so we can link each trace conversation_id = str(uuid.uuid4().hex[:16]) - msg = input("Hi! We speak French, Spanish and English. How can I help? ") + msg = input_with_fallback( + "Hi! We speak French, Spanish and English. How can I help? ", + "Hello, how do I say good evening in French?", + ) agent = triage_agent inputs: list[TResponseInputItem] = [{"content": msg, "role": "user"}] + auto_mode = is_auto_mode() while True: # Each conversation turn is a single trace. Normally, each input from the user would be an @@ -61,7 +66,9 @@ async def main(): inputs = result.to_input_list() print("\n") - user_msg = input("Enter a message: ") + if auto_mode: + break + user_msg = input_with_fallback("Enter a message: ", "Thanks!") inputs.append({"content": user_msg, "role": "user"}) agent = result.current_agent diff --git a/examples/auto_mode.py b/examples/auto_mode.py new file mode 100644 index 0000000000..9a7b71fe71 --- /dev/null +++ b/examples/auto_mode.py @@ -0,0 +1,37 @@ +"""Utilities for running examples in automated mode. + +When ``EXAMPLES_INTERACTIVE_MODE=auto`` is set, these helpers provide +deterministic inputs and confirmations so examples can run without manual +interaction. The helpers are intentionally lightweight to avoid adding +dependencies to example code. 
+""" + +from __future__ import annotations + +import os + + +def is_auto_mode() -> bool: + """Return True when examples should bypass interactive prompts.""" + return os.environ.get("EXAMPLES_INTERACTIVE_MODE", "").lower() == "auto" + + +def input_with_fallback(prompt: str, fallback: str) -> str: + """Return the fallback text in auto mode, otherwise defer to input().""" + if is_auto_mode(): + print(f"[auto-input] {prompt.strip()} -> {fallback}") + return fallback + return input(prompt) + + +def confirm_with_fallback(prompt: str, default: bool = True) -> bool: + """Return default in auto mode; otherwise ask the user.""" + if is_auto_mode(): + choice = "yes" if default else "no" + print(f"[auto-confirm] {prompt.strip()} -> {choice}") + return default + + answer = input(prompt).strip().lower() + if not answer: + return default + return answer in {"y", "yes"} diff --git a/examples/basic/agent_lifecycle_example.py b/examples/basic/agent_lifecycle_example.py index 96238fe2ea..d135b8f452 100644 --- a/examples/basic/agent_lifecycle_example.py +++ b/examples/basic/agent_lifecycle_example.py @@ -13,6 +13,7 @@ Tool, function_tool, ) +from examples.auto_mode import input_with_fallback class CustomAgentHooks(AgentHooks): @@ -98,7 +99,7 @@ class FinalResult(BaseModel): async def main() -> None: - user_input = input("Enter a max number: ") + user_input = input_with_fallback("Enter a max number: ", "50") try: max_number = int(user_input) await Runner.run( diff --git a/examples/basic/lifecycle_example.py b/examples/basic/lifecycle_example.py index 76529c56b1..5ecd3a6b75 100644 --- a/examples/basic/lifecycle_example.py +++ b/examples/basic/lifecycle_example.py @@ -17,6 +17,7 @@ ) from agents.items import ModelResponse, TResponseInputItem from agents.tool_context import ToolContext +from examples.auto_mode import input_with_fallback class LoggingHooks(AgentHooks[Any]): @@ -146,7 +147,7 @@ class FinalResult(BaseModel): async def main() -> None: - user_input = input("Enter a max 
number: ") + user_input = input_with_fallback("Enter a max number: ", "50") try: max_number = int(user_input) await Runner.run( diff --git a/examples/basic/previous_response_id.py b/examples/basic/previous_response_id.py index b00bf3aa64..21c354219d 100644 --- a/examples/basic/previous_response_id.py +++ b/examples/basic/previous_response_id.py @@ -1,6 +1,7 @@ import asyncio from agents import Agent, Runner +from examples.auto_mode import input_with_fallback """This demonstrates usage of the `previous_response_id` parameter to continue a conversation. The second run passes the previous response ID to the model, which allows it to continue the @@ -59,7 +60,7 @@ async def main_stream(): if __name__ == "__main__": - is_stream = input("Run in stream mode? (y/n): ") + is_stream = input_with_fallback("Run in stream mode? (y/n): ", "n") if is_stream == "y": asyncio.run(main_stream()) else: diff --git a/examples/customer_service/main.py b/examples/customer_service/main.py index 266a7e6118..65191559c3 100644 --- a/examples/customer_service/main.py +++ b/examples/customer_service/main.py @@ -21,6 +21,7 @@ trace, ) from agents.extensions.handoff_prompt import RECOMMENDED_PROMPT_PREFIX +from examples.auto_mode import input_with_fallback, is_auto_mode ### CONTEXT @@ -143,13 +144,17 @@ async def main(): current_agent: Agent[AirlineAgentContext] = triage_agent input_items: list[TResponseInputItem] = [] context = AirlineAgentContext() + auto_mode = is_auto_mode() # Normally, each input from the user would be an API request to your app, and you can wrap the request in a trace() # Here, we'll just use a random UUID for the conversation ID conversation_id = uuid.uuid4().hex[:16] while True: - user_input = input("Enter your message: ") + user_input = input_with_fallback( + "Enter your message: ", + "What are your store hours?", + ) with trace("Customer service", group_id=conversation_id): input_items.append({"content": user_input, "role": "user"}) result = await 
Runner.run(current_agent, input_items, context=context) @@ -170,6 +175,8 @@ async def main(): print(f"{agent_name}: Skipping item: {new_item.__class__.__name__}") input_items = result.to_input_list() current_agent = result.last_agent + if auto_mode: + break if __name__ == "__main__": diff --git a/examples/financial_research_agent/main.py b/examples/financial_research_agent/main.py index b5b6cfdfd3..23b6d71d6b 100644 --- a/examples/financial_research_agent/main.py +++ b/examples/financial_research_agent/main.py @@ -1,5 +1,7 @@ import asyncio +from examples.auto_mode import input_with_fallback + from .manager import FinancialResearchManager @@ -8,7 +10,10 @@ # financial research query, for example: # "Write up an analysis of Apple Inc.'s most recent quarter." async def main() -> None: - query = input("Enter a financial research query: ") + query = input_with_fallback( + "Enter a financial research query: ", + "Write up an analysis of Apple Inc.'s most recent quarter.", + ) mgr = FinancialResearchManager() await mgr.run(query) diff --git a/examples/hosted_mcp/approvals.py b/examples/hosted_mcp/approvals.py index c3de0db447..2aa73c1ebc 100644 --- a/examples/hosted_mcp/approvals.py +++ b/examples/hosted_mcp/approvals.py @@ -8,14 +8,15 @@ MCPToolApprovalRequest, Runner, ) +from examples.auto_mode import confirm_with_fallback """This example demonstrates how to use the hosted MCP support in the OpenAI Responses API, with approval callbacks.""" def approval_callback(request: MCPToolApprovalRequest) -> MCPToolApprovalFunctionResult: - answer = input(f"Approve running the tool `{request.data.name}`? (y/n) ") - result: MCPToolApprovalFunctionResult = {"approve": answer == "y"} + approve = confirm_with_fallback(f"Approve running the tool `{request.data.name}`? 
(y/n) ", True) + result: MCPToolApprovalFunctionResult = {"approve": approve} if not result["approve"]: result["reason"] = "User denied" return result diff --git a/examples/mcp/git_example/main.py b/examples/mcp/git_example/main.py index ab229e8550..8a62744d18 100644 --- a/examples/mcp/git_example/main.py +++ b/examples/mcp/git_example/main.py @@ -3,6 +3,7 @@ from agents import Agent, Runner, trace from agents.mcp import MCPServer, MCPServerStdio +from examples.auto_mode import input_with_fallback async def run(mcp_server: MCPServer, directory_path: str): @@ -27,7 +28,10 @@ async def run(mcp_server: MCPServer, directory_path: str): async def main(): # Ask the user for the directory path - directory_path = input("Please enter the path to the git repository: ") + directory_path = input_with_fallback( + "Please enter the path to the git repository: ", + ".", + ) async with MCPServerStdio( cache_tools_list=True, # Cache the tools list, for demonstration diff --git a/examples/mcp/prompt_server/README.md b/examples/mcp/prompt_server/README.md index c1b1c3b376..c1eaa632df 100644 --- a/examples/mcp/prompt_server/README.md +++ b/examples/mcp/prompt_server/README.md @@ -10,7 +10,8 @@ uv run python examples/mcp/prompt_server/main.py ## Details -The example uses the `MCPServerStreamableHttp` class from `agents.mcp`. The server runs in a sub-process at `http://localhost:8000/mcp` and provides user-controlled prompts that generate agent instructions. +The example uses the `MCPServerStreamableHttp` class from `agents.mcp`. The script auto-selects an open localhost port (or honors `STREAMABLE_HTTP_PORT`) and runs the server at `http://:/mcp`, providing user-controlled prompts that generate agent instructions. +If you need a specific address, set `STREAMABLE_HTTP_PORT` and `STREAMABLE_HTTP_HOST`. The server exposes prompts like `generate_code_review_instructions` that take parameters such as focus area and programming language. 
The agent calls these prompts to dynamically generate its system instructions based on user-provided parameters. @@ -26,4 +27,4 @@ The example demonstrates two key functions: - Runs the agent against vulnerable sample code (command injection via `os.system`) - The agent analyzes the code and provides security-focused feedback using available tools -This pattern allows users to dynamically configure agent behavior through MCP prompts rather than hardcoded instructions. \ No newline at end of file +This pattern allows users to dynamically configure agent behavior through MCP prompts rather than hardcoded instructions. diff --git a/examples/mcp/prompt_server/main.py b/examples/mcp/prompt_server/main.py index 4caa95d888..3cd045e63b 100644 --- a/examples/mcp/prompt_server/main.py +++ b/examples/mcp/prompt_server/main.py @@ -1,14 +1,32 @@ import asyncio import os import shutil +import socket import subprocess import time -from typing import Any +from typing import Any, cast from agents import Agent, Runner, gen_trace_id, trace from agents.mcp import MCPServer, MCPServerStreamableHttp from agents.model_settings import ModelSettings +STREAMABLE_HTTP_HOST = os.getenv("STREAMABLE_HTTP_HOST", "127.0.0.1") + + +def _choose_port() -> int: + env_port = os.getenv("STREAMABLE_HTTP_PORT") + if env_port: + return int(env_port) + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind((STREAMABLE_HTTP_HOST, 0)) + address = cast(tuple[str, int], s.getsockname()) + return address[1] + + +STREAMABLE_HTTP_PORT = _choose_port() +os.environ.setdefault("STREAMABLE_HTTP_PORT", str(STREAMABLE_HTTP_PORT)) +STREAMABLE_HTTP_URL = f"http://{STREAMABLE_HTTP_HOST}:{STREAMABLE_HTTP_PORT}/mcp" + async def get_instructions_from_prompt(mcp_server: MCPServer, prompt_name: str, **kwargs) -> str: """Get agent instructions by calling MCP prompt endpoint (user-controlled)""" @@ -75,7 +93,7 @@ async def show_available_prompts(mcp_server: MCPServer): async def main(): async with 
MCPServerStreamableHttp( name="Simple Prompt Server", - params={"url": "http://localhost:8000/mcp"}, + params={"url": STREAMABLE_HTTP_URL}, ) as server: trace_id = gen_trace_id() with trace(workflow_name="Simple Prompt Demo", trace_id=trace_id): @@ -94,8 +112,11 @@ async def main(): this_dir = os.path.dirname(os.path.abspath(__file__)) server_file = os.path.join(this_dir, "server.py") - print("Starting Simple Prompt Server...") - process = subprocess.Popen(["uv", "run", server_file]) + print(f"Starting Simple Prompt Server at {STREAMABLE_HTTP_URL} ...") + env = os.environ.copy() + env.setdefault("STREAMABLE_HTTP_HOST", STREAMABLE_HTTP_HOST) + env.setdefault("STREAMABLE_HTTP_PORT", str(STREAMABLE_HTTP_PORT)) + process = subprocess.Popen(["uv", "run", server_file], env=env) time.sleep(3) print("Server started\n") except Exception as e: diff --git a/examples/mcp/prompt_server/server.py b/examples/mcp/prompt_server/server.py index 01dcbac346..7d6629acd7 100644 --- a/examples/mcp/prompt_server/server.py +++ b/examples/mcp/prompt_server/server.py @@ -1,7 +1,12 @@ +import os + from mcp.server.fastmcp import FastMCP +STREAMABLE_HTTP_HOST = os.getenv("STREAMABLE_HTTP_HOST", "127.0.0.1") +STREAMABLE_HTTP_PORT = int(os.getenv("STREAMABLE_HTTP_PORT", "18080")) + # Create server -mcp = FastMCP("Prompt Server") +mcp = FastMCP("Prompt Server", host=STREAMABLE_HTTP_HOST, port=STREAMABLE_HTTP_PORT) # Instruction-generating prompts (user-controlled) diff --git a/examples/mcp/sse_example/server.py b/examples/mcp/sse_example/server.py index df364aa3af..2e4fe2db86 100644 --- a/examples/mcp/sse_example/server.py +++ b/examples/mcp/sse_example/server.py @@ -23,10 +23,16 @@ def get_secret_word() -> str: @mcp.tool() def get_current_weather(city: str) -> str: print(f"[debug-server] get_current_weather({city})") - - endpoint = "https://wttr.in" - response = requests.get(f"{endpoint}/{city}") - return response.text + # Avoid slow or flaky network calls during automated runs. 
+ try: + endpoint = "https://wttr.in" + response = requests.get(f"{endpoint}/{city}", timeout=2) + if response.ok: + return response.text + except Exception: + pass + # Fallback keeps the tool responsive even when offline. + return f"Weather data unavailable right now; assume clear skies in {city}." if __name__ == "__main__": diff --git a/examples/mcp/streamablehttp_custom_client_example/README.md b/examples/mcp/streamablehttp_custom_client_example/README.md index 1569b3c28c..fc269a0644 100644 --- a/examples/mcp/streamablehttp_custom_client_example/README.md +++ b/examples/mcp/streamablehttp_custom_client_example/README.md @@ -38,7 +38,7 @@ def create_custom_http_client() -> httpx.AsyncClient: async with MCPServerStreamableHttp( name="Custom Client Server", params={ - "url": "http://localhost:8000/mcp", + "url": "http://localhost:/mcp", "httpx_client_factory": create_custom_http_client, }, ) as server: @@ -60,3 +60,4 @@ async with MCPServerStreamableHttp( - **Performance**: Optimize timeouts and connection settings for your use case - **Compatibility**: Work with corporate proxies and network restrictions +This example will auto-pick a free localhost port unless you set `STREAMABLE_HTTP_PORT`; use `STREAMABLE_HTTP_HOST` to change the bind address. 
diff --git a/examples/mcp/streamablehttp_custom_client_example/main.py b/examples/mcp/streamablehttp_custom_client_example/main.py index 41e26ec35d..20cbef1cdc 100644 --- a/examples/mcp/streamablehttp_custom_client_example/main.py +++ b/examples/mcp/streamablehttp_custom_client_example/main.py @@ -7,9 +7,10 @@ import asyncio import os import shutil +import socket import subprocess import time -from typing import Any +from typing import Any, cast import httpx @@ -17,6 +18,23 @@ from agents.mcp import MCPServer, MCPServerStreamableHttp from agents.model_settings import ModelSettings +STREAMABLE_HTTP_HOST = os.getenv("STREAMABLE_HTTP_HOST", "127.0.0.1") + + +def _choose_port() -> int: + env_port = os.getenv("STREAMABLE_HTTP_PORT") + if env_port: + return int(env_port) + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind((STREAMABLE_HTTP_HOST, 0)) + address = cast(tuple[str, int], s.getsockname()) + return address[1] + + +STREAMABLE_HTTP_PORT = _choose_port() +os.environ.setdefault("STREAMABLE_HTTP_PORT", str(STREAMABLE_HTTP_PORT)) +STREAMABLE_HTTP_URL = f"http://{STREAMABLE_HTTP_HOST}:{STREAMABLE_HTTP_PORT}/mcp" + def create_custom_http_client( headers: dict[str, str] | None = None, @@ -73,7 +91,7 @@ async def main(): async with MCPServerStreamableHttp( name="Streamable HTTP with Custom Client", params={ - "url": "http://localhost:8000/mcp", + "url": STREAMABLE_HTTP_URL, "httpx_client_factory": create_custom_http_client, }, ) as server: @@ -91,16 +109,19 @@ async def main(): ) # We'll run the Streamable HTTP server in a subprocess. 
Usually this would be a remote server, but for this - # demo, we'll run it locally at http://localhost:8000/mcp + # demo, we'll run it locally at STREAMABLE_HTTP_URL process: subprocess.Popen[Any] | None = None try: this_dir = os.path.dirname(os.path.abspath(__file__)) server_file = os.path.join(this_dir, "server.py") - print("Starting Streamable HTTP server at http://localhost:8000/mcp ...") + print(f"Starting Streamable HTTP server at {STREAMABLE_HTTP_URL} ...") # Run `uv run server.py` to start the Streamable HTTP server - process = subprocess.Popen(["uv", "run", server_file]) + env = os.environ.copy() + env.setdefault("STREAMABLE_HTTP_HOST", STREAMABLE_HTTP_HOST) + env.setdefault("STREAMABLE_HTTP_PORT", str(STREAMABLE_HTTP_PORT)) + process = subprocess.Popen(["uv", "run", server_file], env=env) # Give it 3 seconds to start time.sleep(3) diff --git a/examples/mcp/streamablehttp_custom_client_example/server.py b/examples/mcp/streamablehttp_custom_client_example/server.py index a078ee00fa..dd0d468753 100644 --- a/examples/mcp/streamablehttp_custom_client_example/server.py +++ b/examples/mcp/streamablehttp_custom_client_example/server.py @@ -1,9 +1,13 @@ +import os import random from mcp.server.fastmcp import FastMCP +STREAMABLE_HTTP_HOST = os.getenv("STREAMABLE_HTTP_HOST", "127.0.0.1") +STREAMABLE_HTTP_PORT = int(os.getenv("STREAMABLE_HTTP_PORT", "18080")) + # Create server -mcp = FastMCP("Echo Server") +mcp = FastMCP("Echo Server", host=STREAMABLE_HTTP_HOST, port=STREAMABLE_HTTP_PORT) @mcp.tool() diff --git a/examples/mcp/streamablehttp_example/README.md b/examples/mcp/streamablehttp_example/README.md index a07fe19be3..83cae670b6 100644 --- a/examples/mcp/streamablehttp_example/README.md +++ b/examples/mcp/streamablehttp_example/README.md @@ -10,4 +10,4 @@ uv run python examples/mcp/streamablehttp_example/main.py ## Details -The example uses the `MCPServerStreamableHttp` class from `agents.mcp`. The server runs in a sub-process at `https://localhost:8000/mcp`. 
+The example uses the `MCPServerStreamableHttp` class from `agents.mcp`. The script picks an open localhost port automatically (or honors `STREAMABLE_HTTP_PORT` if you set it) and starts the server at `http://<host>:<port>/mcp`. Set `STREAMABLE_HTTP_HOST` if you need a different bind address. diff --git a/examples/mcp/streamablehttp_example/main.py b/examples/mcp/streamablehttp_example/main.py index cc95e798b6..564a7bf98f 100644 --- a/examples/mcp/streamablehttp_example/main.py +++ b/examples/mcp/streamablehttp_example/main.py @@ -1,14 +1,32 @@ import asyncio import os import shutil +import socket import subprocess import time -from typing import Any +from typing import Any, cast from agents import Agent, Runner, gen_trace_id, trace from agents.mcp import MCPServer, MCPServerStreamableHttp from agents.model_settings import ModelSettings +STREAMABLE_HTTP_HOST = os.getenv("STREAMABLE_HTTP_HOST", "127.0.0.1") + + +def _choose_port() -> int: + env_port = os.getenv("STREAMABLE_HTTP_PORT") + if env_port: + return int(env_port) + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind((STREAMABLE_HTTP_HOST, 0)) + address = cast(tuple[str, int], s.getsockname()) + return address[1] + + +STREAMABLE_HTTP_PORT = _choose_port() +os.environ.setdefault("STREAMABLE_HTTP_PORT", str(STREAMABLE_HTTP_PORT)) +STREAMABLE_HTTP_URL = f"http://{STREAMABLE_HTTP_HOST}:{STREAMABLE_HTTP_PORT}/mcp" + async def run(mcp_server: MCPServer): agent = Agent( @@ -41,7 +59,7 @@ async def main(): async with MCPServerStreamableHttp( name="Streamable HTTP Python Server", params={ - "url": "http://localhost:8000/mcp", + "url": STREAMABLE_HTTP_URL, }, ) as server: trace_id = gen_trace_id() @@ -58,16 +76,19 @@ async def main(): ) # We'll run the Streamable HTTP server in a subprocess. 
Usually this would be a remote server, but for this - # demo, we'll run it locally at http://localhost:8000/mcp + # demo, we'll run it locally at STREAMABLE_HTTP_URL process: subprocess.Popen[Any] | None = None try: this_dir = os.path.dirname(os.path.abspath(__file__)) server_file = os.path.join(this_dir, "server.py") - print("Starting Streamable HTTP server at http://localhost:8000/mcp ...") + print(f"Starting Streamable HTTP server at {STREAMABLE_HTTP_URL} ...") # Run `uv run server.py` to start the Streamable HTTP server - process = subprocess.Popen(["uv", "run", server_file]) + env = os.environ.copy() + env.setdefault("STREAMABLE_HTTP_HOST", STREAMABLE_HTTP_HOST) + env.setdefault("STREAMABLE_HTTP_PORT", str(STREAMABLE_HTTP_PORT)) + process = subprocess.Popen(["uv", "run", server_file], env=env) # Give it 3 seconds to start time.sleep(3) diff --git a/examples/mcp/streamablehttp_example/server.py b/examples/mcp/streamablehttp_example/server.py index d8f839652a..d73ab895b6 100644 --- a/examples/mcp/streamablehttp_example/server.py +++ b/examples/mcp/streamablehttp_example/server.py @@ -1,10 +1,14 @@ +import os import random import requests from mcp.server.fastmcp import FastMCP +STREAMABLE_HTTP_HOST = os.getenv("STREAMABLE_HTTP_HOST", "127.0.0.1") +STREAMABLE_HTTP_PORT = int(os.getenv("STREAMABLE_HTTP_PORT", "18080")) + # Create server -mcp = FastMCP("Echo Server") +mcp = FastMCP("Echo Server", host=STREAMABLE_HTTP_HOST, port=STREAMABLE_HTTP_PORT) @mcp.tool() @@ -23,10 +27,16 @@ def get_secret_word() -> str: @mcp.tool() def get_current_weather(city: str) -> str: print(f"[debug-server] get_current_weather({city})") - - endpoint = "https://wttr.in" - response = requests.get(f"{endpoint}/{city}") - return response.text + # Avoid slow or flaky network calls during automated runs. 
+ try: + endpoint = "https://wttr.in" + response = requests.get(f"{endpoint}/{city}", timeout=2) + if response.ok: + return response.text + except Exception: + pass + # Fallback keeps the tool responsive even when offline. + return f"Weather data unavailable right now; assume clear skies in {city}." if __name__ == "__main__": diff --git a/examples/model_providers/litellm_provider.py b/examples/model_providers/litellm_provider.py index 4a1a696fcb..ea5f09ab32 100644 --- a/examples/model_providers/litellm_provider.py +++ b/examples/model_providers/litellm_provider.py @@ -1,6 +1,7 @@ from __future__ import annotations import asyncio +import os from agents import Agent, Runner, function_tool, set_tracing_disabled from agents.extensions.models.litellm_model import LitellmModel @@ -24,6 +25,9 @@ def get_weather(city: str): async def main(model: str, api_key: str): + if api_key == "dummy": + print("Skipping run because no valid LITELLM_API_KEY was provided.") + return agent = Agent( name="Assistant", instructions="You only respond in haikus.", @@ -36,7 +40,7 @@ async def main(model: str, api_key: str): if __name__ == "__main__": - # First try to get model/api key from args + # Prefer non-interactive defaults in auto mode to avoid blocking. 
import argparse parser = argparse.ArgumentParser() @@ -44,12 +48,12 @@ async def main(model: str, api_key: str): parser.add_argument("--api-key", type=str, required=False) args = parser.parse_args() - model = args.model - if not model: - model = input("Enter a model name for Litellm: ") + model = args.model or os.environ.get("LITELLM_MODEL", "openai/gpt-4o-mini") + api_key = args.api_key or os.environ.get("LITELLM_API_KEY", "dummy") - api_key = args.api_key - if not api_key: - api_key = input("Enter an API key for Litellm: ") + if not args.model: + print(f"Using default model: {model}") + if not args.api_key: + print("Using LITELLM_API_KEY from environment (or dummy placeholder).") asyncio.run(main(model, api_key)) diff --git a/examples/reasoning_content/main.py b/examples/reasoning_content/main.py index 7ccbab01b8..3db5d5cee6 100644 --- a/examples/reasoning_content/main.py +++ b/examples/reasoning_content/main.py @@ -20,7 +20,7 @@ from agents.models.interface import ModelTracing from agents.models.openai_provider import OpenAIProvider -MODEL_NAME = os.getenv("REASONING_MODEL_NAME") or "gpt-5" +MODEL_NAME = os.getenv("REASONING_MODEL_NAME") or "gpt-5.2" async def stream_with_reasoning_content(): diff --git a/examples/reasoning_content/runner_example.py b/examples/reasoning_content/runner_example.py index 3546da3502..e3c3d22506 100644 --- a/examples/reasoning_content/runner_example.py +++ b/examples/reasoning_content/runner_example.py @@ -17,7 +17,7 @@ from agents import Agent, ModelSettings, Runner, trace from agents.items import ReasoningItem -MODEL_NAME = os.getenv("EXAMPLE_MODEL_NAME") or "gpt-5.2" +MODEL_NAME = os.getenv("REASONING_MODEL_NAME") or "gpt-5.2" async def main(): diff --git a/examples/research_bot/main.py b/examples/research_bot/main.py index a0fd43dca8..b70bc8e483 100644 --- a/examples/research_bot/main.py +++ b/examples/research_bot/main.py @@ -1,10 +1,15 @@ import asyncio +from examples.auto_mode import input_with_fallback + from .manager import 
ResearchManager async def main() -> None: - query = input("What would you like to research? ") + query = input_with_fallback( + "What would you like to research? ", + "Impact of electric vehicles on the grid.", + ) await ResearchManager().run(query) diff --git a/examples/run_examples.py b/examples/run_examples.py index 0d51a028f1..a3a8174464 100644 --- a/examples/run_examples.py +++ b/examples/run_examples.py @@ -1,37 +1,54 @@ -"""Run multiple example entry points in this repository. - -This script locates Python files under ``examples/`` that contain a -``__main__`` guard and executes them one by one. By default it skips -interactive, server-like, audio-heavy, and external-service examples so -that automated validation does not hang waiting for input or require -hardware. Use flags to opt into those categories when you want to run -them. - -Usage examples: - - uv run examples/run_examples.py --dry-run - uv run examples/run_examples.py --filter basic - uv run examples/run_examples.py --include-interactive --include-server - -By default the script keeps running even if an example fails; use -``--fail-fast`` to stop on the first failure. +"""Run multiple example entry points with optional auto mode and logging. + +Features: +* Discovers ``__main__``-guarded example files under ``examples/``. +* Skips interactive/server/audio/external examples unless explicitly included. +* Auto mode (``EXAMPLES_INTERACTIVE_MODE=auto``) enables deterministic inputs, + auto-approvals, and turns on interactive examples by default. +* Writes per-example logs to ``.tmp/examples-start-logs`` and a main summary log. +* Generates a rerun list of failures at ``.tmp/examples-rerun.txt``. 
""" from __future__ import annotations import argparse +import datetime +import os import re import shlex import subprocess import sys +import threading from collections.abc import Iterable, Sequence +from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass, field -from pathlib import Path +from pathlib import Path, PurePosixPath ROOT_DIR = Path(__file__).resolve().parent.parent EXAMPLES_DIR = ROOT_DIR / "examples" MAIN_PATTERN = re.compile(r"__name__\s*==\s*['\"]__main__['\"]") +LOG_DIR_DEFAULT = ROOT_DIR / ".tmp" / "examples-start-logs" +RERUN_FILE_DEFAULT = ROOT_DIR / ".tmp" / "examples-rerun.txt" +DEFAULT_MAIN_LOG = LOG_DIR_DEFAULT / f"main_{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}.log" + +# Examples that are noisy, require extra credentials, or hang in auto runs. +DEFAULT_AUTO_SKIP = { + "examples/agent_patterns/llm_as_a_judge.py", + "examples/agent_patterns/routing.py", + "examples/customer_service/main.py", + "examples/hosted_mcp/connectors.py", + "examples/mcp/git_example/main.py", + "examples/model_providers/custom_example_agent.py", + "examples/model_providers/custom_example_global.py", + "examples/model_providers/custom_example_provider.py", + "examples/realtime/app/server.py", + "examples/realtime/cli/demo.py", + "examples/realtime/twilio/server.py", + "examples/voice/static/main.py", + "examples/voice/streamed/main.py", +} + @dataclass class ExampleScript: @@ -40,7 +57,7 @@ class ExampleScript: @property def relpath(self) -> str: - return str(self.path.relative_to(ROOT_DIR)) + return normalize_relpath(str(self.path.relative_to(ROOT_DIR))) @property def module(self) -> str: @@ -53,6 +70,20 @@ def command(self) -> list[str]: return ["uv", "run", "python", "-m", self.module] +@dataclass +class ExampleResult: + script: ExampleScript + status: str + reason: str = "" + log_path: Path | None = None + exit_code: int | None = None + + +def normalize_relpath(relpath: str) -> str: + normalized = 
relpath.replace("\\", "/") + return str(PurePosixPath(normalized)) + + def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description="Run example scripts sequentially.") parser.add_argument( @@ -86,14 +117,58 @@ def parse_args() -> argparse.Namespace: help="Include examples that rely on extra services like Redis, Dapr, Twilio, or Playwright.", ) parser.add_argument( - "--fail-fast", + "--verbose", action="store_true", - help="Stop after the first failing example.", + help="Show detected tags for each example entry.", ) parser.add_argument( - "--verbose", + "--logs-dir", + default=str(LOG_DIR_DEFAULT), + help="Directory for per-example logs and main log.", + ) + parser.add_argument( + "--main-log", + default=str(DEFAULT_MAIN_LOG), + help="Path to write the main summary log.", + ) + parser.add_argument( + "--rerun-file", + help="Only run examples listed in this file (one relative path per line).", + ) + parser.add_argument( + "--write-rerun", action="store_true", - help="Show detected tags for each example entry.", + help="Write failures to .tmp/examples-rerun.txt after the run.", + ) + parser.add_argument( + "--collect", + help="Parse a previous main log to emit a rerun list instead of running examples.", + ) + parser.add_argument( + "--output", + help="Output path for --collect rerun list (defaults to stdout).", + ) + parser.add_argument( + "--print-auto-skip", + action="store_true", + help="Show the current auto-skip list and exit.", + ) + parser.add_argument( + "--auto-mode", + action="store_true", + help="Force EXAMPLES_INTERACTIVE_MODE=auto for this run.", + ) + parser.add_argument( + "--jobs", + "-j", + type=int, + default=int(os.environ.get("EXAMPLES_JOBS", "4")), + help="Number of examples to run in parallel (default: 4). Use 1 to force serial execution.", + ) + parser.add_argument( + "--no-buffer-output", + action="store_true", + help="Stream each example's stdout directly (may interleave). 
By default output is buffered per example to reduce interleaving.", ) return parser.parse_args() @@ -103,7 +178,11 @@ def detect_tags(path: Path, source: str) -> set[str]: lower_source = source.lower() lower_parts = [part.lower() for part in path.parts] - if re.search(r"\binput\s*\(", source): + if ( + re.search(r"\binput\s*\(", source) + or "input_with_fallback(" in lower_source + or "confirm_with_fallback(" in lower_source + ): tags.add("interactive") if "prompt_toolkit" in lower_source or "questionary" in lower_source: tags.add("interactive") @@ -153,9 +232,17 @@ def discover_examples(filters: Iterable[str]) -> list[ExampleScript]: return sorted(examples, key=lambda item: item.relpath) -def should_skip(tags: set[str], allowed_overrides: set[str]) -> tuple[bool, set[str]]: +def should_skip( + tags: set[str], + allowed_overrides: set[str], + auto_skip_set: set[str], + relpath: str, + auto_mode: bool, +) -> tuple[bool, set[str]]: blocked = {"interactive", "server", "audio", "external"} - allowed_overrides active_blockers = tags & blocked + if auto_mode and relpath in auto_skip_set: + active_blockers = active_blockers | {"auto-skip"} return (len(active_blockers) > 0, active_blockers) @@ -163,60 +250,318 @@ def format_command(cmd: Sequence[str]) -> str: return shlex.join(cmd) +def display_path(path: Path) -> str: + try: + return str(path.relative_to(ROOT_DIR)) + except ValueError: + return str(path) + + +def env_flag(name: str) -> bool | None: + raw = os.environ.get(name) + if raw is None: + return None + return raw.strip().lower() in {"1", "true", "yes", "on"} + + +def load_auto_skip() -> set[str]: + env_value = os.environ.get("EXAMPLES_AUTO_SKIP", "") + if env_value.strip(): + parts = re.split(r"[\s,]+", env_value.strip()) + return {normalize_relpath(p) for p in parts if p} + return {normalize_relpath(p) for p in DEFAULT_AUTO_SKIP} + + +def write_main_log_line(handle, line: str) -> None: + handle.write(line + "\n") + handle.flush() + + +def ensure_dirs(path: Path, 
is_file: bool | None = None) -> None: + """Create directories for a file or directory path. + + If `is_file` is True, always create the parent directory. If False, create the + directory itself. When None, treat paths with a suffix as files and others as + directories, but suffix-less file names should pass is_file=True to avoid + accidental directory creation. + """ + if is_file is None: + is_file = bool(path.suffix) + target = path.parent if is_file else path + target.mkdir(parents=True, exist_ok=True) + + +def parse_rerun_from_log(log_path: Path) -> list[str]: + if not log_path.exists(): + raise FileNotFoundError(log_path) + rerun: list[str] = [] + with log_path.open("r", encoding="utf-8") as handle: + for line in handle: + stripped = line.strip() + if not stripped or stripped.startswith("#"): + continue + parts = stripped.split() + if len(parts) < 2: + continue + status, relpath = parts[0].upper(), parts[1] + if status in {"FAILED", "ERROR", "UNKNOWN"}: + rerun.append(normalize_relpath(relpath)) + return rerun + + def run_examples(examples: Sequence[ExampleScript], args: argparse.Namespace) -> int: overrides: set[str] = set() - if args.include_interactive: + if args.include_interactive or env_flag("EXAMPLES_INCLUDE_INTERACTIVE"): overrides.add("interactive") - if args.include_server: + if args.include_server or env_flag("EXAMPLES_INCLUDE_SERVER"): overrides.add("server") - if args.include_audio: + if args.include_audio or env_flag("EXAMPLES_INCLUDE_AUDIO"): overrides.add("audio") - if args.include_external: + if args.include_external or env_flag("EXAMPLES_INCLUDE_EXTERNAL"): overrides.add("external") + logs_dir = Path(args.logs_dir).resolve() + main_log_path = Path(args.main_log).resolve() + auto_mode = args.auto_mode or os.environ.get("EXAMPLES_INTERACTIVE_MODE", "").lower() == "auto" + auto_skip_set = load_auto_skip() + + if auto_mode and "interactive" not in overrides: + overrides.add("interactive") + + ensure_dirs(logs_dir, is_file=False) + 
ensure_dirs(main_log_path, is_file=True) + rerun_entries: list[str] = [] + if not examples: print("No example entry points found that match the filters.") return 0 + print(f"Interactive mode: {'auto' if auto_mode else 'prompt'}") print(f"Found {len(examples)} example entry points under examples/.") executed = 0 skipped = 0 failed = 0 + results: list[ExampleResult] = [] + + jobs = max(1, args.jobs) + + output_lock = threading.Lock() + main_log_lock = threading.Lock() + buffer_output = not args.no_buffer_output and os.environ.get( + "EXAMPLES_BUFFER_OUTPUT", "1" + ).lower() not in {"0", "false", "no", "off"} + + def safe_write_main(line: str) -> None: + with main_log_lock: + write_main_log_line(main_log, line) + + def run_single(example: ExampleScript) -> ExampleResult: + relpath = example.relpath + log_filename = f"{relpath.replace('/', '__')}.log" + log_path = logs_dir / log_filename + ensure_dirs(log_path, is_file=True) + + env = os.environ.copy() + if auto_mode: + env["EXAMPLES_INTERACTIVE_MODE"] = "auto" + env["APPLY_PATCH_AUTO_APPROVE"] = "1" + env.setdefault("SHELL_AUTO_APPROVE", "1") + env.setdefault("AUTO_APPROVE_MCP", "1") + + proc = subprocess.Popen( + example.command, + cwd=ROOT_DIR, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + env=env, + ) + assert proc.stdout is not None + force_prompt_stream = (not auto_mode) and ("interactive" in example.tags) + buffer_output_local = buffer_output and not force_prompt_stream + buffer_lines: list[str] = [] + + with log_path.open("w", encoding="utf-8") as per_log: + if force_prompt_stream: + at_line_start = True + while True: + char = proc.stdout.read(1) + if char == "": + break + per_log.write(char) + with output_lock: + if at_line_start: + sys.stdout.write(f"[{relpath}] ") + sys.stdout.write(char) + sys.stdout.flush() + at_line_start = char == "\n" + else: + for line in proc.stdout: + per_log.write(line) + if buffer_output_local: + buffer_lines.append(line) + else: + with output_lock: + 
sys.stdout.write(f"[{relpath}] {line}") + proc.wait() + exit_code = proc.returncode + + if buffer_output_local and buffer_lines: + with output_lock: + for line in buffer_lines: + sys.stdout.write(f"[{relpath}] {line}") + + if exit_code == 0: + safe_write_main(f"PASSED {relpath} exit=0 log={display_path(log_path)}") + return ExampleResult( + script=example, + status="passed", + log_path=log_path, + exit_code=exit_code, + ) + + info = f"exit={exit_code}" + with output_lock: + print(f" !! {relpath} exited with {exit_code}") + safe_write_main(f"FAILED {relpath} exit={exit_code} log={display_path(log_path)}") + return ExampleResult( + script=example, + status="failed", + reason=info, + log_path=log_path, + exit_code=exit_code, + ) + + with main_log_path.open("w", encoding="utf-8") as main_log: + safe_write_main(f"# run started {datetime.datetime.now().isoformat()}") + safe_write_main(f"# filters: {args.filter or '-'}") + safe_write_main(f"# include: {sorted(overrides)}") + safe_write_main(f"# auto_mode: {auto_mode}") + safe_write_main(f"# logs_dir: {logs_dir}") + safe_write_main(f"# jobs: {jobs}") + safe_write_main(f"# buffer_output: {buffer_output}") + + run_list: list[ExampleScript] = [] + + for example in examples: + relpath = example.relpath + skip, reasons = should_skip(example.tags, overrides, auto_skip_set, relpath, auto_mode) + tag_label = f" [tags: {', '.join(sorted(example.tags))}]" if args.verbose else "" + + if skip: + reason_label = f" (skipped: {', '.join(sorted(reasons))})" if reasons else "" + print(f"- SKIP {relpath}{tag_label}{reason_label}") + safe_write_main(f"SKIPPED {relpath} reasons={','.join(sorted(reasons))}") + skipped += 1 + results.append( + ExampleResult(script=example, status="skipped", reason=",".join(reasons)) + ) + continue + + print(f"- RUN {relpath}{tag_label}") + print(f" cmd: {format_command(example.command)}") + + if args.dry_run: + safe_write_main(f"DRYRUN {relpath}") + results.append(ExampleResult(script=example, 
status="dry-run")) + continue + + run_list.append(example) + + interactive_in_run_list = any("interactive" in ex.tags for ex in run_list) + interactive_requested = "interactive" in overrides + + if run_list and (not auto_mode) and (interactive_in_run_list or interactive_requested): + if jobs != 1: + print( + "Interactive examples detected; forcing serial execution to avoid shared stdin." + ) + reason = "interactive" if interactive_in_run_list else "interactive-requested" + safe_write_main(f"# jobs_adjusted: 1 reason={reason}") + jobs = 1 + + run_results: dict[str, ExampleResult] = {} + if run_list: + with ThreadPoolExecutor(max_workers=jobs) as executor: + future_map = {executor.submit(run_single, ex): ex for ex in run_list} + for future in as_completed(future_map): + result = future.result() + run_results[result.script.relpath] = result + + for ex in run_list: + result = run_results[ex.relpath] + results.append(result) + if result.status == "passed": + executed += 1 + elif result.status == "failed": + failed += 1 + rerun_entries.append(ex.relpath) + safe_write_main(f"# summary executed={executed} skipped={skipped} failed={failed}") + + if args.write_rerun: + ensure_dirs(RERUN_FILE_DEFAULT, is_file=True) + if rerun_entries: + contents = "\n".join(rerun_entries) + "\n" + else: + contents = "" + RERUN_FILE_DEFAULT.write_text(contents, encoding="utf-8") + print(f"Wrote rerun list to {RERUN_FILE_DEFAULT}") + + print(f"Main log: {main_log_path}") + print(f"Done. 
Ran {executed} example(s), skipped {skipped}, failed {failed}.") - for example in examples: - skip, reasons = should_skip(example.tags, overrides) - tag_label = f" [tags: {', '.join(sorted(example.tags))}]" if args.verbose else "" - - if skip: - reason_label = f" (skipped: {', '.join(sorted(reasons))})" if reasons else "" - print(f"- SKIP {example.relpath}{tag_label}{reason_label}") - skipped += 1 - continue - - print(f"- RUN {example.relpath}{tag_label}") - print(f" cmd: {format_command(example.command)}") - - if args.dry_run: - continue - - result = subprocess.run(example.command, cwd=ROOT_DIR) - if result.returncode != 0: - print(f" !! {example.relpath} exited with {result.returncode}") - failed += 1 - if args.fail_fast: - return result.returncode - continue - - executed += 1 + # Summary table + status_w = 9 + name_w = 44 + info_w = 32 + print("\nResults:") + print(f"{'status'.ljust(status_w)} {'example'.ljust(name_w)} {'info'.ljust(info_w)} log") + print(f"{'-' * status_w} {'-' * name_w} {'-' * info_w} ---") + for result in results: + info = result.reason or ("exit 0" if result.status == "passed" else "") + log_disp = ( + display_path(result.log_path) if result.log_path and result.log_path.exists() else "-" + ) + print( + f"{result.status.ljust(status_w)} {result.script.relpath.ljust(name_w)} {info.ljust(info_w)} {log_disp}" + ) - print(f"Done. 
Ran {executed} example(s), skipped {skipped}, failed {failed}.") return 0 if failed == 0 else 1 def main() -> int: args = parse_args() + if args.print_auto_skip: + for entry in sorted(load_auto_skip()): + print(entry) + return 0 + + if args.collect: + paths = parse_rerun_from_log(Path(args.collect)) + if args.output: + out = Path(args.output) + ensure_dirs(out, is_file=True) + out.write_text("\n".join(paths) + "\n", encoding="utf-8") + print(f"Wrote {len(paths)} entries to {out}") + else: + for p in paths: + print(p) + return 0 + examples = discover_examples(args.filter) + if args.rerun_file: + rerun_set = { + line.strip() + for line in Path(args.rerun_file).read_text(encoding="utf-8").splitlines() + if line.strip() + } + examples = [ex for ex in examples if ex.relpath in rerun_set] + if not examples: + print("Rerun list is empty; nothing to do.") + return 0 + print(f"Rerun mode: {len(examples)} example(s) from {args.rerun_file}") + return run_examples(examples, args) diff --git a/examples/tools/apply_patch.py b/examples/tools/apply_patch.py index 19d0cfb7dc..57a49755c6 100644 --- a/examples/tools/apply_patch.py +++ b/examples/tools/apply_patch.py @@ -7,6 +7,7 @@ from agents import Agent, ApplyPatchTool, ModelSettings, Runner, apply_diff, trace from agents.editor import ApplyPatchOperation, ApplyPatchResult +from examples.auto_mode import confirm_with_fallback, is_auto_mode class ApprovalTracker: @@ -89,8 +90,8 @@ def _require_approval(self, operation: ApplyPatchOperation, display_path: str) - if operation.diff: preview = operation.diff if len(operation.diff) < 400 else f"{operation.diff[:400]}…" print("- diff preview:\n", preview) - answer = input("Proceed? [y/N] ").strip().lower() - if answer not in {"y", "yes"}: + approved = confirm_with_fallback("Proceed? 
[y/N] ", default=is_auto_mode()) + if not approved: raise RuntimeError("Apply patch operation rejected by user.") self._approvals.remember(fingerprint) @@ -162,7 +163,7 @@ async def main(auto_approve: bool, model: str) -> None: ) parser.add_argument( "--model", - default="gpt-5.1", + default="gpt-5.2", help="Model ID to use for the agent.", ) args = parser.parse_args() diff --git a/examples/tools/shell.py b/examples/tools/shell.py index 7dcb133095..37e815178a 100644 --- a/examples/tools/shell.py +++ b/examples/tools/shell.py @@ -108,7 +108,7 @@ async def main(prompt: str, model: str) -> None: ) parser.add_argument( "--model", - default="gpt-5.1", + default="gpt-5.2", ) args = parser.parse_args() asyncio.run(main(args.prompt, args.model))