openai · seratch · Jan 13, 2026 · Jan 13, 2026 · Jan 13, 2026 · Jan 13, 2026
diff --git a/.codex/skills/examples-auto-run/SKILL.md b/.codex/skills/examples-auto-run/SKILL.md
@@ -0,0 +1,77 @@
+---
+name: examples-auto-run
+description: Run python examples in auto mode with logging, rerun helpers, and background control.
+---
+
+# examples-auto-run
+
+## What it does
+
+- Runs `uv run examples/run_examples.py` with:
+  - `EXAMPLES_INTERACTIVE_MODE=auto` (auto-input/auto-approve).
+  - Per-example logs under `.tmp/examples-start-logs/`.
+  - Main summary log path passed via `--main-log` (also under `.tmp/examples-start-logs/`).
+  - A rerun list of failures written to `.tmp/examples-rerun.txt` when `--write-rerun` is set (the `start` and `rerun` helpers always pass it).
+- Provides start/stop/status/logs/tail/collect/rerun helpers via `run.sh`.
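+- Background option (`start --background`; the flag must be the first argument after `start`) keeps the process running and records its PID in `.tmp/examples-auto-run.pid`; `stop` terminates the process and removes the pidfile.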
+
+## Usage
+
+```bash
+# Start (auto mode; interactive included by default)
+.codex/skills/examples-auto-run/scripts/run.sh start [extra args to run_examples.py]
+# Examples:
+.codex/skills/examples-auto-run/scripts/run.sh start --filter basic
+.codex/skills/examples-auto-run/scripts/run.sh start --include-server --include-audio
+
+# Check status
+.codex/skills/examples-auto-run/scripts/run.sh status
+
+# Stop running job
+.codex/skills/examples-auto-run/scripts/run.sh stop
+
+# List logs
+.codex/skills/examples-auto-run/scripts/run.sh logs
+
+# Tail latest log (or specify one)
+.codex/skills/examples-auto-run/scripts/run.sh tail
+.codex/skills/examples-auto-run/scripts/run.sh tail main_20260113-123000.log
+
+# Collect rerun list from a main log (defaults to latest main_*.log)
+.codex/skills/examples-auto-run/scripts/run.sh collect
+
+# Rerun only failed entries from rerun file (auto mode)
+.codex/skills/examples-auto-run/scripts/run.sh rerun
+```
+
+## Defaults (overridable via env)
+
+- `EXAMPLES_INTERACTIVE_MODE=auto`
+- `EXAMPLES_INCLUDE_INTERACTIVE=1`
+- `EXAMPLES_INCLUDE_SERVER=0`
+- `EXAMPLES_INCLUDE_AUDIO=0`
+- `EXAMPLES_INCLUDE_EXTERNAL=0`
+- Auto-approvals in auto mode: `APPLY_PATCH_AUTO_APPROVE=1`, `SHELL_AUTO_APPROVE=1`, `AUTO_APPROVE_MCP=1`
+
+## Log locations
+
+- Main logs: `.tmp/examples-start-logs/main_*.log`
+- Per-example logs (from `run_examples.py`): `.tmp/examples-start-logs/<module_path>.log`
+- Rerun list: `.tmp/examples-rerun.txt`
+- Stdout logs: `.tmp/examples-start-logs/stdout_*.log`
+
+## Notes
+
+- The runner delegates to `uv run examples/run_examples.py`, which already writes per-example logs and supports `--collect`, `--rerun-file`, and `--print-auto-skip`.
+- `start` uses `--write-rerun` so failures are captured automatically.
+- If `.tmp/examples-rerun.txt` exists and is non-empty, invoking the skill with no args runs `rerun` by default.
+
+## Behavioral validation (Codex/LLM responsibility)
+
+The runner does not perform any automated behavioral validation. After every foreground `start` or `rerun`, **Codex must manually validate** all exit-0 entries:
+
+1. Read the example source (and comments) to infer intended flow, tools used, and expected key outputs.
+2. Open the matching per-example log under `.tmp/examples-start-logs/`.
+3. Confirm the intended actions/results occurred; flag omissions or divergences.
+4. Do this for **all passed examples**, not just a sample.
+5. Report immediately after the run with concise citations to the exact log lines that justify the validation.
diff --git a/.codex/skills/examples-auto-run/scripts/run.sh b/.codex/skills/examples-auto-run/scripts/run.sh
@@ -0,0 +1,215 @@
+#!/usr/bin/env bash
+# Helper for running examples/run_examples.py in auto mode with logging,
+# background control, and rerun support.
+set -e -u -o pipefail
+
+# This script lives in .codex/skills/examples-auto-run/scripts, i.e. four
+# directories below the repository root.
+SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
+ROOT="$(cd "$SCRIPT_DIR/../../../.." && pwd)"
+
+# All runtime artifacts are kept under $ROOT/.tmp.
+LOG_DIR="$ROOT/.tmp/examples-start-logs"
+PID_FILE="$ROOT/.tmp/examples-auto-run.pid"
+RERUN_FILE="$ROOT/.tmp/examples-rerun.txt"
+
+ensure_dirs() {
+  mkdir -p "$LOG_DIR" "$ROOT/.tmp"
+}
+
+is_running() {
+  local pid="$1"
+  [[ -n "$pid" ]] && ps -p "$pid" >/dev/null 2>&1
+}
+
+cmd_start() {
+  ensure_dirs
+  local background=0
+  if [[ "${1:-}" == "--background" ]]; then
+    background=1
+    shift
+  fi
+
+  local ts main_log stdout_log
+  ts="$(date +%Y%m%d-%H%M%S)"
+  main_log="$LOG_DIR/main_${ts}.log"
+  stdout_log="$LOG_DIR/stdout_${ts}.log"
+
+  local run_cmd=(
+    uv run examples/run_examples.py
+    --auto-mode
+    --write-rerun
+    --main-log "$main_log"
+    --logs-dir "$LOG_DIR"
+  )
+
+  if [[ "$background" -eq 1 ]]; then
+    if [[ -f "$PID_FILE" ]]; then
+      local pid
+      pid="$(cat "$PID_FILE" 2>/dev/null || true)"
+      if is_running "$pid"; then
+        echo "examples/run_examples.py already running (pid=$pid)."
+        exit 1
+      fi
+    fi
+    (
+      trap '' HUP
+      export EXAMPLES_INTERACTIVE_MODE="${EXAMPLES_INTERACTIVE_MODE:-auto}"
+      export APPLY_PATCH_AUTO_APPROVE="${APPLY_PATCH_AUTO_APPROVE:-1}"
+      export SHELL_AUTO_APPROVE="${SHELL_AUTO_APPROVE:-1}"
+      export AUTO_APPROVE_MCP="${AUTO_APPROVE_MCP:-1}"
+      export EXAMPLES_INCLUDE_INTERACTIVE="${EXAMPLES_INCLUDE_INTERACTIVE:-1}"
+      export EXAMPLES_INCLUDE_SERVER="${EXAMPLES_INCLUDE_SERVER:-0}"
+      export EXAMPLES_INCLUDE_AUDIO="${EXAMPLES_INCLUDE_AUDIO:-0}"
+      export EXAMPLES_INCLUDE_EXTERNAL="${EXAMPLES_INCLUDE_EXTERNAL:-0}"
+      cd "$ROOT"
+      exec "${run_cmd[@]}" "$@" > >(tee "$stdout_log") 2>&1
+    ) &
+    local pid=$!
+    echo "$pid" >"$PID_FILE"
+    echo "Started run_examples.py (pid=$pid)"
+    echo "Main log: $main_log"
+    echo "Stdout log: $stdout_log"
+    echo "Run '.codex/skills/examples-auto-run/scripts/run.sh validate \"$main_log\"' after it finishes."
+    return 0
+  fi
+
+  export EXAMPLES_INTERACTIVE_MODE="${EXAMPLES_INTERACTIVE_MODE:-auto}"
+  export APPLY_PATCH_AUTO_APPROVE="${APPLY_PATCH_AUTO_APPROVE:-1}"
+  export SHELL_AUTO_APPROVE="${SHELL_AUTO_APPROVE:-1}"
+  export AUTO_APPROVE_MCP="${AUTO_APPROVE_MCP:-1}"
+  export EXAMPLES_INCLUDE_INTERACTIVE="${EXAMPLES_INCLUDE_INTERACTIVE:-1}"
+  export EXAMPLES_INCLUDE_SERVER="${EXAMPLES_INCLUDE_SERVER:-0}"
+  export EXAMPLES_INCLUDE_AUDIO="${EXAMPLES_INCLUDE_AUDIO:-0}"
+  export EXAMPLES_INCLUDE_EXTERNAL="${EXAMPLES_INCLUDE_EXTERNAL:-0}"
+  cd "$ROOT"
+  set +e
+  "${run_cmd[@]}" "$@" 2>&1 | tee "$stdout_log"
+  local run_status=${PIPESTATUS[0]}
+  set -e
+  return "$run_status"
+}
+
+cmd_stop() {
+  if [[ ! -f "$PID_FILE" ]]; then
+    echo "No pid file; nothing to stop."
+    return 0
+  fi
+  local pid
+  pid="$(cat "$PID_FILE" 2>/dev/null || true)"
+  if [[ -z "$pid" ]]; then
+    rm -f "$PID_FILE"
+    echo "Pid file empty; cleaned."
+    return 0
+  fi
+  if ! is_running "$pid"; then
+    rm -f "$PID_FILE"
+    echo "Process $pid not running; cleaned pid file."
+    return 0
+  fi
+  echo "Stopping pid $pid ..."
+  kill "$pid" 2>/dev/null || true
+  sleep 1
+  if is_running "$pid"; then
+    echo "Sending SIGKILL to $pid ..."
+    kill -9 "$pid" 2>/dev/null || true
+  fi
+  rm -f "$PID_FILE"
+  echo "Stopped."
+}
+
+# Report whether the pid stored in the pid file belongs to a live process.
+# Always returns 0; the answer is printed on stdout.
+cmd_status() {
+  local recorded=""
+  if [[ -f "$PID_FILE" ]]; then
+    recorded="$(cat "$PID_FILE" 2>/dev/null || true)"
+  fi
+
+  # is_running rejects an empty pid, which covers the "no pid file" case.
+  if is_running "$recorded"; then
+    echo "Running (pid=$recorded)"
+    return 0
+  fi
+  echo "Not running."
+}
+
+# List the log directory, newest entry first, one name per line.
+cmd_logs() {
+  # Create the directory first so the listing does not fail on a fresh checkout.
+  ensure_dirs
+  ls -t -1 "$LOG_DIR"
+}
+
+cmd_tail() {
+  ensure_dirs
+  local file="${1:-}"
+  if [[ -z "$file" ]]; then
+    file="$(ls -1t "$LOG_DIR" | head -n1)"
+  fi
+  if [[ -z "$file" ]]; then
+    echo "No log files yet."
+    exit 1
+  fi
+  tail -f "$LOG_DIR/$file"
+}
+
+# Regenerate the rerun list from a main log via `run_examples.py --collect`.
+# Arguments: $1 - optional main log. May be an absolute path, a path relative
+#                 to the current directory, or a bare file name inside
+#                 $LOG_DIR. Defaults to the most recently modified
+#                 main_*.log in $LOG_DIR.
+# Exits 1 with a message when no usable main log exists.
+collect_rerun() {
+  ensure_dirs
+  local log_file="${1:-}"
+
+  if [[ -z "$log_file" ]]; then
+    # Glob + -nt instead of `ls | head`: with `set -euo pipefail` a failing
+    # `ls` (no main_*.log yet) aborted the script before the message below
+    # could be printed.
+    local candidate
+    for candidate in "$LOG_DIR"/main_*.log; do
+      [[ -f "$candidate" ]] || continue
+      if [[ -z "$log_file" || "$candidate" -nt "$log_file" ]]; then
+        log_file="$candidate"
+      fi
+    done
+  elif [[ ! -f "$log_file" && -f "$LOG_DIR/$log_file" ]]; then
+    # Accept a bare log name, matching what `tail` takes.
+    log_file="$LOG_DIR/$log_file"
+  fi
+
+  if [[ -z "$log_file" ]] || [[ ! -f "$log_file" ]]; then
+    echo "No main log file found."
+    exit 1
+  fi
+
+  # The runner is started from $ROOT, so anchor a relative path to the
+  # caller's directory before changing into it.
+  if [[ "$log_file" != /* ]]; then
+    log_file="$PWD/$log_file"
+  fi
+
+  cd "$ROOT"
+  uv run examples/run_examples.py --collect "$log_file" --output "$RERUN_FILE"
+}
+
+cmd_rerun() {
+  ensure_dirs
+  local file="${1:-$RERUN_FILE}"
+  if [[ ! -s "$file" ]]; then
+    echo "Rerun list is empty: $file"
+    exit 0
+  fi
+  local ts main_log stdout_log
+  ts="$(date +%Y%m%d-%H%M%S)"
+  main_log="$LOG_DIR/main_${ts}.log"
+  stdout_log="$LOG_DIR/stdout_${ts}.log"
+  cd "$ROOT"
+  export EXAMPLES_INTERACTIVE_MODE="${EXAMPLES_INTERACTIVE_MODE:-auto}"
+  export APPLY_PATCH_AUTO_APPROVE="${APPLY_PATCH_AUTO_APPROVE:-1}"
+  export SHELL_AUTO_APPROVE="${SHELL_AUTO_APPROVE:-1}"
+  export AUTO_APPROVE_MCP="${AUTO_APPROVE_MCP:-1}"
+  set +e
+  uv run examples/run_examples.py --auto-mode --rerun-file "$file" --write-rerun --main-log "$main_log" --logs-dir "$LOG_DIR" 2>&1 | tee "$stdout_log"
+  local run_status=${PIPESTATUS[0]}
+  set -e
+  return "$run_status"
+}
+
+# Print the command summary and the environment overrides to stdout.
+usage() {
+  printf '%s\n' \
+    'Usage: run.sh <start|stop|status|logs|tail|collect|rerun> [args...]' \
+    '' \
+    'Commands:' \
+    '  start [--filter ... | other args]   Run examples in auto mode (foreground). Pass --background to run detached.' \
+    '  stop                                Kill the running auto-run (if any).' \
+    '  status                              Show whether it is running.' \
+    '  logs                                List log files (.tmp/examples-start-logs).' \
+    '  tail [logfile]                      Tail the latest (or specified) log.' \
+    '  collect [main_log]                  Parse a main log and write failed examples to .tmp/examples-rerun.txt.' \
+    '  rerun [rerun_file]                  Run only the examples listed in .tmp/examples-rerun.txt.' \
+    '' \
+    'Environment overrides:' \
+    '  EXAMPLES_INTERACTIVE_MODE (default auto)' \
+    '  EXAMPLES_INCLUDE_SERVER/INTERACTIVE/AUDIO/EXTERNAL (defaults: 0/1/0/0)' \
+    '  APPLY_PATCH_AUTO_APPROVE, SHELL_AUTO_APPROVE, AUTO_APPROVE_MCP (default 1 in auto mode)'
+}
+
+# Resolve the subcommand. With no arguments at all, a non-empty rerun list
+# selects "rerun"; otherwise the fallback is "start".
+if [[ $# -eq 0 ]]; then
+  if [[ -s "$RERUN_FILE" ]]; then
+    subcommand="rerun"
+  else
+    subcommand="start"
+  fi
+else
+  # An explicitly empty first argument also falls back to "start".
+  subcommand="${1:-start}"
+  shift
+fi
+
+# Remaining positional parameters are the subcommand's own arguments.
+case "$subcommand" in
+  start)
+    cmd_start "$@"
+    ;;
+  stop)
+    cmd_stop
+    ;;
+  status)
+    cmd_status
+    ;;
+  logs)
+    cmd_logs
+    ;;
+  tail)
+    cmd_tail "${1:-}"
+    ;;
+  collect)
+    collect_rerun "${1:-}"
+    ;;
+  rerun)
+    cmd_rerun "${1:-}"
+    ;;
+  *)
+    usage
+    exit 1
+    ;;
+esac
diff --git a/.gitignore b/.gitignore
@@ -45,6 +45,7 @@ htmlcov/
 .coverage
 .coverage.*
 .cache
+.tmp/
 nosetests.xml
 coverage.xml
 *.cover

diff --git a/examples/agent_patterns/agents_as_tools.py b/examples/agent_patterns/agents_as_tools.py
@@ -1,6 +1,7 @@
 import asyncio
 
 from agents import Agent, ItemHelpers, MessageOutputItem, Runner, trace
+from examples.auto_mode import input_with_fallback
 
 """
 This example shows the agents-as-tools pattern. The frontline agent receives a user message and
@@ -56,7 +57,10 @@
 
 
 async def main():
-    msg = input("Hi! What would you like translated, and to which languages? ")
+    msg = input_with_fallback(
+        "Hi! What would you like translated, and to which languages? ",
+        "Translate 'Hello, world!' to French and Spanish.",
+    )
 
     # Run the entire orchestration in a single trace
     with trace("Orchestrator evaluator"):

diff --git a/examples/agent_patterns/agents_as_tools_conditional.py b/examples/agent_patterns/agents_as_tools_conditional.py
@@ -3,6 +3,7 @@
 from pydantic import BaseModel
 
 from agents import Agent, AgentBase, RunContextWrapper, Runner, trace
+from examples.auto_mode import input_with_fallback
 
 """
 This example demonstrates the agents-as-tools pattern with conditional tool enabling.
@@ -81,7 +82,7 @@ async def main():
     print("2. French and Spanish (2 tools)")
     print("3. European languages (3 tools)")
 
-    choice = input("\nSelect option (1-3): ").strip()
+    choice = input_with_fallback("\nSelect option (1-3): ", "2").strip()
     preference_map = {"1": "spanish_only", "2": "french_spanish", "3": "european"}
     language_preference = preference_map.get(choice, "spanish_only")
 
@@ -95,7 +96,10 @@ async def main():
     print(f"The LLM will only see and can use these {len(available_tools)} tools\n")
 
     # Get user request
-    user_request = input("Ask a question and see responses in available languages:\n")
+    user_request = input_with_fallback(
+        "Ask a question and see responses in available languages:\n",
+        "How do you say good morning?",
+    )
 
     # Run with LLM interaction
     print("\nProcessing request...")

diff --git a/examples/agent_patterns/deterministic.py b/examples/agent_patterns/deterministic.py
@@ -3,6 +3,7 @@
 from pydantic import BaseModel
 
 from agents import Agent, Runner, trace
+from examples.auto_mode import input_with_fallback
 
 """
 This example demonstrates a deterministic flow, where each step is performed by an agent.
@@ -39,7 +40,10 @@ class OutlineCheckerOutput(BaseModel):
 
 
 async def main():
-    input_prompt = input("What kind of story do you want? ")
+    input_prompt = input_with_fallback(
+        "What kind of story do you want? ",
+        "Write a short sci-fi story.",
+    )
 
     # Ensure the entire workflow is a single trace
     with trace("Deterministic story flow"):