From b57004d95eb698c531682ac420f1d899f7fc59aa Mon Sep 17 00:00:00 2001 From: David Elner Date: Fri, 12 Dec 2025 17:34:09 -0500 Subject: [PATCH 1/6] Added: Plan for auto instrumentation --- .../milestones/01-integration-framework.md | 346 ++++++++++++++++++ .../milestones/02-openai-integration.md | 254 +++++++++++++ .../milestones/03-instrument-api.md | 140 +++++++ .../milestones/04-init-auto-instrument.md | 154 ++++++++ .../05-require-time-auto-instrument.md | 181 +++++++++ .../milestones/06-cli-wrapper.md | 220 +++++++++++ .../milestones/07-anthropic-integration.md | 192 ++++++++++ .../milestones/08-ruby-openai-integration.md | 238 ++++++++++++ .../milestones/09-ruby-llm-integration.md | 179 +++++++++ .plan/auto_instrument/plan.md | 136 +++++++ .plan/auto_instrument/ref/design.md | 264 +++++++++++++ .plan/auto_instrument/ref/future-work.md | 233 ++++++++++++ 12 files changed, 2537 insertions(+) create mode 100644 .plan/auto_instrument/milestones/01-integration-framework.md create mode 100644 .plan/auto_instrument/milestones/02-openai-integration.md create mode 100644 .plan/auto_instrument/milestones/03-instrument-api.md create mode 100644 .plan/auto_instrument/milestones/04-init-auto-instrument.md create mode 100644 .plan/auto_instrument/milestones/05-require-time-auto-instrument.md create mode 100644 .plan/auto_instrument/milestones/06-cli-wrapper.md create mode 100644 .plan/auto_instrument/milestones/07-anthropic-integration.md create mode 100644 .plan/auto_instrument/milestones/08-ruby-openai-integration.md create mode 100644 .plan/auto_instrument/milestones/09-ruby-llm-integration.md create mode 100644 .plan/auto_instrument/plan.md create mode 100644 .plan/auto_instrument/ref/design.md create mode 100644 .plan/auto_instrument/ref/future-work.md diff --git a/.plan/auto_instrument/milestones/01-integration-framework.md b/.plan/auto_instrument/milestones/01-integration-framework.md new file mode 100644 index 0000000..a0bc3b0 --- /dev/null +++ 
b/.plan/auto_instrument/milestones/01-integration-framework.md @@ -0,0 +1,346 @@ +# Milestone 01: Integration Framework + +## Goal + +Establish the contrib framework foundation that provides a consistent interface for all integrations. + +## What You Get + +- Consistent interface across integrations +- Scales to many libraries without code duplication +- Enables reliable auto-instrumentation in later milestones + +## Success Criteria + +- `Braintrust::Contrib::Registry` can register and look up integrations +- `Braintrust::Contrib::Integration` module defines the integration contract +- `Braintrust::Contrib::Patcher` base class handles thread-safe patching +- All base classes have tests + +## Files to Create + +### `lib/braintrust/contrib.rb` + +Entry point that loads the contrib framework: + +```ruby +# lib/braintrust/contrib.rb +require_relative "contrib/registry" +require_relative "contrib/integration" +require_relative "contrib/patcher" + +module Braintrust + module Contrib + class << self + def registry + Registry.instance + end + end + end +end + +# Load integration stubs (eager load minimal metadata) +# These will be added in subsequent milestones +``` + +### `lib/braintrust/contrib/registry.rb` + +Thread-safe singleton registry with double-checked locking: + +```ruby +# lib/braintrust/contrib/registry.rb +require "singleton" + +module Braintrust + module Contrib + class Registry + include Singleton + + def initialize + @integrations = {} + @require_path_map = nil # Lazy cache + @mutex = Mutex.new + end + + def register(integration_class) + @mutex.synchronize do + @integrations[integration_class.integration_name] = integration_class + @require_path_map = nil # Invalidate cache + end + end + + def [](name) + @integrations[name.to_sym] + end + + def all + @integrations.values + end + + def available + @integrations.values.select(&:available?) 
+ end + + def each(&block) + @integrations.values.each(&block) + end + + # Returns integrations associated with this require path + # Thread-safe with double-checked locking for performance + def integrations_for_require_path(path) + map = @require_path_map + if map.nil? + map = @mutex.synchronize do + @require_path_map ||= build_require_path_map + end + end + basename = File.basename(path.to_s, ".rb") + map.fetch(basename, EMPTY_ARRAY) + end + + private + + EMPTY_ARRAY = [].freeze + + def build_require_path_map + map = {} + @integrations.each_value do |integration| + integration.require_paths.each do |req| + map[req] ||= [] + map[req] << integration + end + end + map.each_value(&:freeze) + map.freeze + end + end + end +end +``` + +### `lib/braintrust/contrib/integration.rb` + +Base module defining the integration contract (schema only, delegates to patcher): + +```ruby +# lib/braintrust/contrib/integration.rb +module Braintrust + module Contrib + module Integration + def self.included(base) + base.extend(ClassMethods) + end + + module ClassMethods + # Unique symbol name for this integration (e.g., :openai, :anthropic) + def integration_name + raise NotImplementedError, "#{self} must implement integration_name" + end + + # Array of gem names this integration supports + def gem_names + raise NotImplementedError, "#{self} must implement gem_names" + end + + # Require paths for auto-instrument detection (default: gem_names) + def require_paths + gem_names + end + + # Is the target library loaded? + def available? + gem_names.any? { |name| Gem.loaded_specs.key?(name) } + end + + # Minimum compatible version (optional, inclusive) + def minimum_version + nil + end + + # Maximum compatible version (optional, inclusive) + def maximum_version + nil + end + + # Is the library version compatible? + def compatible? + return false unless available? 
+ + gem_names.each do |name| + spec = Gem.loaded_specs[name] + next unless spec + + version = spec.version + return false if minimum_version && version < Gem::Version.new(minimum_version) + return false if maximum_version && version > Gem::Version.new(maximum_version) + return true + end + false + end + + # Array of patcher classes for this integration + # Override to return multiple patchers for version-specific logic + # @return [Array] Array of patcher classes + def patcher_classes + [patcher] # Default: single patcher + end + + # Convenience method for single patcher (existing pattern) + # Override this OR patcher_classes (not both) + def patcher + raise NotImplementedError, "#{self} must implement patcher or patcher_classes" + end + + # Already patched? Returns true if ANY patcher was applied + def patched? + patcher_classes.any?(&:patched?) + end + + # Apply instrumentation (idempotent). Tries each patcher until one succeeds + def patch!(tracer_provider: nil) + return false unless available? && compatible? + + # Try each patcher (each checks applicable? internally) + patcher_classes.each do |patcher_class| + # Check if this patcher is applicable + next unless patcher_class.applicable? + + # Attempt to patch (patcher checks applicable? again under lock) + result = patcher_class.patch!(tracer_provider: tracer_provider) + return true if result # Stop on first success + end + + Braintrust::Log.debug("No applicable patcher found for #{integration_name}") + false + end + + # Register this integration with the global registry + def register! 
+ Registry.instance.register(self) + end + end + end + end +end +``` + +### `lib/braintrust/contrib/patcher.rb` + +Base class for all patchers with thread-safe patching: + +```ruby +# lib/braintrust/contrib/patcher.rb +module Braintrust + module Contrib + # Context passed to perform_patch - extensible without breaking signatures + PatchContext = Struct.new(:tracer_provider, keyword_init: true) + + # Base class for all patchers + class Patcher + class << self + def patched? + @patched == true + end + + # Override in subclasses to check if patcher should apply + # Called after patcher loads but before perform_patch + # @return [Boolean] true if this patcher should be applied + def applicable? + true # Default: always applicable + end + + def patch!(tracer_provider: nil) + return true if patched? # Fast path + + @patch_mutex ||= Mutex.new + @patch_mutex.synchronize do + return true if patched? # Double-check under lock + + # Check if this patcher should apply + unless applicable? + Braintrust::Log.debug("Skipping #{name} - not applicable") + return false + end + + context = build_context(tracer_provider: tracer_provider) + + perform_patch(context) + @patched = true + end + Braintrust::Log.debug("Patched #{name}") + true + rescue => e + Braintrust::Log.error("Failed to patch #{name}: #{e.message}") + false + end + + def build_context(tracer_provider: nil) + PatchContext.new( + tracer_provider: tracer_provider + ) + end + + # Subclasses implement this - receives PatchContext + def perform_patch(context) + raise NotImplementedError, "#{self} must implement perform_patch" + end + end + end + end +end +``` + +## Files to Modify + +### `lib/braintrust.rb` + +Add require for contrib framework: + +```ruby +# Add after other requires +require_relative "braintrust/contrib" +``` + +## Tests to Create + +### `test/braintrust/contrib/registry_test.rb` + +- Test registration of integrations +- Test lookup by name +- Test `available` filtering +- Test `integrations_for_require_path` 
caching and thread-safety + +### `test/braintrust/contrib/integration_test.rb` + +- Test `available?` with mock gem specs +- Test `compatible?` with version constraints +- Test `patched?` delegation to patcher +- Test `patch!` delegation and return values +- Test `patcher_classes` with single patcher (default) +- Test `patcher_classes` with multiple patchers +- Test only applicable patcher is patched +- Test stops on first successful patch + +### `test/braintrust/contrib/patcher_test.rb` + +- Test idempotency (calling patch! twice) +- Test thread-safety (concurrent patch! calls) +- Test error handling (perform_patch raises) +- Test `applicable?` returning false skips patching +- Test `applicable?` is checked under lock +- Test logging when patcher not applicable + +## Documentation + +Add brief section to README on contrib architecture (can be expanded in later milestones). + +## Dependencies + +None - this is the foundation milestone. + +## Thread Safety Summary + +| Component | Issue | Solution | +|-----------|-------|----------| +| Registry cache | Race condition reading `@require_path_map` | Double-checked locking pattern | +| Patcher `patch!` | Race condition setting `@patched` | Mutex with double-check | diff --git a/.plan/auto_instrument/milestones/02-openai-integration.md b/.plan/auto_instrument/milestones/02-openai-integration.md new file mode 100644 index 0000000..5c8872f --- /dev/null +++ b/.plan/auto_instrument/milestones/02-openai-integration.md @@ -0,0 +1,254 @@ +# Milestone 02: OpenAI Integration + +## Goal + +First working integration as proof of concept, demonstrating the contrib framework with class-level patching. + +## What You Get + +All OpenAI clients auto-traced with `Braintrust::Contrib::OpenAI::Integration.patch!` + +```ruby +require "braintrust" +Braintrust.init + +# Explicitly patch OpenAI (auto-instrument comes in later milestones) +Braintrust::Contrib::OpenAI::Integration.patch! 
+ +# All clients now auto-traced +client = OpenAI::Client.new +client.chat.completions.create(...) # Traced! +``` + +## Success Criteria + +- `Braintrust::Contrib::OpenAI::Integration.patch!` instruments all OpenAI clients +- Class-level patching (new instances are auto-traced) +- Idempotent (calling patch! twice doesn't double-wrap) +- Existing `.wrap(client)` API still works (backwards compatible) +- All existing OpenAI tests pass + +## Files to Create + +### `lib/braintrust/contrib/openai/integration.rb` + +Stub file with minimal metadata (eager loaded): + +```ruby +# lib/braintrust/contrib/openai/integration.rb +require_relative "../integration" + +module Braintrust + module Contrib + module OpenAI + class Integration + include Braintrust::Contrib::Integration + + def self.integration_name + :openai + end + + def self.gem_names + ["openai"] # Official openai gem only + end + + def self.require_paths + ["openai"] + end + + def self.minimum_version + "0.1.0" + end + + # Override available? to distinguish from ruby-openai gem + def self.available? + $LOADED_FEATURES.any? { |f| f.end_with?("/openai.rb") && f.include?("/openai-") } || + Gem.loaded_specs.key?("openai") + end + + # Lazy-load the patcher only when actually patching + def self.patcher + require_relative "patcher" + Patcher + end + end + end + end +end +``` + +### `lib/braintrust/contrib/openai/patcher.rb` + +Heavy file with patching logic (lazy loaded): + +```ruby +# lib/braintrust/contrib/openai/patcher.rb +require_relative "../patcher" + +module Braintrust + module Contrib + module OpenAI + class Patcher < Braintrust::Contrib::Patcher + class << self + def perform_patch(context) + patch_chat_completions + patch_responses if responses_available? + end + + private + + def patch_chat_completions + # Patch at class level - affects all future instances + ::OpenAI::Client.prepend(ChatCompletionsPatch) + end + + def patch_responses + ::OpenAI::Client.prepend(ResponsesPatch) + end + + def responses_available? 
+ defined?(::OpenAI::Client) && + ::OpenAI::Client.instance_methods.include?(:responses) + end + end + + # Module to prepend to OpenAI::Client for chat.completions + module ChatCompletionsPatch + def chat + chat_resource = super + unless chat_resource.completions.singleton_class.ancestors.include?(CompletionsWrapper) + chat_resource.completions.singleton_class.prepend(CompletionsWrapper) + end + chat_resource + end + end + + # Module to prepend to chat.completions + module CompletionsWrapper + def create(parameters: {}) + # Tracing logic here - refactored from existing openai.rb + Braintrust::Trace.traced(name: "OpenAI Chat Completion", type: "llm") do |span| + # ... span attributes, metrics, etc. + super + end + end + + # Also wrap stream, stream_raw methods + end + + # Module for responses API (if available) + module ResponsesPatch + # Similar pattern for responses.create + end + end + end + end +end +``` + +**Note:** The actual patcher implementation will be refactored from the existing `lib/braintrust/trace/contrib/openai.rb` code. The wrapper modules will reuse the span creation, aggregation, and metrics logic already implemented. + +## Files to Modify + +### `lib/braintrust/contrib.rb` + +Add require for OpenAI integration stub: + +```ruby +# Load integration stubs (eager load minimal metadata) +require_relative "contrib/openai/integration" + +# Register the integration +Contrib::OpenAI::Integration.register! +``` + +**Note:** Registration is explicit in `contrib.rb` rather than automatic in the integration file. This allows integrations to be loaded without side effects, which is useful for testing and tooling that may want to inspect integrations without registering them globally. 
+ +### `lib/braintrust/contrib/openai.rb` + +Add per-client `instrument!` method (the new API): + +```ruby +# lib/braintrust/contrib/openai.rb +module Braintrust + module Contrib + module OpenAI + # Instrument a specific client instance + # This is the new API; Braintrust::Trace::OpenAI.wrap is the backwards-compat alias + def self.instrument!(client) + # Same behavior as the existing wrap() function + # ... wrapping logic ... + client + end + end + end +end +``` + +### `lib/braintrust/trace/contrib/openai.rb` + +Convert to compatibility shim that delegates to new API: + +```ruby +# lib/braintrust/trace/contrib/openai.rb +# Backwards compatibility - delegates to new contrib framework + +module Braintrust + module Trace + module OpenAI + def self.wrap(client) + Braintrust::Contrib::OpenAI.instrument!(client) + end + end + end +end +``` + +### `lib/braintrust/trace.rb` + +Remove direct require of `trace/contrib/openai.rb` (it's now loaded via contrib): + +```ruby +# Remove: require_relative "trace/contrib/openai" +``` + +## Tests to Create/Update + +### `test/braintrust/contrib/openai/integration_test.rb` + +- Test `integration_name`, `gem_names`, `require_paths` +- Test `available?` correctly detects official openai gem (not ruby-openai) +- Test `available?` checks $LOADED_FEATURES for gem disambiguation +- Test `compatible?` +- Test `patch!` calls patcher + +### `test/braintrust/contrib/openai/patcher_test.rb` + +- Test class-level patching (new clients are instrumented) +- Test idempotency (patch! twice doesn't double-wrap) +- Test `applicable?` returns true for this patcher +- Test `chat.completions.create` is traced +- Test streaming methods are traced + +### `test/braintrust/trace/openai_test.rb` (existing) + +- Verify existing tests still pass +- Add tests for `.wrap()` compatibility shim + +## Documentation + +Add example usage to README showing explicit `patch!` call. 
+ +## Dependencies + +- [01-integration-framework.md](01-integration-framework.md) must be complete + +## Notes + +### Existing Clients Not Patched + +Class-level patching only affects clients created *after* patching occurs. Clients instantiated before `patch!` is called will not be instrumented. This is documented as expected behavior. + +### Refactoring Existing Code + +The bulk of the work is refactoring existing `trace/contrib/openai.rb` logic into the new patcher structure. The tracing logic itself doesn't change - just where it lives and how it's activated. diff --git a/.plan/auto_instrument/milestones/03-instrument-api.md b/.plan/auto_instrument/milestones/03-instrument-api.md new file mode 100644 index 0000000..14804e4 --- /dev/null +++ b/.plan/auto_instrument/milestones/03-instrument-api.md @@ -0,0 +1,140 @@ +# Milestone 03: Instrument API + +## Goal + +Provide a clean public API for explicitly instrumenting specific integrations. + +## What You Get + +Clean `Braintrust::Contrib.instrument!` method for selective instrumentation: + +```ruby +require "braintrust" +Braintrust.init + +# Instrument specific integrations +Braintrust::Contrib.instrument!(:openai) + +# Or instrument all available +Braintrust::Contrib.instrument! + +# With filtering +Braintrust::Contrib.instrument!(only: [:openai, :anthropic]) +Braintrust::Contrib.instrument!(except: [:ruby_llm]) +``` + +## Success Criteria + +- `Braintrust::Contrib.instrument!` patches specified integrations +- Support for `only:` and `except:` filtering +- Returns hash of results `{ openai: true, anthropic: false, ...
}` +- Idempotent (calling twice is safe) + +## Files to Modify + +### `lib/braintrust/contrib.rb` + +Add `instrument!` method: + +```ruby +# lib/braintrust/contrib.rb +require_relative "contrib/registry" +require_relative "contrib/integration" +require_relative "contrib/patcher" + +module Braintrust + module Contrib + class << self + def registry + Registry.instance + end + + # Instrument integrations + # + # @param integrations [Array] specific integrations to instrument (optional) + # @param only [Array] whitelist of integrations + # @param except [Array] blacklist of integrations + # @return [Hash] results per integration + # + # @example Instrument all available + # Braintrust::Contrib.instrument! + # + # @example Instrument specific integrations + # Braintrust::Contrib.instrument!(:openai, :anthropic) + # + # @example With filtering + # Braintrust::Contrib.instrument!(only: [:openai]) + # Braintrust::Contrib.instrument!(except: [:ruby_llm]) + # + def instrument!(*integrations, only: nil, except: nil) + # If specific integrations provided, use those + if integrations.any? + targets = integrations.map { |name| registry[name] }.compact + else + targets = registry.available + end + + # Apply filters + if only + only_syms = Array(only).map(&:to_sym) + targets = targets.select { |i| only_syms.include?(i.integration_name) } + end + + if except + except_syms = Array(except).map(&:to_sym) + targets = targets.reject { |i| except_syms.include?(i.integration_name) } + end + + # Patch each and collect results + results = {} + targets.each do |integration| + results[integration.integration_name] = integration.patch! 
+ end + results + end + end + end +end + +# Load integration stubs +require_relative "contrib/openai/integration" +``` + +### `lib/braintrust/contrib/registry.rb` + +Add `instrument!` method to registry (delegates to module method but available on registry too): + +```ruby +# Add to Registry class +def instrument!(only: nil, except: nil) + targets = available + targets = targets.select { |i| only.include?(i.integration_name) } if only + targets = targets.reject { |i| except.include?(i.integration_name) } if except + + results = {} + targets.each do |integration| + results[integration.integration_name] = integration.patch! + end + results +end +``` + +## Tests to Create + +### `test/braintrust/contrib_test.rb` + +- Test `instrument!` with no arguments (all available) + +- Test `instrument!(:openai)` (specific integration) +- Test `instrument!(only: [...])` filtering +- Test `instrument!(except: [...])` filtering +- Test return value hash +- Test idempotency + +## Documentation + +Update README with `instrument!` API examples. + +## Dependencies + +- [01-integration-framework.md](01-integration-framework.md) must be complete +- [02-openai-integration.md](02-openai-integration.md) must be complete (for testing) diff --git a/.plan/auto_instrument/milestones/04-init-auto-instrument.md b/.plan/auto_instrument/milestones/04-init-auto-instrument.md new file mode 100644 index 0000000..b4301d4 --- /dev/null +++ b/.plan/auto_instrument/milestones/04-init-auto-instrument.md @@ -0,0 +1,154 @@ +# Milestone 04: Init Auto-Instrument + +## Goal + +Integrate auto-instrumentation into the `init()` call, enabled by default. + +## What You Get + +`Braintrust.init` auto-instruments all available integrations (zero-config): + +```ruby +require "braintrust" +Braintrust.init # Auto-instruments everything! + +client = OpenAI::Client.new +client.chat.completions.create(...) # Traced!
+``` + +With opt-out and selective options: + +```ruby +# Opt-out completely +Braintrust.init(auto_instrument: false) + +# Only specific integrations +Braintrust.init(auto_instrument: { only: [:openai, :anthropic] }) + +# All except some +Braintrust.init(auto_instrument: { except: [:ruby_llm] }) +``` + +## Success Criteria + +- `Braintrust.init` auto-instruments by default +- `auto_instrument: false` disables auto-instrumentation +- `auto_instrument: { only: [...] }` enables only specified integrations +- `auto_instrument: { except: [...] }` excludes specified integrations +- Multiple `init()` calls don't duplicate instrumentation (idempotent) +- Environment variable `BRAINTRUST_AUTO_INSTRUMENT` enables/disables auto-instrumentation +- Environment variables `BRAINTRUST_INSTRUMENT_ONLY` and `BRAINTRUST_INSTRUMENT_EXCEPT` work + +## Files to Modify + +### `lib/braintrust.rb` (or `lib/braintrust/braintrust.rb`) + +Add `auto_instrument` parameter to `init()`: + +```ruby +module Braintrust + class << self + # @param auto_instrument [Boolean, Hash, nil] Auto-instrumentation config + # - nil (default): use BRAINTRUST_AUTO_INSTRUMENT env var, or enable if not set + # - true: explicitly enable (overrides BRAINTRUST_AUTO_INSTRUMENT=false) + # - false: explicitly disable (overrides BRAINTRUST_AUTO_INSTRUMENT=true) + # - Hash with :only or :except keys for filtering + def init( + api_key: nil, + org_name: nil, + project: nil, + auto_instrument: nil, + **options + ) + # ... existing init logic ... + + # Auto-instrument based on parameter + perform_auto_instrument(auto_instrument) + + # ... rest of init ... 
+ end + + private + + def perform_auto_instrument(config) + # Determine if auto-instrumentation should run + should_instrument = case config + when nil + # Not explicitly configured - check env var (default to true) + ENV["BRAINTRUST_AUTO_INSTRUMENT"] != "false" + when false + # Explicitly disabled in code + false + when true, Hash + # Explicitly enabled in code + true + end + + return unless should_instrument + + # Parse filter environment variable overrides + only = parse_env_list("BRAINTRUST_INSTRUMENT_ONLY") + except = parse_env_list("BRAINTRUST_INSTRUMENT_EXCEPT") + + # Apply configuration + if config.is_a?(Hash) + only = config[:only] || only + except = config[:except] || except + end + + Braintrust::Contrib.instrument!(only: only, except: except) + end + + def parse_env_list(key) + value = ENV[key] + return nil unless value + value.split(",").map(&:strip).map(&:to_sym) + end + end +end +``` + +## Environment Variables + +| Variable | Description | Example | +|----------|-------------|---------| +| `BRAINTRUST_AUTO_INSTRUMENT` | Enable/disable auto-instrumentation (only used if not explicitly configured in code) | `false` | +| `BRAINTRUST_INSTRUMENT_ONLY` | Comma-separated whitelist | `openai,anthropic` | +| `BRAINTRUST_INSTRUMENT_EXCEPT` | Comma-separated blacklist | `ruby_llm` | + +**Precedence rules:** +- Explicit code configuration always takes precedence over `BRAINTRUST_AUTO_INSTRUMENT` +- Programmatic filter options (`only`/`except`) override environment variables: + - If `only` specified in code, it overrides `BRAINTRUST_INSTRUMENT_ONLY` + - If `except` specified in code, it overrides `BRAINTRUST_INSTRUMENT_EXCEPT` + - Environment variables only apply when not specified in code + +## Tests to Create + +### `test/braintrust/init_auto_instrument_test.rb` + +- Test `init()` auto-instruments by default (when `BRAINTRUST_AUTO_INSTRUMENT` not set) +- Test `init()` respects `BRAINTRUST_AUTO_INSTRUMENT=false` (skips instrumentation) +- Test `init()` ignores 
`BRAINTRUST_AUTO_INSTRUMENT=true` (already default behavior) +- Test `init(auto_instrument: false)` skips instrumentation even if `BRAINTRUST_AUTO_INSTRUMENT=true` +- Test `init(auto_instrument: true)` instruments even if `BRAINTRUST_AUTO_INSTRUMENT=false` +- Test `init(auto_instrument: { only: [:openai] })` instruments even if `BRAINTRUST_AUTO_INSTRUMENT=false` +- Test `init(auto_instrument: { only: [:openai] })` only instruments specified +- Test `init(auto_instrument: { except: [:ruby_llm] })` excludes specified +- Test idempotency (multiple init calls) +- Test environment variable `BRAINTRUST_INSTRUMENT_ONLY` +- Test environment variable `BRAINTRUST_INSTRUMENT_EXCEPT` +- Test env + programmatic combination + +## Documentation + +Update README: +- Update "Getting Started" to show zero-config usage +- Add section on `auto_instrument` parameter options +- Document environment variables + +## Dependencies + +- [01-integration-framework.md](01-integration-framework.md) must be complete +- [02-openai-integration.md](02-openai-integration.md) must be complete +- [03-instrument-api.md](03-instrument-api.md) must be complete diff --git a/.plan/auto_instrument/milestones/05-require-time-auto-instrument.md b/.plan/auto_instrument/milestones/05-require-time-auto-instrument.md new file mode 100644 index 0000000..4426c13 --- /dev/null +++ b/.plan/auto_instrument/milestones/05-require-time-auto-instrument.md @@ -0,0 +1,181 @@ +# Milestone 05: Require-time Auto-Instrument + +## Goal + +Enable instrumentation via `require` without explicit `init()` call, supporting Bundler and Rails workflows. + +## What You Get + +Works via Gemfile or Rails initializer without calling `init()`: + +```ruby +# Gemfile - order doesn't matter!
+gem "braintrust", require: "braintrust/contrib/auto_instrument" +gem "openai" +``` + +```ruby +# Or Rails initializer (config/initializers/braintrust.rb) +require "braintrust/contrib/auto_instrument" +``` + +Libraries loaded after the require are automatically instrumented. + +## Success Criteria + +- `require "braintrust/contrib/auto_instrument"` sets up instrumentation +- Works with Bundler (gem load order doesn't matter) +- Works with Rails (`after_initialize` hook) +- Works with plain Ruby scripts +- Idempotent setup (multiple requires are safe) +- Thread-safe with reentrancy guard + +## Files to Create + +### `lib/braintrust/contrib/auto_instrument.rb` + +```ruby +# lib/braintrust/contrib/auto_instrument.rb +require "braintrust" + +module Braintrust + module Contrib + module AutoInstrument + class << self + def setup! + return if @setup_complete + + # Initialize Braintrust from environment variables + # Silent failure if API key not set - spans just won't export + Braintrust.init rescue nil + + # Patch integrations that are already loaded + patch_available_integrations! + + # Set up deferred patching for libraries loaded later + if rails_environment? + setup_rails_hook! + else + setup_require_hook! + end + + @setup_complete = true + end + + def patch_available_integrations! + Braintrust::Contrib.instrument!( + only: parse_env_list("BRAINTRUST_INSTRUMENT_ONLY"), + except: parse_env_list("BRAINTRUST_INSTRUMENT_EXCEPT") + ) + end + + private + + def rails_environment? + defined?(Rails) && Rails.respond_to?(:application) && Rails.application + end + + def setup_rails_hook! + # Rails after_initialize runs immediately if already initialized + Rails.application.config.after_initialize do + Braintrust::Contrib::AutoInstrument.patch_available_integrations! + end + end + + def setup_require_hook! 
+ original_require = Kernel.method(:require) + registry = Registry.instance + + Kernel.define_method(:require) do |path| + # Call original require first + result = original_require.call(path) + + # Thread-local reentrancy guard + unless Thread.current[:braintrust_in_require_hook] + begin + Thread.current[:braintrust_in_require_hook] = true + + # Filter and patch eligible integrations + registry.integrations_for_require_path(path).each do |integration| + next unless integration.available? && integration.compatible? + integration.patch! + end + rescue => e + Braintrust::Log.debug("Auto-instrument hook error: #{e.message}") + ensure + Thread.current[:braintrust_in_require_hook] = false + end + end + + result + end + end + + def parse_env_list(key) + value = ENV[key] + return nil unless value + value.split(",").map(&:strip).map(&:to_sym) + end + end + end + end +end + +# Auto-setup when required +Braintrust::Contrib::AutoInstrument.setup! +``` + +## Design Notes + +### Why Require Hook? + +The require hook catches libraries loaded after Braintrust, regardless of: +- Bundler gem ordering +- Dynamic requires +- Lazy loading + +### Why Rails Hook? 
+ +For Rails, `after_initialize` is cleaner than the require hook because: +- All gems are already loaded +- No need to intercept requires +- Runs at a well-defined point in the boot process + +### Thread Safety + +| Component | Issue | Solution | +|-----------|-------|----------| +| Require hook | Reentrancy if patching triggers requires | Thread-local guard | +| Registry cache | Concurrent access | Double-checked locking (from Milestone 01) | +| Patcher | Concurrent patch calls | Mutex (from Milestone 01) | + +### `init()` Relationship + +- `auto_instrument.rb` calls `Braintrust.init` internally +- Provides true "zero-config" - just set `BRAINTRUST_API_KEY` +- If user calls `init()` explicitly, it's idempotent + +## Tests to Create + +### `test/braintrust/contrib/auto_instrument_test.rb` + +- Test `setup!` patches available integrations +- Test `setup!` is idempotent +- Test require hook triggers patching +- Test reentrancy guard prevents infinite loops +- Test Rails hook (mock Rails environment) +- Test environment variable filtering + +## Documentation + +Update README: +- Add "Bundler Setup" section with Gemfile example +- Add "Rails Setup" section with initializer example +- Explain that gem order doesn't matter + +## Dependencies + +- [01-integration-framework.md](01-integration-framework.md) must be complete +- [02-openai-integration.md](02-openai-integration.md) must be complete +- [03-instrument-api.md](03-instrument-api.md) must be complete +- [04-init-auto-instrument.md](04-init-auto-instrument.md) must be complete diff --git a/.plan/auto_instrument/milestones/06-cli-wrapper.md b/.plan/auto_instrument/milestones/06-cli-wrapper.md new file mode 100644 index 0000000..a6e8fa0 --- /dev/null +++ b/.plan/auto_instrument/milestones/06-cli-wrapper.md @@ -0,0 +1,220 @@ +# Milestone 06: CLI Wrapper + +## Goal + +Zero-code instrumentation via command line wrapper.
+ +## What You Get + +Instrument any Ruby application without code changes: + +```bash +# Basic usage +braintrust exec -- ruby app.rb + +# With Rails +braintrust exec -- bundle exec rails server + +# With filtering +braintrust exec --only openai,anthropic -- ruby app.rb +braintrust exec --except ruby_llm -- ruby app.rb +``` + +## Success Criteria + +- `braintrust exec -- COMMAND` instruments the application +- `--only` flag filters to specific integrations +- `--except` flag excludes specific integrations +- Works with any Ruby command (ruby, bundle exec, rails, rake, etc.) +- Preserves existing `RUBYOPT` settings + +## Files to Create + +### `exe/braintrust` + +```ruby +#!/usr/bin/env ruby +# exe/braintrust + +require "optparse" + +module Braintrust + module CLI + class << self + def run(args) + command = parse_args(args) + case command + when :exec + exec_command + when :help + print_help + else + print_help + exit 1 + end + end + + private + + def parse_args(args) + @options = {} + @remaining_args = [] + + parser = OptionParser.new do |opts| + opts.banner = "Usage: braintrust [options]" + + opts.separator "" + opts.separator "Commands:" + opts.separator " exec Run a command with auto-instrumentation" + opts.separator "" + opts.separator "Options:" + + opts.on("--only INTEGRATIONS", "Only instrument these (comma-separated)") do |v| + @options[:only] = v + end + + opts.on("--except INTEGRATIONS", "Skip these integrations (comma-separated)") do |v| + @options[:except] = v + end + + opts.on("-h", "--help", "Show this help") do + @options[:help] = true + end + + opts.on("-v", "--version", "Show version") do + require "braintrust/version" + puts "braintrust #{Braintrust::VERSION}" + exit 0 + end + end + + # Parse up to "--" separator + separator_index = args.index("--") + if separator_index + to_parse = args[0...separator_index] + @remaining_args = args[(separator_index + 1)..] 
+ else + to_parse = args + end + + parser.parse!(to_parse) + + return :help if @options[:help] || to_parse.empty? + return to_parse.first.to_sym + rescue OptionParser::InvalidOption => e + puts e.message + print_help + exit 1 + end + + def exec_command + if @remaining_args.empty? + puts "Error: No command specified after --" + puts "Usage: braintrust exec [options] -- COMMAND" + exit 1 + end + + # Set environment variables for auto_instrument + ENV["BRAINTRUST_INSTRUMENT_ONLY"] = @options[:only] if @options[:only] + ENV["BRAINTRUST_INSTRUMENT_EXCEPT"] = @options[:except] if @options[:except] + + # Inject auto-instrument via RUBYOPT + rubyopt = ENV["RUBYOPT"] || "" + ENV["RUBYOPT"] = "#{rubyopt} -rbraintrust/contrib/auto_instrument".strip + + # Execute the command (replaces current process) + exec(*@remaining_args) + end + + def print_help + puts <<~HELP + Braintrust CLI - Auto-instrument Ruby applications + + Usage: + braintrust exec [options] -- COMMAND + + Commands: + exec Run a command with auto-instrumentation enabled + + Options: + --only INTEGRATIONS Only instrument these (comma-separated) + --except INTEGRATIONS Skip these integrations (comma-separated) + -h, --help Show this help + -v, --version Show version + + Examples: + braintrust exec -- ruby app.rb + braintrust exec -- bundle exec rails server + braintrust exec --only openai -- ruby app.rb + braintrust exec --except ruby_llm -- bundle exec rake + + Environment Variables: + BRAINTRUST_API_KEY API key for Braintrust + BRAINTRUST_INSTRUMENT_ONLY Comma-separated whitelist + BRAINTRUST_INSTRUMENT_EXCEPT Comma-separated blacklist + HELP + end + end + end +end + +Braintrust::CLI.run(ARGV) +``` + +## Files to Modify + +### `braintrust.gemspec` + +Add executable: + +```ruby +Gem::Specification.new do |spec| + # ... existing config ... + + spec.executables = ["braintrust"] + + # ... rest of config ... +end +``` + +## How It Works + +1. Parse command-line options (`--only`, `--except`) +2. 
Set environment variables for filtering +3. Inject `-rbraintrust/contrib/auto_instrument` into `RUBYOPT` +4. `exec` the user's command (replaces current process) +5. When Ruby starts, it loads `auto_instrument.rb` before the app +6. Auto-instrument sets up require hooks and patches available libraries + +## Tests to Create + +### `test/braintrust/cli_test.rb` + +- Test option parsing (`--only`, `--except`) +- Test RUBYOPT injection +- Test environment variable passthrough +- Test error handling (no command specified) +- Test `--help` and `--version` + +### Integration test + +- Actually run `braintrust exec -- ruby -e "..."` and verify instrumentation works + +## Documentation + +Update README: +- Add "CLI Usage" section +- Show examples for common scenarios (Rails, plain Ruby, etc.) + +## Potential Challenges + +| Challenge | Mitigation | +|-----------|------------| +| Existing RUBYOPT conflicts | Append to existing RUBYOPT, don't replace | +| Cross-platform issues | Test on Linux, macOS, Windows | +| Bundler with `--path` | May need to ensure gem is in load path | + +## Dependencies + +- [01-integration-framework.md](01-integration-framework.md) must be complete +- [05-require-time-auto-instrument.md](05-require-time-auto-instrument.md) must be complete diff --git a/.plan/auto_instrument/milestones/07-anthropic-integration.md b/.plan/auto_instrument/milestones/07-anthropic-integration.md new file mode 100644 index 0000000..214501d --- /dev/null +++ b/.plan/auto_instrument/milestones/07-anthropic-integration.md @@ -0,0 +1,192 @@ +# Milestone 07: Anthropic Integration + +## Goal + +Port the Anthropic integration to the new contrib framework. + +## What You Get + +All Anthropic clients auto-traced: + +```ruby +require "braintrust" +Braintrust.init + +client = Anthropic::Client.new +client.messages.create(...) # Traced! 
+``` + +## Success Criteria + +- `Braintrust::Contrib::Anthropic::Integration.patch!` instruments all Anthropic clients +- Class-level patching (new instances are auto-traced) +- Existing `.wrap(client)` API still works (backwards compatible) +- All existing Anthropic tests pass + +## Files to Create + +### `lib/braintrust/contrib/anthropic/integration.rb` + +```ruby +# lib/braintrust/contrib/anthropic/integration.rb +require_relative "../integration" + +module Braintrust + module Contrib + module Anthropic + class Integration + include Braintrust::Contrib::Integration + + def self.integration_name + :anthropic + end + + def self.gem_names + ["anthropic"] + end + + def self.require_paths + ["anthropic"] + end + + def self.minimum_version + "0.1.0" + end + + def self.patcher + require_relative "patcher" + Patcher + end + end + end + end +end +``` + +### `lib/braintrust/contrib/anthropic/patcher.rb` + +```ruby +# lib/braintrust/contrib/anthropic/patcher.rb +require_relative "../patcher" + +module Braintrust + module Contrib + module Anthropic + class Patcher < Braintrust::Contrib::Patcher + class << self + def perform_patch(context) + patch_messages + end + + private + + def patch_messages + ::Anthropic::Client.prepend(MessagesPatch) + end + end + + module MessagesPatch + def messages + messages_resource = super + unless messages_resource.singleton_class.ancestors.include?(MessagesWrapper) + messages_resource.singleton_class.prepend(MessagesWrapper) + end + messages_resource + end + end + + module MessagesWrapper + def create(parameters: {}) + Braintrust::Trace.traced(name: "Anthropic Messages", type: "llm") do |span| + # Tracing logic refactored from existing anthropic.rb + super + end + end + + # Also wrap streaming methods + end + end + end + end +end +``` + +## Files to Modify + +### `lib/braintrust/contrib.rb` + +Add require for Anthropic integration stub and register it: + +```ruby +require_relative "contrib/anthropic/integration" + +# Register the integration 
+Contrib::Anthropic::Integration.register! +``` + +**Note:** Registration is explicit in `contrib.rb` rather than automatic in the integration file, following the pattern established in Milestone 02. + +### `lib/braintrust/contrib/anthropic.rb` + +Add per-client `instrument!` method (the new API): + +```ruby +# lib/braintrust/contrib/anthropic.rb +module Braintrust + module Contrib + module Anthropic + # Instrument a specific client instance + # This is the new API; Braintrust::Trace::Anthropic.wrap is the backwards-compat alias + def self.instrument!(client) + # Same behavior as the existing wrap() function + # ... wrapping logic ... + client + end + end + end +end +``` + +### `lib/braintrust/trace/contrib/anthropic.rb` + +Convert to compatibility shim that delegates to new API: + +```ruby +# lib/braintrust/trace/contrib/anthropic.rb +# Backwards compatibility - delegates to new contrib framework + +module Braintrust + module Trace + module Anthropic + def self.wrap(client) + Braintrust::Contrib::Anthropic.instrument!(client) + end + end + end +end +``` + +## Tests to Create + +### `test/braintrust/contrib/anthropic/integration_test.rb` + +- Test `integration_name`, `gem_names`, `require_paths` +- Test `available?` and `compatible?` +- Test `patch!` calls patcher + +### `test/braintrust/contrib/anthropic/patcher_test.rb` + +- Test class-level patching (new clients are instrumented) +- Test idempotency (patch! twice doesn't double-wrap) +- Test `applicable?` returns true for this patcher +- Test `messages.create` is traced +- Test streaming methods are traced + +## Documentation + +Update README to show Anthropic in list of supported libraries. 
+ +## Dependencies + +- [01-integration-framework.md](01-integration-framework.md) must be complete +- [02-openai-integration.md](02-openai-integration.md) recommended (establishes patterns) diff --git a/.plan/auto_instrument/milestones/08-ruby-openai-integration.md b/.plan/auto_instrument/milestones/08-ruby-openai-integration.md new file mode 100644 index 0000000..aeb3d6b --- /dev/null +++ b/.plan/auto_instrument/milestones/08-ruby-openai-integration.md @@ -0,0 +1,238 @@ +# Milestone 08: Ruby-OpenAI Integration + +## Goal + +Port the ruby-openai (alexrudall/ruby-openai) integration to the new contrib framework. + +## What You Get + +All ruby-openai clients auto-traced: + +```ruby +require "braintrust" +Braintrust.init + +# Using alexrudall/ruby-openai gem +client = OpenAI::Client.new(access_token: "...") +client.chat(parameters: { ... }) # Traced! +``` + +## Success Criteria + +- `Braintrust::Contrib::RubyOpenai::Integration.patch!` instruments ruby-openai clients +- Handles namespace collision with official `openai` gem +- Class-level patching (new instances are auto-traced) +- Existing `.wrap(client)` API still works +- All existing ruby-openai tests pass + +## Important: Gem Disambiguation + +Both the official `openai` gem and `ruby-openai` gem use the `OpenAI` namespace and the same require path (`"openai"`). Both gems can be installed simultaneously, but only one's code can be loaded. + +### Detection Strategy + +Check `$LOADED_FEATURES` to determine which gem's code was actually loaded (both can be in `Gem.loaded_specs`, but only one loads code): + +```ruby +def self.gem_names + ["ruby-openai"] +end + +def self.require_paths + ["openai"] # Same as official gem +end + +# Override available? to check which gem's code is actually loaded +def self.available? + $LOADED_FEATURES.any? 
{ |f| f.end_with?("/openai.rb") && f.include?("ruby-openai") } || + Gem.loaded_specs.key?("ruby-openai") +end +``` + +## Files to Create + +### `lib/braintrust/contrib/ruby_openai/integration.rb` + +```ruby +# lib/braintrust/contrib/ruby_openai/integration.rb +require_relative "../integration" + +module Braintrust + module Contrib + module RubyOpenai + class Integration + include Braintrust::Contrib::Integration + + def self.integration_name + :ruby_openai + end + + def self.gem_names + ["ruby-openai"] + end + + def self.require_paths + ["openai"] # Same require path as official gem + end + + def self.minimum_version + "3.0.0" + end + + # Override available? to check which gem's code is actually loaded + def self.available? + $LOADED_FEATURES.any? { |f| f.end_with?("/openai.rb") && f.include?("ruby-openai") } || + Gem.loaded_specs.key?("ruby-openai") + end + + def self.patcher + require_relative "patcher" + Patcher + end + end + end + end +end +``` + +### `lib/braintrust/contrib/ruby_openai/patcher.rb` + +```ruby +# lib/braintrust/contrib/ruby_openai/patcher.rb +require_relative "../patcher" + +module Braintrust + module Contrib + module RubyOpenai + class Patcher < Braintrust::Contrib::Patcher + class << self + def perform_patch(context) + patch_chat + end + + private + + def patch_chat + ::OpenAI::Client.prepend(ChatPatch) + end + end + + module ChatPatch + def chat(parameters: {}) + Braintrust::Trace.traced(name: "OpenAI Chat", type: "llm") do |span| + # Tracing logic refactored from existing code + super + end + end + end + end + end + end +end +``` + +## Files to Modify + +### `lib/braintrust/contrib.rb` + +Add require for ruby-openai integration stub and register it: + +```ruby +require_relative "contrib/ruby_openai/integration" + +# Register the integration +Contrib::RubyOpenai::Integration.register! +``` + +**Note:** Registration is explicit in `contrib.rb` rather than automatic in the integration file, following the pattern established in Milestone 02. 
+ +### `lib/braintrust/contrib/ruby_openai.rb` + +Add per-client `instrument!` method (the new API): + +```ruby +# lib/braintrust/contrib/ruby_openai.rb +module Braintrust + module Contrib + module RubyOpenai + # Instrument a specific client instance + # This is the new API; Braintrust::Trace::OpenAI.wrap is the backwards-compat alias + def self.instrument!(client) + # Same behavior as the existing wrap() function + # ... wrapping logic ... + client + end + end + end +end +``` + +### `lib/braintrust/trace/contrib/openai.rb` + +The existing shim needs to detect which gem is loaded and delegate appropriately: + +```ruby +# lib/braintrust/trace/contrib/openai.rb +# Backwards compatibility - delegates to new contrib framework + +module Braintrust + module Trace + module OpenAI + def self.wrap(client) + # Detect which gem is loaded and delegate to appropriate integration + if Gem.loaded_specs.key?("ruby-openai") + Braintrust::Contrib::RubyOpenai.instrument!(client) + else + Braintrust::Contrib::OpenAI.instrument!(client) + end + end + end + end +end +``` + +## Tests to Create + +### `test/braintrust/contrib/ruby_openai/integration_test.rb` + +- Test `integration_name`, `gem_names`, `require_paths` +- Test `available?` correctly detects ruby-openai gem (not official openai) +- Test `available?` checks $LOADED_FEATURES for gem disambiguation +- Test `available?` returns false when only official openai gem is loaded +- Test `compatible?` +- Test `patch!` calls patcher + +### `test/braintrust/contrib/ruby_openai/patcher_test.rb` + +- Test class-level patching (new clients are instrumented) +- Test idempotency (patch! 
twice doesn't double-wrap) +- Test `applicable?` returns true for this patcher +- Test `chat` method is traced + +## Documentation + +Update README to clarify: +- Difference between `openai` and `ruby-openai` gems +- Both are supported +- Auto-detection handles the right one + +## Dependencies + +- [01-integration-framework.md](01-integration-framework.md) must be complete +- [02-openai-integration.md](02-openai-integration.md) must be complete (for disambiguation) + +## Optional: Shared Utilities Refactoring + +**Consider** performing the shared utilities refactoring described in [ref/future-work.md](../ref/future-work.md#shared-utilities-refactoring) during this milestone. + +**Why this milestone?** By this point, you'll have: +- Multiple integrations (official openai, ruby-openai, possibly anthropic) +- Better understanding of what utilities are truly shared vs vendor-specific +- Real patterns emerged from implementation + +**What to refactor:** +- Move token parsing from `lib/braintrust/trace/tokens.rb` to `lib/braintrust/contrib/support/openai.rb` and `support/anthropic.rb` +- Extract shared utilities from patcher files to appropriate `support/` files +- Add backward compatibility layer in old locations + +**Decision point:** If this milestone feels too large with the refactoring, defer it to a dedicated "cleanup" milestone after all initial integrations are ported. diff --git a/.plan/auto_instrument/milestones/09-ruby-llm-integration.md b/.plan/auto_instrument/milestones/09-ruby-llm-integration.md new file mode 100644 index 0000000..d9b7b6f --- /dev/null +++ b/.plan/auto_instrument/milestones/09-ruby-llm-integration.md @@ -0,0 +1,179 @@ +# Milestone 09: RubyLLM Integration + +## Goal + +Port the RubyLLM integration to the new contrib framework. + +## What You Get + +All RubyLLM interactions auto-traced: + +```ruby +require "braintrust" +Braintrust.init + +# Using RubyLLM +chat = RubyLLM.chat(model: "gpt-4") +chat.ask("Hello!") # Traced! 
+``` + +## Success Criteria + +- `Braintrust::Contrib::RubyLLM::Integration.patch!` instruments RubyLLM +- Class-level patching +- Existing behavior preserved +- All existing RubyLLM tests pass + +## Files to Create + +### `lib/braintrust/contrib/ruby_llm/integration.rb` + +```ruby +# lib/braintrust/contrib/ruby_llm/integration.rb +require_relative "../integration" + +module Braintrust + module Contrib + module RubyLLM + class Integration + include Braintrust::Contrib::Integration + + def self.integration_name + :ruby_llm + end + + def self.gem_names + ["ruby_llm"] + end + + def self.require_paths + ["ruby_llm"] + end + + def self.minimum_version + "1.0.0" + end + + def self.patcher + require_relative "patcher" + Patcher + end + end + end + end +end +``` + +### `lib/braintrust/contrib/ruby_llm/patcher.rb` + +```ruby +# lib/braintrust/contrib/ruby_llm/patcher.rb +require_relative "../patcher" + +module Braintrust + module Contrib + module RubyLLM + class Patcher < Braintrust::Contrib::Patcher + class << self + def perform_patch(context) + patch_chat + end + + private + + def patch_chat + ::RubyLLM::Chat.prepend(ChatPatch) + end + end + + module ChatPatch + def ask(message, **options) + Braintrust::Trace.traced(name: "RubyLLM Chat", type: "llm") do |span| + # Tracing logic refactored from existing ruby_llm.rb + super + end + end + end + end + end + end +end +``` + +## Files to Modify + +### `lib/braintrust/contrib.rb` + +Add require for RubyLLM integration stub and register it: + +```ruby +require_relative "contrib/ruby_llm/integration" + +# Register the integration +Contrib::RubyLLM::Integration.register! +``` + +**Note:** Registration is explicit in `contrib.rb` rather than automatic in the integration file, following the pattern established in Milestone 02. 
+ +### `lib/braintrust/contrib/ruby_llm.rb` + +Add per-client `instrument!` method (the new API): + +```ruby +# lib/braintrust/contrib/ruby_llm.rb +module Braintrust + module Contrib + module RubyLLM + # Instrument a specific client instance + # This is the new API; Braintrust::Trace::RubyLLM.wrap is the backwards-compat alias + def self.instrument!(client) + # Same behavior as the existing wrap() function + # ... wrapping logic ... + client + end + end + end +end +``` + +### `lib/braintrust/trace/contrib/ruby_llm.rb` + +Convert to compatibility shim that delegates to new API: + +```ruby +# lib/braintrust/trace/contrib/ruby_llm.rb +# Backwards compatibility - delegates to new contrib framework + +module Braintrust + module Trace + module RubyLLM + def self.wrap(client) + Braintrust::Contrib::RubyLLM.instrument!(client) + end + end + end +end +``` + +## Tests to Create + +### `test/braintrust/contrib/ruby_llm/integration_test.rb` + +- Test `integration_name`, `gem_names`, `require_paths` +- Test `available?` and `compatible?` +- Test `patch!` calls patcher + +### `test/braintrust/contrib/ruby_llm/patcher_test.rb` + +- Test class-level patching (new instances are instrumented) +- Test idempotency (patch! twice doesn't double-wrap) +- Test `applicable?` returns true for this patcher +- Test `ask` method is traced + +## Documentation + +Update README to show RubyLLM in list of supported libraries. + +## Dependencies + +- [01-integration-framework.md](01-integration-framework.md) must be complete diff --git a/.plan/auto_instrument/plan.md b/.plan/auto_instrument/plan.md new file mode 100644 index 0000000..494f62c --- /dev/null +++ b/.plan/auto_instrument/plan.md @@ -0,0 +1,136 @@ +# Braintrust Ruby SDK: Auto-Instrumentation + +Make it fast and easy to get your application instrumented. 
+ +## The Problem + +Today, instrumenting LLM libraries requires manual setup per client: + +```ruby +require "braintrust" +Braintrust.init + +client = OpenAI::Client.new +Braintrust::Trace::OpenAI.wrap(client) # Must wrap every client instance + +client2 = Anthropic::Client.new +Braintrust::Trace::Anthropic.wrap(client2) # And again for each library... +``` + +This is verbose, error-prone, and easy to forget. + +## The Vision + +**It just works.** One line, all libraries instrumented: + +```ruby +require "braintrust" +Braintrust.init # That's it. All OpenAI/Anthropic/etc clients auto-traced. +``` + +Or even zero lines with CLI: +```bash +braintrust exec -- ruby app.rb +``` + +## Benefits + +| Benefit | Description | +| --------------- | ------------------------------------------------ | +| **Zero-config** | Works out of the box with sensible defaults | +| **Safe** | Fails gracefully; never breaks user applications | +| **Flexible** | Opt-out or selective instrumentation when needed | +| **CLI support** | Instrument without any code changes | + +## User Experience + +Not all applications and environments are the same. We provide several ways to instrument applications, ordered from most automatic to most customizable. + +### 1. Zero Code + +**Best for:** Instrumenting any Ruby application without modifying its code. + +```bash +braintrust exec -- ruby app.rb +braintrust exec -- bundle exec rails s +``` + +### 2. Zero Config + +**Best for:** Instrumenting specific Ruby applications with smart defaults and maximum compatibility. + +```ruby +# Gemfile +gem "braintrust", require: "braintrust/contrib/auto_instrument" + +# Or Rails initializer +require "braintrust/contrib/auto_instrument" +``` + +### 3. Single Line + +**Best for:** Controlling when and what instrumentation is activated. + +```ruby +require "braintrust" + +Braintrust.init # Auto-instruments all detected libraries + +client = OpenAI::Client.new +client.chat.completions.create(...) # Automatically traced! 
+``` + +You can also choose what instrumentation is activated: + +```ruby +# You can set environment variables: +# +# BRAINTRUST_AUTO_INSTRUMENT=true +# BRAINTRUST_INSTRUMENT_ONLY=openai,anthropic +# BRAINTRUST_INSTRUMENT_EXCEPT=ruby_llm +# + +# Or configure explicitly in code: +Braintrust.init(auto_instrument: { only: [:openai, :anthropic] }) # Only specific libraries +# --- OR --- +Braintrust.init(auto_instrument: { except: [:ruby_llm] }) # Exclude certain libraries + +client = OpenAI::Client.new +client.chat.completions.create(...) # Automatically traced! +``` + +### 4. Custom + +**Best for:** Fine-control over which parts of an application are instrumented. + +```ruby +# Skip auto-instrument with: +# +# BRAINTRUST_AUTO_INSTRUMENT=false +# +# Or configure explicitly in code: +Braintrust.init(auto_instrument: false) + +# ...then manually instrument a specific OpenAI client +client = OpenAI::Client.new +Braintrust::Contrib::OpenAI.instrument!(client) # Explicit per-client wrapping +``` + +## Milestones + +| # | Milestone | What You Get | +| --------------------------------------------------- | ---------------------------- | --------------------------------------------------------------------------------- | +| [01](milestones/01-integration-framework.md) | Integration framework | Consistent interface across integrations (for more reliable auto-instrumentation) | +| [02](milestones/02-openai-integration.md) | OpenAI Integration | All OpenAI clients auto-traced with `Integration.patch!` | +| [03](milestones/03-instrument-api.md) | Instrument API | Clean `Braintrust::Contrib.instrument!(:openai)` call | +| [04](milestones/04-init-auto-instrument.md) | Init Auto-Instrument | `Braintrust.init` auto-instruments everything (zero-config) | +| [05](milestones/05-require-time-auto-instrument.md) | Require-time Auto-Instrument | Works via `Gemfile` or Rails initializer (no `init()` needed) | +| [06](milestones/06-cli-wrapper.md) | CLI Wrapper | `braintrust exec -- ruby 
app.rb` (zero code changes) | +| [07](milestones/07-anthropic-integration.md) | Anthropic Integration | Anthropic clients auto-traced | +| [08](milestones/08-ruby-openai-integration.md) | Ruby-OpenAI Integration | alexrudall/ruby-openai gem auto-traced | +| [09](milestones/09-ruby-llm-integration.md) | RubyLLM Integration | RubyLLM auto-traced | + +## See Also + +- [Technical Design](ref/design.md) - Architecture, principles, and implementation details +- [Future Work](ref/future-work.md) - Potential next steps beyond the core milestones diff --git a/.plan/auto_instrument/ref/design.md b/.plan/auto_instrument/ref/design.md new file mode 100644 index 0000000..c63f668 --- /dev/null +++ b/.plan/auto_instrument/ref/design.md @@ -0,0 +1,264 @@ +# Technical Design: Auto-Instrumentation + +This document covers the technical architecture and implementation strategy for auto-instrumentation. + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ braintrust.rb │ +│ (main entry point) │ +└────────────────────────────┬────────────────────────────────┘ + │ requires + ┌────────────────┴────────────────┐ + ▼ ▼ +┌────────────────────┐ ┌────────────────────────┐ +│ Core SDK │ │ Contrib Framework │ +│ (trace, state, │ │ (registry, base │ +│ config, api) │ │ classes, auto- │ +│ │ │ instrument) │ +│ NO contrib refs │◄─────────────│ │ +└────────────────────┘ uses core └───────────┬────────────┘ + │ loads + ┌──────────┬───────────┬───────┴───────┬──────────┐ + ▼ ▼ ▼ ▼ ▼ + ┌─────────┐ ┌──────────┐ ┌─────────┐ ┌──────────┐ ┌─────────┐ + │ OpenAI │ │Anthropic │ │ Ruby- │ │ RubyLLM │ │ ... 
│ + │ │ │ │ │ OpenAI │ │ │ │ │ + └─────────┘ └──────────┘ └─────────┘ └──────────┘ └─────────┘ + (each integration is a self-contained plugin) +``` + +## Design Principles + +- **Severable**: Contrib framework can be extracted to a separate gem; integrations are independent plugins +- **Lazy loading**: Heavy patcher code only loads when the integration is actually used +- **Fail-safe**: All patching wrapped in rescue blocks; errors logged, never raised +- **Idempotent**: Multiple `init()` or `patch!` calls are safe (no duplicate instrumentation) + +## Directory Structure + +``` +lib/ +├── braintrust.rb # Entry point - loads core + contrib +│ +└── braintrust/ + ├── contrib.rb # Contrib entry point, loads registry + stubs + ├── contrib/ + │ ├── registry.rb # Central integration registry + │ ├── integration.rb # Base integration module + │ ├── patcher.rb # Base patcher class + │ ├── auto_instrument.rb # Auto-instrumentation logic + │ │ + │ ├── openai/ # Official OpenAI SDK + │ │ ├── integration.rb # Integration definition + │ │ └── patcher.rb # Class-level patching + │ │ + │ ├── anthropic/ + │ │ ├── integration.rb + │ │ └── patcher.rb + │ │ + │ ├── ruby_openai/ # alexrudall/ruby-openai gem + │ │ ├── integration.rb + │ │ └── patcher.rb + │ │ + │ └── ruby_llm/ + │ ├── integration.rb + │ └── patcher.rb + │ + ├── trace.rb # Core tracing (NO contrib references) + └── ... # Other core files +``` + +### Key Principle: Severable Plugin Architecture + +**Core has ZERO references to `contrib/`.** Contrib requires core, not vice versa. + +This enables: +1. `contrib/` can be extracted to a separate gem (`braintrust-contrib`) in the future +2. Core SDK releases are independent of integration updates +3. Each integration folder is self-contained (can be extracted to its own gem) + +Each integration folder is a self-contained plugin that: +1. Registers itself with the central registry when loaded +2. Has no dependencies on other integrations +3. 
Can be extracted into a separate gem in the future +4. Leverages shared base classes for consistency + +## Loading Strategy: Stub + Lazy Load + +To minimize performance impact as the number of integrations grows: + +**Eager loaded (always):** +- Integration "stubs" - tiny files with just metadata (name, gem_names, require_paths, version constraints) +- Base classes (Registry, Integration module, Patcher base) +- Total: ~40 lines per integration stub + +**Lazy loaded (on first patch):** +- Patcher classes - heavy files with actual patching logic (~500 lines each) +- Only loaded for integrations that are actually instrumented + +```ruby +# The lazy loading happens in the Integration's patcher method: +def self.patcher + require_relative "patcher" # Heavy file loaded on-demand + Patcher +end +``` + +### Explicit Registration Pattern + +Integrations are registered explicitly in `lib/braintrust/contrib.rb` rather than auto-registering when loaded: + +```ruby +# lib/braintrust/contrib.rb +require_relative "contrib/openai/integration" +Contrib::OpenAI::Integration.register! +``` + +**Rationale:** +- **No side effects**: Integration classes can be loaded without automatically registering them +- **Testability**: Tests can load integrations without polluting the global registry +- **Flexibility**: Tools (CI/CD, documentation generators, etc.) can inspect integrations without registration +- **Single source of truth**: `contrib.rb` shows exactly which integrations are registered + +This differs from auto-registration (where `register!` is called at the end of the integration file) but provides better separation of concerns. + +## Safety Considerations + +1. **Fail-Safe Patching**: All patching wrapped in rescue blocks +2. **Idempotent**: Multiple calls to `patch!` are safe (no duplicate spans) +3. **No Breaking Changes**: Existing `.wrap()` API preserved for manual use +4. **Lazy Loading**: Integrations only load when target library is present +5. 
**Version Compatibility**: Check library versions before patching +6. **Graceful Degradation**: If patching fails, app continues without tracing + +## Integration Filtering + +### Why Two-Level Filtering? + +As the number of integrations grows, efficient filtering becomes critical for performance: + +**Performance Benefits:** +- **Faster startup**: Lightweight checks avoid loading unnecessary patcher code (~500 lines each) +- **Lower memory overhead**: Only load patchers for libraries actually in use +- **Minimal require-time cost**: Integration stubs are ~40 lines each, patchers load only when needed + +**Without filtering:** +- When multiple integrations subscribe to the same require path, all their patchers would load +- Memory waste if wrong integration's patcher loads +- Slower require times as number of integrations grows +- Problem compounds with each ambiguous require path + +**With two-level filtering:** +- Only eligible integration's patchers load +- O(1) checks before O(n) patcher loading +- Scales to dozens of integrations with minimal overhead + +### Two-Level Filtering Strategy + +**Level 1: Integration-Level (Lightweight, No Patcher Loading)** +- `available?`: Is the target library loaded? (~10 lines of code) +- `compatible?`: Is the library version compatible? (~20 lines of code) +- These checks happen BEFORE loading patcher code (~500 lines each) +- Only eligible integrations proceed to patching + +**Level 2: Patcher-Level (After Patcher Loads)** +- `applicable?`: Should this specific patcher apply? +- Useful for version-specific patchers within one integration +- Checked under mutex lock before patching +- Can inspect loaded library structure (methods, constants, etc.) + +### Example: OpenAI vs Ruby-OpenAI + +Both gems use `require "openai"` but only one's code can be loaded: + +```ruby +# When require "openai" happens: +# 1. Registry finds: [OpenAI::Integration, RubyOpenai::Integration] +# 2. Filter by available? 
(lightweight, no patcher loading): +# - Check $LOADED_FEATURES to see which openai.rb was loaded +# - OpenAI::Integration.available? → true if official gem +# - RubyOpenai::Integration.available? → true if ruby-openai gem +# 3. Only one is available +# 4. Patch that one (NOW load its patcher - only the correct one) +``` + +**Result**: Only ~40 lines of integration stub code checked for the wrong gem, not ~500 lines of patcher code. + +### Multiple Patchers Per Integration + +An integration can have multiple patchers for different versions: + +```ruby +class OpenAI::Integration + def self.patcher_classes + require_relative "patcher_v1" + require_relative "patcher_v2" + [Patcherv1, Patcherv2] + end +end + +class OpenAI::Patcherv1 < Patcher + def self.applicable? + # Check for v1.x API structure + defined?(::OpenAI::Client) && + ::OpenAI::Client.instance_methods.include?(:chat) + end +end +``` + +**Decision guide:** +- **Multiple integrations**: Different gems with same require path (avoids loading wrong patcher) +- **Multiple patchers**: Same gem, but incompatible API structures (all patchers load, one applies) + +## Thread Safety + +| Component | Issue | Solution | +|-----------|-------|----------| +| Registry cache | Race condition reading `@require_path_map` | Double-checked locking pattern | +| Patcher `patch!` | Race condition setting `@patched` | Mutex in Patcher base class with double-check | +| Require hook | Reentrancy if patching triggers requires | Thread-local guard (`Thread.current[:braintrust_in_require_hook]`) | +| Rails hook | Already initialized scenario | Not an issue - `after_initialize` runs immediately via ActiveSupport.on_load | + +## Backwards Compatibility + +The existing manual wrapping API will continue to work: +```ruby +client = OpenAI::Client.new +Braintrust::Trace::OpenAI.wrap(client) # Still works! +``` + +The `Braintrust::Trace::OpenAI` module becomes a compatibility shim that: +1. 
Checks if class-level patching has already been applied (no double-wrap)
+ +## Shared Utilities Refactoring + +**When:** During Milestone 08 (ruby-openai integration) or later +**Status:** Planned - shelved until multiple integrations exist + +### Current State + +Integration support utilities are scattered: +- Token parsing: `lib/braintrust/trace/tokens.rb` (parse_openai_usage_tokens, parse_anthropic_usage_tokens) +- Per-integration utilities: Embedded in patcher files (e.g., `lib/braintrust/contrib/openai/patcher.rb`) + +This creates: +- Confusion about where utilities live (`trace/` vs `contrib/`) +- Difficulty discovering what utilities exist for each vendor +- Unclear ownership and boundaries + +### Proposed Structure + +Organize shared utilities by **vendor** under `lib/braintrust/contrib/support/`: + +```ruby +lib/braintrust/contrib/support/ + openai.rb # module Braintrust::Contrib::Support::OpenAI + anthropic.rb # module Braintrust::Contrib::Support::Anthropic + common.rb # module Braintrust::Contrib::Support::Common (optional) +``` + +Each vendor file contains utilities specific to that provider's API: + +```ruby +# lib/braintrust/contrib/support/openai.rb +module Braintrust::Contrib::Support::OpenAI + def self.parse_usage_tokens(usage) + # OpenAI-specific token field parsing + end + + def self.aggregate_streaming_chunks(chunks) + # OpenAI-specific streaming aggregation + end +end +``` + +Truly generic utilities (if they emerge) go in `common.rb`: + +```ruby +# lib/braintrust/contrib/support/common.rb +module Braintrust::Contrib::Support::Common + def self.set_json_attr(span, attr_name, obj) + # Generic span attribute helper + end +end +``` + +### Rationale for Vendor-Based Organization + +**Why by vendor instead of by behavior?** + +- **Isolation**: Changes to OpenAI utilities don't touch Anthropic files +- **Clear ownership**: All OpenAI-specific logic in one place +- **Matches reality**: Token parsing isn't shared code - each vendor has different response structures +- **Aligns with integration structure**: Integrations are 
organized by vendor (`contrib/openai/`, `contrib/anthropic/`) +- **Easy cleanup**: Remove a vendor? Delete one file +- **Room to grow**: If a vendor file exceeds ~200 lines, refactor to a subdirectory + +**Alternatives considered:** +- Organize by behavior (`support/token_parsing.rb` with all vendors) - rejected due to vendor coupling and large file growth +- Hybrid with base classes - rejected as over-engineering for current needs + +### Migration Strategy + +1. **Create vendor support files** with utilities extracted from current locations +2. **Update integrations** to require and use new locations: + ```ruby + require_relative "../support/openai" + + metrics = Braintrust::Contrib::Support::OpenAI.parse_usage_tokens(usage) + ``` +3. **Add backward compatibility** in `lib/braintrust/trace/tokens.rb`: + ```ruby + require_relative "../contrib/support/openai" + + module Braintrust::Trace + def self.parse_openai_usage_tokens(usage) + Contrib::Support::OpenAI.parse_usage_tokens(usage) + end + end + ``` +4. **Deprecate old location** (optional) after all internal usage migrated + +### Files to Move + +From `lib/braintrust/trace/tokens.rb`: +- `parse_openai_usage_tokens` → `contrib/support/openai.rb` +- `parse_anthropic_usage_tokens` → `contrib/support/anthropic.rb` + +From `lib/braintrust/contrib/openai/patcher.rb`: +- `set_json_attr` → Consider for `contrib/support/common.rb` +- `aggregate_streaming_chunks` → `contrib/support/openai.rb` +- `aggregate_responses_events` → `contrib/support/openai.rb` + +### Benefits + +- **Clearer architecture**: Support utilities live with integrations, not in `trace/` +- **Better discoverability**: "What utilities exist for OpenAI?" → Look in `support/openai.rb` +- **Reduced coupling**: Vendor changes isolated +- **Consistent patterns**: Matches how integrations are already organized + +## System-level auto instrument + +Users install a system package (e.g. 
`.deb`, `.sh` script) or similar that injects the Braintrust SDK into all Ruby applications on the system. + +The technical idea is to modify the Ruby system configuration to always load Braintrust with any Ruby process. + +Useful for: + +- *Containerized deployments* by baking the Braintrust SDK into user's Docker image build step in their CI/CD pipelines +- *Host-based deployments* when Ruby apps installed directly onto the host. + +## Core-Only Require + +Add `lib/braintrust/core.rb` for users who want minimal footprint without contrib overhead: + +```ruby +require "braintrust/core" # Just core (State, Config, Trace, API, Eval) +# No integrations loaded - smaller memory footprint +``` + +Useful for: +- Applications that don't use any supported LLM libraries +- Custom instrumentation scenarios +- Reducing startup time + +When implemented, `braintrust.rb` would become: +```ruby +require_relative "braintrust/core" +require_relative "braintrust/contrib" +``` + +## Per-Integration Configuration + +Add a `Configuration` class hierarchy for integration-specific settings. + +Configuration values could be derived from multiple sources (in priority order): +1. Programmatic configuration via `configure` block +2. Environment variables (e.g., `BRAINTRUST_OPENAI_INCLUDE_PROMPTS=false`) +3. 
Configuration file (e.g., `.braintrust.yml`) + +```ruby +# lib/braintrust/contrib/configuration.rb +class Configuration + attr_accessor :enabled # default: true +end + +# lib/braintrust/contrib/openai/configuration.rb +class Configuration < Braintrust::Contrib::Configuration + attr_accessor :trace_chat_completions # default: true + attr_accessor :trace_responses # default: true + attr_accessor :include_prompts # default: true (for privacy control) +end +``` + +Usage: +```ruby +Braintrust::Contrib::OpenAI::Integration.configure do |config| + config.include_prompts = false # Don't log prompts for privacy +end +``` + +## Instance-Level Configuration (Pin) + +Allow per-instance configuration, similar to Datadog's `Pin` class: + +```ruby +client = OpenAI::Client.new +Braintrust::Contrib.pin(client, service_name: "my-openai-service") +``` + +This would allow: +- Different tracing settings per client instance +- Service name customization +- Selective enable/disable on specific instances + +## Span Filtering by Integration + +Allow filtering spans based on integration type or other criteria: + +```ruby +Braintrust.configure do |config| + config.span_filter = ->(span) { + # Drop spans from specific integrations + span.integration != :ruby_llm + } +end +``` + +## Metrics Collection + +Aggregate metrics across integrations: +- Total tokens used +- Latency percentiles +- Error rates by integration +- Cost estimates + +```ruby +Braintrust::Contrib.metrics +# => { openai: { requests: 100, tokens: 50000, avg_latency_ms: 250 }, ... } +``` + +## Additional Integrations + +Potential future integrations: +- **Cohere** - Cohere API client +- **AI21** - AI21 Labs API client +- **Mistral** - Mistral AI client +- **LangChain.rb** - LangChain Ruby framework +- **Instructor-rb** - Structured extraction library + +## Unpatch Support + +Allow removing instrumentation: + +```ruby +Braintrust::Contrib::OpenAI::Integration.unpatch! 
+``` + +This is complex because: +- Ruby doesn't have clean "unprepend" +- Would need to track original methods +- May not be worth the complexity From c4df1e630b7d2ac7572f06ee928df68eb9fd338b Mon Sep 17 00:00:00 2001 From: David Elner Date: Mon, 15 Dec 2025 11:41:19 -0500 Subject: [PATCH 2/6] Added: Integration framework API --- CONTRIBUTING.md | 64 +++ lib/braintrust.rb | 1 + lib/braintrust/contrib.rb | 83 ++++ lib/braintrust/contrib/context.rb | 64 +++ lib/braintrust/contrib/integration.rb | 139 ++++++ lib/braintrust/contrib/patcher.rb | 67 +++ lib/braintrust/contrib/registry.rb | 92 ++++ lib/braintrust/state.rb | 5 + test/braintrust/contrib/integration_test.rb | 478 ++++++++++++++++++++ test/braintrust/contrib/patcher_test.rb | 281 ++++++++++++ test/braintrust/contrib/registry_test.rb | 241 ++++++++++ test/test_helper.rb | 45 ++ 12 files changed, 1560 insertions(+) create mode 100644 lib/braintrust/contrib.rb create mode 100644 lib/braintrust/contrib/context.rb create mode 100644 lib/braintrust/contrib/integration.rb create mode 100644 lib/braintrust/contrib/patcher.rb create mode 100644 lib/braintrust/contrib/registry.rb create mode 100644 test/braintrust/contrib/integration_test.rb create mode 100644 test/braintrust/contrib/patcher_test.rb create mode 100644 test/braintrust/contrib/registry_test.rb diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5d11826..cec97dc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -77,3 +77,67 @@ for more details. ```bash rake -T test:vcr ``` + +## Adding Integrations for AI Libraries + +To add instrumentation support for a new AI library, follow these steps: + +### 1. 
Define the Integration + +Create a new file in `lib/braintrust/contrib/`: + +```ruby +# lib/braintrust/contrib/trustybrain_llm.rb +module Braintrust::Contrib + class TrustybrainLLM + include Integration + + def self.integration_name + :trustybrain_llm + end + + def self.gem_names + ["trustybrain_llm"] + end + + def self.patcher + TrustybrainLLMPatcher + end + end + + class TrustybrainLLMPatcher < Patcher + def self.perform_patch(context) + # Add your instrumentation here + # context.tracer_provider gives you access to the tracer + end + end +end +``` + +### 2. Register It + +Add to `lib/braintrust/contrib.rb`: + +```ruby +require_relative "contrib/trustybrain_llm" + +# At the bottom: +Contrib::TrustybrainLLM.register! +``` + +### 3. Write Tests + +Create `test/braintrust/contrib/trustybrain_llm_test.rb`: + +```ruby +require "test_helper" + +class Braintrust::Contrib::TrustybrainLLMTest < Minitest::Test + def test_integration_basics + assert_equal :trustybrain_llm, TrustybrainLLM.integration_name + assert_equal ["trustybrain_llm"], TrustybrainLLM.gem_names + end +end +``` + +See existing tests in `test/braintrust/contrib/` for complete examples of testing integrations, patchers, and the registry. diff --git a/lib/braintrust.rb b/lib/braintrust.rb index 469cde1..979651e 100644 --- a/lib/braintrust.rb +++ b/lib/braintrust.rb @@ -7,6 +7,7 @@ require_relative "braintrust/api" require_relative "braintrust/internal/experiments" require_relative "braintrust/eval" +require_relative "braintrust/contrib" # Braintrust Ruby SDK # diff --git a/lib/braintrust/contrib.rb b/lib/braintrust/contrib.rb new file mode 100644 index 0000000..f6e059a --- /dev/null +++ b/lib/braintrust/contrib.rb @@ -0,0 +1,83 @@ +# frozen_string_literal: true + +require_relative "contrib/registry" +require_relative "contrib/integration" +require_relative "contrib/patcher" +require_relative "contrib/context" + +module Braintrust + # Contrib framework for auto-instrumentation integrations. 
+ # Provides a consistent interface for all integrations and enables + # reliable auto-instrumentation in later milestones. + module Contrib + class << self + # Get the global registry instance. + # @return [Registry] + def registry + Registry.instance + end + + # Initialize the contrib framework with optional configuration. + # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider, nil] Optional tracer provider + # @return [void] + def init(tracer_provider: nil) + @default_tracer_provider = tracer_provider + end + + # Instrument a registered integration by name. + # This is the main entry point for activating integrations. + # + # @param name [Symbol] The integration name (e.g., :openai, :anthropic) + # @param options [Hash] Optional configuration + # @option options [Object] :target Optional target instance to instrument specifically + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + # + # @example Instrument all OpenAI clients + # Braintrust::Contrib.instrument!(:openai) + # + # @example Instrument specific OpenAI client instance + # client = OpenAI::Client.new + # Braintrust::Contrib.instrument!(:openai, target: client, tracer_provider: my_provider) + def instrument!(name, **options) + if (integration = registry[name]) + integration.instrument!(**options) + else + Braintrust::Log.error("No integration for '#{name}' is defined!") + end + end + + # Get the default tracer provider, falling back to OpenTelemetry global. + # @return [OpenTelemetry::Trace::TracerProvider] + def default_tracer_provider + @default_tracer_provider || ::OpenTelemetry.tracer_provider + end + + # Get the context for a target object. + # @param target [Object] The object to retrieve context from + # @return [Context, nil] The context if found, nil otherwise + def context_for(target) + Context.from(target) + end + + # Get the tracer provider for a target. 
+ # Checks target's context first, then falls back to contrib default. + # @param target [Object] The object to look up tracer provider for + # @return [OpenTelemetry::Trace::TracerProvider] + def tracer_provider_for(target) + context_for(target)&.[](:tracer_provider) || default_tracer_provider + end + + # Get a tracer for a target, using its context's tracer_provider if available. + # @param target [Object] The object to look up context from + # @param name [String] Tracer name + # @return [OpenTelemetry::Trace::Tracer] + def tracer_for(target, name: "braintrust") + tracer_provider_for(target).tracer(name) + end + end + end +end + +# Load integration stubs (eager load minimal metadata). +# These will be added in subsequent milestones. diff --git a/lib/braintrust/contrib/context.rb b/lib/braintrust/contrib/context.rb new file mode 100644 index 0000000..ad92247 --- /dev/null +++ b/lib/braintrust/contrib/context.rb @@ -0,0 +1,64 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + # Per-instance or per-class configuration context. + # Allows attaching generic configuration to specific objects or classes. + class Context + # Set or update context on a target object. + # Creates a new context if one doesn't exist, or updates existing context. + # @param target [Object] The object to attach context to + # @param options [Hash] Configuration options to store + # @return [Context, nil] The existing context if updated, nil if created new or options empty + def self.set!(target, **options) + return nil if options.empty? + + if (ctx = from(target)) + # Update existing context + options.each { |k, v| ctx[k] = v } + else + # Create and attach new context + target.instance_variable_set(:@braintrust_context, new(**options)) + end + + ctx + end + + # Retrieve context from a target, checking instance then class. 
+ # @param target [Object] The object to retrieve context from + # @return [Context, nil] The context if found, nil otherwise + def self.from(target) + return nil unless target + return nil unless target.respond_to?(:instance_variable_get) + + # Check target instance + ctx = target.instance_variable_get(:@braintrust_context) + return ctx if ctx + + # Check target class + target.class.instance_variable_get(:@braintrust_context) + end + + # @param options [Hash] Configuration options + def initialize(**options) + @options = options + end + + def [](key) + @options[key] + end + + def []=(key, value) + @options[key] = value + end + + # Get an option value with a default fallback. + # @param key [Symbol, String] The option key + # @param default [Object] The default value if key not found + # @return [Object] The option value, or default if not found + def fetch(key, default) + @options.fetch(key, default) + end + end + end +end diff --git a/lib/braintrust/contrib/integration.rb b/lib/braintrust/contrib/integration.rb new file mode 100644 index 0000000..ead9904 --- /dev/null +++ b/lib/braintrust/contrib/integration.rb @@ -0,0 +1,139 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + # Base module defining the integration contract. + # Include this module in integration classes to define the schema. + # Delegates actual patching to a Patcher subclass. + module Integration + def self.included(base) + base.extend(ClassMethods) + end + + module ClassMethods + # Unique symbol name for this integration (e.g., :openai, :anthropic). + # @return [Symbol] + def integration_name + raise NotImplementedError, "#{self} must implement integration_name" + end + + # Array of gem names this integration supports. + # @return [Array] + def gem_names + raise NotImplementedError, "#{self} must implement gem_names" + end + + # Require paths for auto-instrument detection. + # Default implementation returns gem_names. 
+ # @return [Array] + def require_paths + gem_names + end + + # Is the target library loaded? + # @return [Boolean] + def available? + gem_names.any? { |name| Gem.loaded_specs.key?(name) } + end + + # Minimum compatible version (optional, inclusive). + # @return [String, nil] + def minimum_version + nil + end + + # Maximum compatible version (optional, inclusive). + # @return [String, nil] + def maximum_version + nil + end + + # Is the library version compatible? + # @return [Boolean] + def compatible? + return false unless available? + + gem_names.each do |name| + spec = Gem.loaded_specs[name] + next unless spec + + version = spec.version + return false if minimum_version && version < Gem::Version.new(minimum_version) + return false if maximum_version && version > Gem::Version.new(maximum_version) + return true + end + false + end + + # Array of patcher classes for this integration. + # Override to return multiple patchers for version-specific logic. + # @return [Array] Array of patcher classes + def patchers + [patcher] # Default: single patcher + end + + # Convenience method for single patcher (existing pattern). + # Override this OR patchers (not both). + # @return [Class] The patcher class + def patcher + raise NotImplementedError, "#{self} must implement patcher or patchers" + end + + # Instrument this integration with optional configuration. + # If a target is provided, configures the target instance specifically. + # Otherwise, applies class-level instrumentation to all instances. 
+ # + # @param options [Hash] Configuration options + # @option options [Object] :target Optional target instance to instrument + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [Boolean] true if patching succeeded or was already done + # + # @example Class-level instrumentation (all clients) + # integration.instrument!(tracer_provider: my_provider) + # + # @example Instance-level instrumentation (specific client) + # integration.instrument!(target: client, tracer_provider: my_provider) + def instrument!(**options) + if options[:target] + # Configure the target with provided options + options = options.dup + target = options.delete(:target) + + Contrib::Context.set!(target, **options) + end + + patch!(**options) + end + + # Apply instrumentation (idempotent). Tries all applicable patchers. + # This method is typically called by instrument! after configuration. + # + # @param options [Hash] Configuration options + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [Boolean] true if any patching succeeded or was already done + def patch!(**options) + return false unless available? && compatible? + + # Try all applicable patchers + success = false + patchers.each do |patch| + # Check if this patcher is applicable + next unless patch.applicable? + + # Attempt to patch (patcher checks applicable? again under lock) + success = true if patch.patch!(**options) + end + + Braintrust::Log.debug("No applicable patcher found for #{integration_name}") unless success + success + end + + # Register this integration with the global registry. + def register! 
+ Registry.instance.register(self) + end + end + end + end +end diff --git a/lib/braintrust/contrib/patcher.rb b/lib/braintrust/contrib/patcher.rb new file mode 100644 index 0000000..1d3c261 --- /dev/null +++ b/lib/braintrust/contrib/patcher.rb @@ -0,0 +1,67 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + # Base class for all patchers. + # Provides thread-safe, idempotent patching with error handling. + class Patcher + class << self + # Has this patcher already been applied? + # @return [Boolean] + def patched?(**options) + @patched == true + end + + # Override in subclasses to check if patcher should apply. + # Called after patcher loads but before perform_patch. + # @return [Boolean] true if this patcher should be applied + def applicable? + true # Default: always applicable + end + + # Apply the patch (thread-safe and idempotent). + # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [Boolean] true if patching succeeded or was already done + def patch!(**options) + return false unless applicable? + return true if patched?(**options) # Fast path + + @patch_mutex ||= Mutex.new + @patch_mutex.synchronize do + unless applicable? + Braintrust::Log.debug("Skipping #{name} - not applicable") + return false + end + return true if patched?(**options) # Double-check under lock + + perform_patch(**options) + @patched = true + end + Braintrust::Log.debug("Patched #{name}") + true + rescue => e + Braintrust::Log.error("Failed to patch #{name}: #{e.message}") + false + end + + # Subclasses implement this to perform the actual patching. + # This method is called under lock after applicable? returns true. 
+ # + # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + def perform_patch(**options) + raise NotImplementedError, "#{self} must implement perform_patch" + end + + # Reset patched state (primarily for testing). + def reset! + @patched = false + end + end + end + end +end diff --git a/lib/braintrust/contrib/registry.rb b/lib/braintrust/contrib/registry.rb new file mode 100644 index 0000000..77a3d67 --- /dev/null +++ b/lib/braintrust/contrib/registry.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +require "singleton" + +module Braintrust + module Contrib + # Thread-safe singleton registry for integrations. + # Provides registration, lookup, and require-path mapping for auto-instrumentation. + class Registry + include Singleton + + def initialize + @integrations = {} + @require_path_map = nil # Lazy cache + @mutex = Mutex.new + end + + # Register an integration class with the registry. + # @param integration_class [Class] The integration class to register + def register(integration_class) + @mutex.synchronize do + @integrations[integration_class.integration_name] = integration_class + @require_path_map = nil # Invalidate cache + end + end + + # Look up an integration by name. + # @param name [Symbol, String] The integration name + # @return [Class, nil] The integration class, or nil if not found + def [](name) + @integrations[name.to_sym] + end + + # Get all registered integrations. + # @return [Array] All registered integration classes + def all + @integrations.values + end + + # Get all available integrations (target library is loaded). + # @return [Array] Available integration classes + def available + @integrations.values.select(&:available?) + end + + # Iterate over all registered integrations. 
+ # @yield [Class] Each registered integration class + def each(&block) + @integrations.values.each(&block) + end + + # Returns integrations associated with a require path. + # Thread-safe with double-checked locking for performance. + # @param path [String] The require path (e.g., "openai", "anthropic") + # @return [Array] Integrations matching the require path + def integrations_for_require_path(path) + map = @require_path_map + if map.nil? + map = @mutex.synchronize do + @require_path_map ||= build_require_path_map + end + end + basename = File.basename(path.to_s, ".rb") + map.fetch(basename, EMPTY_ARRAY) + end + + # Clear all registrations (primarily for testing). + def clear! + @mutex.synchronize do + @integrations.clear + @require_path_map = nil + end + end + + private + + EMPTY_ARRAY = [].freeze + + def build_require_path_map + map = {} + @integrations.each_value do |integration| + integration.require_paths.each do |req| + map[req] ||= [] + map[req] << integration + end + end + map.each_value(&:freeze) + map.freeze + end + end + end +end diff --git a/lib/braintrust/state.rb b/lib/braintrust/state.rb index 3d6d154..6f20d69 100644 --- a/lib/braintrust/state.rb +++ b/lib/braintrust/state.rb @@ -93,6 +93,11 @@ def initialize(api_key: nil, org_name: nil, org_id: nil, default_project: nil, a if enable_tracing require_relative "trace" Trace.setup(self, tracer_provider, exporter: exporter) + + # Propagate tracer_provider to Contrib if loaded (soft dependency check) + if defined?(Braintrust::Contrib) + Braintrust::Contrib.init(tracer_provider: tracer_provider) + end end end diff --git a/test/braintrust/contrib/integration_test.rb b/test/braintrust/contrib/integration_test.rb new file mode 100644 index 0000000..f52d471 --- /dev/null +++ b/test/braintrust/contrib/integration_test.rb @@ -0,0 +1,478 @@ +# frozen_string_literal: true + +require "test_helper" + +class Braintrust::Contrib::IntegrationTest < Minitest::Test + def setup + # Use anonymous subclass to isolate 
test state + registry_class = Class.new(Braintrust::Contrib::Registry) + @registry = registry_class.instance + end + + # Create a mock patcher class for testing + def create_mock_patcher(should_fail: false, applicable: true) + patcher = Class.new(Braintrust::Contrib::Patcher) do + class << self + attr_accessor :patch_called, :should_fail, :is_applicable + end + + def self.applicable? + @is_applicable + end + + def self.perform_patch(**options) + @patch_called = true + raise "Patch failed" if @should_fail + end + end + + patcher.patch_called = false + patcher.should_fail = should_fail + patcher.is_applicable = applicable + patcher + end + + # Create a full integration class for testing + def create_test_integration( + name:, + gem_names:, + require_paths: nil, + patcher: nil, + min_version: nil, + max_version: nil + ) + patcher_class = patcher || create_mock_patcher + + integration = Class.new do + include Braintrust::Contrib::Integration + + class << self + attr_accessor :_integration_name, :_gem_names, :_require_paths, + :_patcher, :_min_version, :_max_version + end + + def self.integration_name + _integration_name + end + + def self.gem_names + _gem_names + end + + def self.require_paths + _require_paths || _gem_names + end + + def self.minimum_version + _min_version + end + + def self.maximum_version + _max_version + end + + def self.patcher + _patcher + end + end + + integration._integration_name = name + integration._gem_names = gem_names + integration._require_paths = require_paths + integration._patcher = patcher_class + integration._min_version = min_version + integration._max_version = max_version + integration + end + + def test_integration_name_raises_not_implemented + integration = Class.new { include Braintrust::Contrib::Integration } + + assert_raises(NotImplementedError) do + integration.integration_name + end + end + + def test_gem_names_raises_not_implemented + integration = Class.new { include Braintrust::Contrib::Integration } + + 
assert_raises(NotImplementedError) do + integration.gem_names + end + end + + def test_patcher_raises_not_implemented + integration = Class.new { include Braintrust::Contrib::Integration } + + assert_raises(NotImplementedError) do + integration.patcher + end + end + + def test_patchers_defaults_to_wrapping_patcher + patcher = create_mock_patcher + integration = create_test_integration( + name: :test, + gem_names: ["minitest"], + patcher: patcher + ) + + assert_equal [patcher], integration.patchers + end + + def test_require_paths_defaults_to_gem_names + integration = create_test_integration( + name: :test, + gem_names: ["test-gem", "other-gem"] + ) + + assert_equal ["test-gem", "other-gem"], integration.require_paths + end + + def test_require_paths_can_be_overridden + integration = create_test_integration( + name: :test, + gem_names: ["test-gem"], + require_paths: ["custom_path", "another_path"] + ) + + assert_equal ["custom_path", "another_path"], integration.require_paths + end + + def test_minimum_version_defaults_to_nil + integration = create_test_integration( + name: :test, + gem_names: ["test-gem"] + ) + + assert_nil integration.minimum_version + end + + def test_maximum_version_defaults_to_nil + integration = create_test_integration( + name: :test, + gem_names: ["test-gem"] + ) + + assert_nil integration.maximum_version + end + + def test_available_checks_gem_loaded_specs + # Use a gem that is actually loaded (minitest) + integration = create_test_integration( + name: :minitest_test, + gem_names: ["minitest"] + ) + + assert integration.available? + end + + def test_available_returns_false_for_unloaded_gem + integration = create_test_integration( + name: :test, + gem_names: ["nonexistent-gem-xyz-123"] + ) + + refute integration.available? 
+ end + + def test_available_with_multiple_gems_any_loaded + # minitest is loaded, fake-gem is not + integration = create_test_integration( + name: :test, + gem_names: ["fake-gem-xyz", "minitest"] + ) + + assert integration.available? + end + + def test_compatible_returns_false_when_not_available + integration = create_test_integration( + name: :test, + gem_names: ["nonexistent-gem-xyz-123"] + ) + + refute integration.compatible? + end + + def test_compatible_returns_true_when_no_version_constraints + integration = create_test_integration( + name: :minitest_test, + gem_names: ["minitest"] + ) + + assert integration.compatible? + end + + def test_compatible_checks_minimum_version + # Get the current minitest version + Gem.loaded_specs["minitest"].version + + # Test with a minimum version below current + integration_ok = create_test_integration( + name: :minitest_test, + gem_names: ["minitest"], + min_version: "1.0.0" + ) + assert integration_ok.compatible? + + # Test with a minimum version above current + integration_too_new = create_test_integration( + name: :minitest_test, + gem_names: ["minitest"], + min_version: "999.0.0" + ) + refute integration_too_new.compatible? + end + + def test_compatible_checks_maximum_version + # Test with a maximum version above current + integration_ok = create_test_integration( + name: :minitest_test, + gem_names: ["minitest"], + max_version: "999.0.0" + ) + assert integration_ok.compatible? + + # Test with a maximum version below current + integration_too_old = create_test_integration( + name: :minitest_test, + gem_names: ["minitest"], + max_version: "0.0.1" + ) + refute integration_too_old.compatible? + end + + def test_patch_delegates_to_patcher + patcher = create_mock_patcher + integration = create_test_integration( + name: :test, + gem_names: ["minitest"], + patcher: patcher + ) + + result = integration.patch! 
+ + assert result + assert patcher.patch_called + end + + def test_patch_returns_false_when_not_available + patcher = create_mock_patcher + integration = create_test_integration( + name: :test, + gem_names: ["nonexistent-gem-xyz-123"], + patcher: patcher + ) + + result = integration.patch! + + refute result + refute patcher.patch_called + end + + def test_patch_returns_false_when_not_compatible + patcher = create_mock_patcher + integration = create_test_integration( + name: :test, + gem_names: ["minitest"], + min_version: "999.0.0", # Too high + patcher: patcher + ) + + result = integration.patch! + + refute result + refute patcher.patch_called + end + + def test_patch_passes_tracer_provider + received_options = nil + patcher = Class.new(Braintrust::Contrib::Patcher) do + class << self + attr_accessor :is_applicable + end + + def self.applicable? + @is_applicable + end + + define_singleton_method(:perform_patch) do |**options| + received_options = options + end + end + patcher.is_applicable = true + + integration = create_test_integration( + name: :test, + gem_names: ["minitest"], + patcher: patcher + ) + + tracer_provider = Object.new + integration.patch!(tracer_provider: tracer_provider) + + assert_equal tracer_provider, received_options[:tracer_provider] + end + + def test_register_adds_to_registry + integration = create_test_integration( + name: :test_integration, + gem_names: ["test-gem"] + ) + + # Mock Registry.instance to verify register! calls it + mock_registry = Minitest::Mock.new + mock_registry.expect(:register, nil, [integration]) + + Braintrust::Contrib::Registry.stub(:instance, mock_registry) do + integration.register! 
+ end + + mock_registry.verify + end + + def test_patchers_with_multiple_patchers + patcher1 = create_mock_patcher + patcher2 = create_mock_patcher + + integration = Class.new do + include Braintrust::Contrib::Integration + + class << self + attr_accessor :_patchers + end + + def self.integration_name + :test + end + + def self.gem_names + ["minitest"] + end + + def self.patchers + _patchers + end + end + + integration._patchers = [patcher1, patcher2] + + assert_equal [patcher1, patcher2], integration.patchers + end + + def test_patch_tries_all_applicable_patchers + # First patcher is not applicable + patcher1 = create_mock_patcher(applicable: false) + + # Second and third patchers are applicable - both should be tried + patcher2 = create_mock_patcher(applicable: true) + patcher3 = create_mock_patcher(applicable: true) + + integration = Class.new do + include Braintrust::Contrib::Integration + + class << self + attr_accessor :_patchers + end + + def self.integration_name + :test + end + + def self.gem_names + ["minitest"] + end + + def self.patchers + _patchers + end + end + + integration._patchers = [patcher1, patcher2, patcher3] + + result = integration.patch! + + assert result + refute patcher1.patch_called # Not applicable + assert patcher2.patch_called # Applied + assert patcher3.patch_called # Also applied (doesn't stop after patcher2) + end + + def test_patch_skips_non_applicable_patchers + # Create patcher that is not applicable + non_applicable_patcher = create_mock_patcher(applicable: false) + + integration = Class.new do + include Braintrust::Contrib::Integration + + class << self + attr_accessor :_patcher + end + + def self.integration_name + :test + end + + def self.gem_names + ["minitest"] + end + + def self.patchers + [_patcher] + end + end + + integration._patcher = non_applicable_patcher + + result = integration.patch! 
+ + refute result + refute non_applicable_patcher.patch_called + end + + def test_patch_logs_when_no_applicable_patcher + non_applicable_patcher = create_mock_patcher(applicable: false) + + integration = Class.new do + include Braintrust::Contrib::Integration + + class << self + attr_accessor :_patcher + end + + def self.integration_name + :test + end + + def self.gem_names + ["minitest"] + end + + def self.patchers + [_patcher] + end + end + + integration._patcher = non_applicable_patcher + + # Capture log output + captured_logs = [] + original_logger = Braintrust::Log.logger + test_logger = Logger.new(StringIO.new) + test_logger.level = Logger::DEBUG + test_logger.formatter = ->(_severity, _time, _progname, msg) { + captured_logs << msg + "" + } + Braintrust::Log.logger = test_logger + + begin + integration.patch! + # Check that the "no applicable patcher" message was logged + assert captured_logs.any? { |msg| msg.include?("No applicable patcher found") } + ensure + Braintrust::Log.logger = original_logger + end + end +end diff --git a/test/braintrust/contrib/patcher_test.rb b/test/braintrust/contrib/patcher_test.rb new file mode 100644 index 0000000..17ba7ca --- /dev/null +++ b/test/braintrust/contrib/patcher_test.rb @@ -0,0 +1,281 @@ +# frozen_string_literal: true + +require "test_helper" + +class Braintrust::Contrib::PatcherTest < Minitest::Test + def setup + # Create a fresh patcher class for each test to avoid state leakage + @patcher = create_test_patcher + end + + def create_test_patcher(should_fail: false) + patcher = Class.new(Braintrust::Contrib::Patcher) do + class << self + attr_accessor :patch_count, :last_options, :should_fail + end + + def self.perform_patch(**options) + @patch_count ||= 0 + @patch_count += 1 + @last_options = options + raise "Intentional patch failure" if @should_fail + end + end + + patcher.reset! 
+ patcher.patch_count = 0 + patcher.last_options = {} + patcher.should_fail = should_fail + patcher + end + + def test_patch_passes_options + options = {tracer_provider: "test-provider", target: "test-target"} + @patcher.patch!(**options) + + assert_equal "test-provider", @patcher.last_options[:tracer_provider] + assert_equal "test-target", @patcher.last_options[:target] + end + + def test_patched_returns_false_initially + refute @patcher.patched? + end + + def test_patch_sets_patched_to_true + @patcher.patch! + + assert @patcher.patched? + end + + def test_patch_returns_true_on_success + result = @patcher.patch! + + assert result + end + + def test_patch_calls_perform_patch_once + @patcher.patch! + + assert_equal 1, @patcher.patch_count + end + + def test_patch_is_idempotent + @patcher.patch! + @patcher.patch! + @patcher.patch! + + assert_equal 1, @patcher.patch_count + assert @patcher.patched? + end + + def test_patch_returns_true_on_subsequent_calls + first_result = @patcher.patch! + second_result = @patcher.patch! + + assert first_result + assert second_result + end + + def test_patch_passes_options_to_perform_patch + tracer_provider = Object.new + + @patcher.patch!(tracer_provider: tracer_provider) + + assert_instance_of Hash, @patcher.last_options + assert_equal tracer_provider, @patcher.last_options[:tracer_provider] + end + + def test_patch_returns_false_on_error + failing_patcher = create_test_patcher(should_fail: true) + + result = suppress_logs { failing_patcher.patch! } + + refute result + end + + def test_patch_does_not_set_patched_on_error + failing_patcher = create_test_patcher(should_fail: true) + + suppress_logs { failing_patcher.patch! } + + refute failing_patcher.patched? 
+ end + + def test_patch_logs_error_on_failure + failing_patcher = create_test_patcher(should_fail: true) + + # Capture log output + captured_logs = [] + original_logger = Braintrust::Log.logger + test_logger = Logger.new(StringIO.new) + test_logger.formatter = ->(_severity, _time, _progname, msg) { + captured_logs << msg + "" + } + Braintrust::Log.logger = test_logger + + begin + failing_patcher.patch! + # Check that error was logged (can't easily verify content without more setup) + # The main thing is that it doesn't raise + ensure + Braintrust::Log.logger = original_logger + end + end + + def test_reset_allows_repatching + @patcher.patch! + assert @patcher.patched? + + @patcher.reset! + refute @patcher.patched? + + @patcher.patch! + assert @patcher.patched? + assert_equal 2, @patcher.patch_count + end + + def test_perform_patch_raises_not_implemented_in_base_class + assert_raises(NotImplementedError) do + Braintrust::Contrib::Patcher.perform_patch + end + end + + def test_thread_safety_only_patches_once + patcher = create_test_patcher + + threads = 100.times.map do + Thread.new { patcher.patch! } + end + + threads.each(&:join) + + assert_equal 1, patcher.patch_count + assert patcher.patched? + end + + def test_thread_safety_concurrent_patch_calls + patcher = create_test_patcher + + errors = [] + results = [] + mutex = Mutex.new + + threads = 100.times.map do + Thread.new do + result = patcher.patch! + mutex.synchronize { results << result } + rescue => e + mutex.synchronize { errors << e.message } + end + end + + threads.each(&:join) + + assert_equal [], errors + assert results.all? { |r| r == true } + assert_equal 1, patcher.patch_count + end + + def test_applicable_returns_true_by_default + assert @patcher.applicable? + end + + def test_applicable_can_be_overridden + patcher = Class.new(Braintrust::Contrib::Patcher) do + def self.applicable? + false + end + + def self.perform_patch(**options) + # No-op + end + end + patcher.reset! 
+ + refute patcher.applicable? + end + + def test_patch_checks_applicable_under_lock + applicable_calls = [] + patcher = Class.new(Braintrust::Contrib::Patcher) do + class << self + attr_accessor :applicable_calls + end + + def self.applicable? + @applicable_calls ||= [] + @applicable_calls << Thread.current.object_id + true + end + + def self.perform_patch(**options) + # No-op + end + end + patcher.reset! + patcher.applicable_calls = applicable_calls + + patcher.patch! + + # Should be called twice: once before lock (fast path), once under lock (double-check) + assert_equal 2, applicable_calls.length + end + + def test_patch_returns_false_when_not_applicable + patcher = Class.new(Braintrust::Contrib::Patcher) do + class << self + attr_accessor :perform_patch_called + end + + def self.applicable? + false + end + + def self.perform_patch(**options) + @perform_patch_called = true + end + end + patcher.reset! + patcher.perform_patch_called = false + + result = patcher.patch! + + refute result + refute patcher.perform_patch_called + refute patcher.patched? + end + + def test_patch_returns_false_and_does_not_log_when_not_applicable + patcher = Class.new(Braintrust::Contrib::Patcher) do + def self.applicable? + false + end + + def self.perform_patch(**options) + # No-op + end + end + patcher.reset! + + # Capture log output + captured_logs = [] + original_logger = Braintrust::Log.logger + test_logger = Logger.new(StringIO.new) + test_logger.level = Logger::DEBUG + test_logger.formatter = ->(_severity, _time, _progname, msg) { + captured_logs << msg + "" + } + Braintrust::Log.logger = test_logger + + begin + result = patcher.patch! 
+ # Fast path returns false immediately without logging + refute result + assert_empty captured_logs, "Fast path should not log when not applicable" + ensure + Braintrust::Log.logger = original_logger + end + end +end diff --git a/test/braintrust/contrib/registry_test.rb b/test/braintrust/contrib/registry_test.rb new file mode 100644 index 0000000..610815e --- /dev/null +++ b/test/braintrust/contrib/registry_test.rb @@ -0,0 +1,241 @@ +# frozen_string_literal: true + +require "test_helper" + +class Braintrust::Contrib::RegistryTest < Minitest::Test + def setup + # Use anonymous subclass to isolate test state + registry_class = Class.new(Braintrust::Contrib::Registry) + @registry = registry_class.instance + end + + # Mock integration class for testing + def create_mock_integration(name:, gem_names:, require_paths: nil, available: false) + integration = Class.new do + include Braintrust::Contrib::Integration + + class << self + attr_accessor :_integration_name, :_gem_names, :_require_paths, :_available + end + + def self.integration_name + _integration_name + end + + def self.gem_names + _gem_names + end + + def self.require_paths + _require_paths || _gem_names + end + + def self.available? 
+ _available + end + end + + integration._integration_name = name + integration._gem_names = gem_names + integration._require_paths = require_paths + integration._available = available + integration + end + + def test_register_and_lookup + integration = create_mock_integration(name: :openai, gem_names: ["openai"]) + + @registry.register(integration) + + assert_equal integration, @registry[:openai] + assert_equal integration, @registry["openai"] + end + + def test_lookup_returns_nil_for_unregistered + assert_nil @registry[:unknown] + end + + def test_all_returns_all_integrations + openai = create_mock_integration(name: :openai, gem_names: ["openai"]) + anthropic = create_mock_integration(name: :anthropic, gem_names: ["anthropic"]) + + @registry.register(openai) + @registry.register(anthropic) + + all = @registry.all + assert_equal 2, all.length + assert_includes all, openai + assert_includes all, anthropic + end + + def test_available_filters_by_availability + available_integration = create_mock_integration( + name: :available, + gem_names: ["available-gem"], + available: true + ) + unavailable_integration = create_mock_integration( + name: :unavailable, + gem_names: ["unavailable-gem"], + available: false + ) + + @registry.register(available_integration) + @registry.register(unavailable_integration) + + available = @registry.available + assert_equal 1, available.length + assert_includes available, available_integration + refute_includes available, unavailable_integration + end + + def test_each_iterates_over_integrations + openai = create_mock_integration(name: :openai, gem_names: ["openai"]) + anthropic = create_mock_integration(name: :anthropic, gem_names: ["anthropic"]) + + @registry.register(openai) + @registry.register(anthropic) + + collected = [] + @registry.each { |i| collected << i } + + assert_equal 2, collected.length + assert_includes collected, openai + assert_includes collected, anthropic + end + + def test_integrations_for_require_path + openai = 
create_mock_integration( + name: :openai, + gem_names: ["openai"], + require_paths: ["openai"] + ) + ruby_openai = create_mock_integration( + name: :ruby_openai, + gem_names: ["ruby-openai"], + require_paths: ["openai"] + ) + + @registry.register(openai) + @registry.register(ruby_openai) + + integrations = @registry.integrations_for_require_path("openai") + assert_equal 2, integrations.length + assert_includes integrations, openai + assert_includes integrations, ruby_openai + end + + def test_integrations_for_require_path_strips_rb_extension + openai = create_mock_integration( + name: :openai, + gem_names: ["openai"], + require_paths: ["openai"] + ) + + @registry.register(openai) + + integrations = @registry.integrations_for_require_path("openai.rb") + assert_equal 1, integrations.length + assert_includes integrations, openai + end + + def test_integrations_for_require_path_returns_empty_for_unknown + integrations = @registry.integrations_for_require_path("unknown") + assert_equal [], integrations + assert integrations.frozen? 
+ end + + def test_integrations_for_require_path_caching + openai = create_mock_integration( + name: :openai, + gem_names: ["openai"], + require_paths: ["openai"] + ) + + @registry.register(openai) + + # First call builds the cache + result1 = @registry.integrations_for_require_path("openai") + + # Second call should return the same frozen array (cached) + result2 = @registry.integrations_for_require_path("openai") + + assert_same result1, result2 + end + + def test_register_invalidates_cache + openai = create_mock_integration( + name: :openai, + gem_names: ["openai"], + require_paths: ["openai"] + ) + + @registry.register(openai) + + # Build the cache + result1 = @registry.integrations_for_require_path("openai") + assert_equal 1, result1.length + + # Register another integration + another = create_mock_integration( + name: :another, + gem_names: ["another"], + require_paths: ["openai"] + ) + @registry.register(another) + + # Cache should be invalidated + result2 = @registry.integrations_for_require_path("openai") + assert_equal 2, result2.length + end + + def test_thread_safety_for_registration + integrations = 100.times.map do |i| + create_mock_integration(name: :"integration_#{i}", gem_names: ["gem_#{i}"]) + end + + threads = integrations.map do |integration| + Thread.new { @registry.register(integration) } + end + + threads.each(&:join) + + assert_equal 100, @registry.all.length + end + + def test_thread_safety_for_require_path_lookup + openai = create_mock_integration( + name: :openai, + gem_names: ["openai"], + require_paths: ["openai"] + ) + + @registry.register(openai) + + errors = [] + threads = 100.times.map do + Thread.new do + result = @registry.integrations_for_require_path("openai") + errors << "Got nil" if result.nil? 
+ errors << "Wrong length: #{result.length}" unless result.length == 1 + rescue => e + errors << e.message + end + end + + threads.each(&:join) + + assert_equal [], errors + end + + def test_clear_removes_all_integrations + openai = create_mock_integration(name: :openai, gem_names: ["openai"]) + + @registry.register(openai) + assert_equal 1, @registry.all.length + + @registry.clear! + assert_equal 0, @registry.all.length + assert_nil @registry[:openai] + end +end diff --git a/test/test_helper.rb b/test/test_helper.rb index b1a0f8f..26514f4 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -224,6 +224,51 @@ def get_openai_key def get_anthropic_key ENV["ANTHROPIC_API_KEY"] || "sk-ant-test-key-for-vcr" end + + # Suppress log output during block execution. + # Use for tests that deliberately cause errors/warnings. + # + # @yield Block to execute with logging suppressed + # @return Result of the block + # + # @example + # suppress_logs { failing_patcher.patch! } + # + def suppress_logs + original_logger = Braintrust::Log.logger + Braintrust::Log.logger = Logger.new(File::NULL) + yield + ensure + Braintrust::Log.logger = original_logger + end + + # Safely stub a singleton method and restore the original after the block. + # This properly handles method restoration even when the method comes from + # an included/extended module in the ancestor chain. 
+ # + # @param object [Object] The object whose singleton method to stub + # @param method_name [Symbol] The name of the method to stub + # @param stub_impl [Proc] The stub implementation (as a lambda or proc) + # @yield The test code to run with the stubbed method + # + # @example + # with_stubbed_singleton_method(MyClass, :available?, -> { false }) do + # # test code here + # end + def with_stubbed_singleton_method(object, method_name, stub_impl) + # Save the original method by unbinding it + original_method = object.method(method_name).unbind + + # Replace with stub + object.define_singleton_method(method_name, &stub_impl) + + yield + ensure + # Restore the original method by rebinding + object.define_singleton_method(method_name) do |*args, **kwargs, &block| + original_method.bind_call(object, *args, **kwargs, &block) + end + end end # Include helper in all test cases From 9cad4df2b1ab637976d2fad5f153a20ba794c391 Mon Sep 17 00:00:00 2001 From: David Elner Date: Mon, 15 Dec 2025 12:15:35 -0500 Subject: [PATCH 3/6] Added: Integration generator script --- CONTRIBUTING.md | 125 +++++++++++++++++----- Rakefile | 121 +++++++++++++++++++++ templates/contrib/integration.rb.erb | 52 +++++++++ templates/contrib/integration_test.rb.erb | 45 ++++++++ templates/contrib/patcher.rb.erb | 39 +++++++ templates/contrib/patcher_test.rb.erb | 32 ++++++ 6 files changed, 385 insertions(+), 29 deletions(-) create mode 100644 templates/contrib/integration.rb.erb create mode 100644 templates/contrib/integration_test.rb.erb create mode 100644 templates/contrib/patcher.rb.erb create mode 100644 templates/contrib/patcher_test.rb.erb diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cec97dc..b4b8365 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -78,66 +78,133 @@ for more details. 
rake -T test:vcr ``` -## Adding Integrations for AI Libraries +## Adding integrations for libraries -To add instrumentation support for a new AI library, follow these steps: +To add instrumentation support for a new library, use the integration generator: -### 1. Define the Integration +```bash +rake contrib:generate NAME=trustybrain_llm AUTO_REGISTER=true +``` + +This will create the integration structure and optionally register it. You can also specify additional options: + +```bash +rake contrib:generate NAME=trustybrain_llm \ + GEM_NAMES=trustybrain_llm,trustybrain \ + REQUIRE_PATHS=trustybrain \ + MIN_VERSION=1.0.0 \ + MAX_VERSION=2.0.0 \ + AUTO_REGISTER=true +``` + +### Manual Setup + +If you prefer to create the integration manually, follow these steps: + +### 1. Create the integration directory structure + +```bash +mkdir -p lib/braintrust/contrib/trustybrain_llm +mkdir -p test/braintrust/contrib/trustybrain_llm +``` -Create a new file in `lib/braintrust/contrib/`: +### 2. Define the integration stub + +Create `lib/braintrust/contrib/trustybrain_llm/integration.rb`: ```ruby -# lib/braintrust/contrib/trustybrain_llm.rb -module Braintrust::Contrib - class TrustybrainLLM - include Integration +# frozen_string_literal: true - def self.integration_name - :trustybrain_llm - end +require_relative "../integration" - def self.gem_names - ["trustybrain_llm"] - end +module Braintrust + module Contrib + module TrustybrainLLM + class Integration + include Braintrust::Contrib::Integration - def self.patcher - TrustybrainLLMPatcher + def self.integration_name + :trustybrain_llm + end + + def self.gem_names + ["trustybrain_llm"] + end + + def self.patchers + require_relative "patcher" + [Patcher] + end + end end end +end +``` + +### 3. 
Create the patcher + +Create `lib/braintrust/contrib/trustybrain_llm/patcher.rb`: - class TrustybrainLLMPatcher < Patcher - def self.perform_patch(context) - # Add your instrumentation here - # context.tracer_provider gives you access to the tracer +```ruby +# frozen_string_literal: true + +require_relative "../patcher" + +module Braintrust + module Contrib + module TrustybrainLLM + class Patcher < Braintrust::Contrib::Patcher + class << self + def applicable? + defined?(::TrustybrainLLM::Client) + end + + def perform_patch(**options) + ::TrustybrainLLM::Client.prepend(Instrumentation) + end + end + + module Instrumentation + def chat(*args, **kwargs, &block) + Braintrust::Contrib.tracer_for(self).in_span("trustybrain_llm.chat") do + super + end + end + end + end end end end ``` -### 2. Register It +### 4. Register it Add to `lib/braintrust/contrib.rb`: ```ruby -require_relative "contrib/trustybrain_llm" +require_relative "contrib/trustybrain_llm/integration" # At the bottom: -Contrib::TrustybrainLLM.register! +Contrib::TrustybrainLLM::Integration.register! ``` -### 3. Write Tests +### 5. 
Write tests -Create `test/braintrust/contrib/trustybrain_llm_test.rb`: +Create test files in `test/braintrust/contrib/trustybrain_llm/`: ```ruby +# test/braintrust/contrib/trustybrain_llm/integration_test.rb require "test_helper" -class Braintrust::Contrib::TrustybrainLLMTest < Minitest::Test +class Braintrust::Contrib::TrustybrainLLM::IntegrationTest < Minitest::Test def test_integration_basics - assert_equal :trustybrain_llm, TrustybrainLLM.integration_name - assert_equal ["trustybrain_llm"], TrustybrainLLM.gem_names + integration = Braintrust::Contrib::TrustybrainLLM::Integration + assert_equal :trustybrain_llm, integration.integration_name + assert_equal ["trustybrain_llm"], integration.gem_names end + + # TODO: Add tests for patchers, availability, compatibility, and instrumentation end ``` -See existing tests in `test/braintrust/contrib/` for complete examples of testing integrations, patchers, and the registry. +See existing tests in `test/braintrust/contrib/` for complete examples. diff --git a/Rakefile b/Rakefile index 5aa4d62..4d86d97 100644 --- a/Rakefile +++ b/Rakefile @@ -252,6 +252,127 @@ task release: ["release:publish", "release:github"] do puts "✓ Release completed successfully!" 
end +# Contrib tasks +namespace :contrib do + desc "Generate a new integration (NAME=name [GEM_NAMES=gem1,gem2] [REQUIRE_PATHS=path1,path2] [MIN_VERSION=1.0.0] [MAX_VERSION=2.0.0] [AUTO_REGISTER=true])" + task :generate do + require "erb" + require "fileutils" + + # Parse parameters + name = ENV["NAME"] + unless name + puts "Error: NAME is required" + puts "Usage: rake contrib:generate NAME=trustybrain_llm [GEM_NAMES=trustybrain_llm] [AUTO_REGISTER=true]" + exit 1 + end + + # Convert name to snake_case if it's PascalCase + snake_case_name = name.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2') + .gsub(/([a-z\d])([A-Z])/, '\1_\2') + .downcase + + # Convert to PascalCase for module name + module_name = snake_case_name.split("_").map(&:capitalize).join + + integration_name = snake_case_name.to_sym + + # Parse optional parameters + gem_names = ENV["GEM_NAMES"]&.split(",") || [snake_case_name] + require_paths = ENV["REQUIRE_PATHS"]&.split(",") || gem_names + min_version = ENV["MIN_VERSION"] + max_version = ENV["MAX_VERSION"] + auto_register = ENV.fetch("AUTO_REGISTER", "false").downcase == "true" + + # Display what will be generated + puts "\n=== Generating Integration ===" + puts "Name: #{module_name}" + puts "Integration name: :#{integration_name}" + puts "Gem names: #{gem_names.inspect}" + puts "Require paths: #{require_paths.inspect}" if require_paths != gem_names + puts "Min version: #{min_version}" if min_version + puts "Max version: #{max_version}" if max_version + puts + + # Template binding + template_binding = binding + + # Paths + integration_dir = "lib/braintrust/contrib/#{snake_case_name}" + test_dir = "test/braintrust/contrib/#{snake_case_name}" + + # Create directories + FileUtils.mkdir_p(integration_dir) + FileUtils.mkdir_p(test_dir) + + # Generate files + templates = { + "templates/contrib/integration.rb.erb" => "#{integration_dir}/integration.rb", + "templates/contrib/patcher.rb.erb" => "#{integration_dir}/patcher.rb", + 
"templates/contrib/integration_test.rb.erb" => "#{test_dir}/integration_test.rb", + "templates/contrib/patcher_test.rb.erb" => "#{test_dir}/patcher_test.rb" + } + + templates.each do |template_path, output_path| + template = ERB.new(File.read(template_path), trim_mode: "-") + content = template.result(template_binding) + File.write(output_path, content) + puts "✓ Created #{output_path}" + end + + # Auto-register if requested + if auto_register + contrib_file = "lib/braintrust/contrib.rb" + contrib_content = File.read(contrib_file) + + # Find the position to insert (before the last "end" or after the last require) + insertion_point = if /^# Load integration stubs/.match?(contrib_content) + contrib_content.index("# Load integration stubs") + else + # Insert before the final module end + contrib_content.rindex("end") + end + + require_line = "require_relative \"contrib/#{snake_case_name}/integration\"" + register_line = "Contrib::#{module_name}::Integration.register!" + + # Check if already registered + if contrib_content.include?(require_line) + puts "⚠ #{contrib_file} already contains this integration" + else + lines_to_add = [ + "", + "# #{module_name}", + require_line, + register_line + ].join("\n") + + contrib_content.insert(insertion_point, lines_to_add + "\n") + File.write(contrib_file, contrib_content) + puts "✓ Updated #{contrib_file}" + end + end + + # Display next steps + puts "\n=== Next Steps ===" + unless auto_register + puts "1. Add to lib/braintrust/contrib.rb:" + puts " require_relative \"contrib/#{snake_case_name}/integration\"" + puts " Contrib::#{module_name}::Integration.register!" + puts + end + puts "#{auto_register ? "1" : "2"}. Implement the patcher in:" + puts " #{integration_dir}/patcher.rb" + puts + puts "#{auto_register ? "2" : "3"}. Add tests in:" + puts " #{test_dir}/" + puts + puts "#{auto_register ? "3" : "4"}. 
Run tests:" + puts " bundle exec rake test TEST=#{test_dir}/**/*_test.rb" + puts + end +end + # Version bump tasks def bump_version(type) version_file = "lib/braintrust/version.rb" diff --git a/templates/contrib/integration.rb.erb b/templates/contrib/integration.rb.erb new file mode 100644 index 0000000..ff281fd --- /dev/null +++ b/templates/contrib/integration.rb.erb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +require_relative "../integration" + +module Braintrust + module Contrib + module <%= module_name %> + # Integration for <%= gem_names.join(", ") %> + class Integration + include Braintrust::Contrib::Integration + + # @return [Symbol] Unique identifier for this integration + def self.integration_name + :<%= integration_name %> + end + + # @return [Array] Gem names this integration supports + def self.gem_names + <%= gem_names.inspect %> + end +<% if require_paths != gem_names %> + + # @return [Array] Require paths for auto-instrument detection + def self.require_paths + <%= require_paths.inspect %> + end +<% end %> +<% if min_version %> + + # @return [String] Minimum compatible version + def self.minimum_version + "<%= min_version %>" + end +<% end %> +<% if max_version %> + + # @return [String] Maximum compatible version + def self.maximum_version + "<%= max_version %>" + end +<% end %> + + # Lazy-load the patcher only when actually patching. 
+ # @return [Array] The patcher classes + def self.patchers + require_relative "patcher" + [Patcher] + end + end + end + end +end diff --git a/templates/contrib/integration_test.rb.erb b/templates/contrib/integration_test.rb.erb new file mode 100644 index 0000000..329ec2d --- /dev/null +++ b/templates/contrib/integration_test.rb.erb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +require "test_helper" + +class Braintrust::Contrib::<%= module_name %>::IntegrationTest < Minitest::Test + def setup + @integration = Braintrust::Contrib::<%= module_name %>::Integration + end + + def test_integration_name + assert_equal :<%= integration_name %>, @integration.integration_name + end + + def test_gem_names + assert_equal <%= gem_names.inspect %>, @integration.gem_names + end +<% if require_paths != gem_names %> + + def test_require_paths + assert_equal <%= require_paths.inspect %>, @integration.require_paths + end +<% end %> + + def test_minimum_version +<% if min_version %> + assert_equal "<%= min_version %>", @integration.minimum_version +<% else %> + assert_nil @integration.minimum_version +<% end %> + end + + def test_maximum_version +<% if max_version %> + assert_equal "<%= max_version %>", @integration.maximum_version +<% else %> + assert_nil @integration.maximum_version +<% end %> + end + + def test_patchers + patchers = @integration.patchers + assert_kind_of Array, patchers + assert_includes patchers, Braintrust::Contrib::<%= module_name %>::Patcher + end +end diff --git a/templates/contrib/patcher.rb.erb b/templates/contrib/patcher.rb.erb new file mode 100644 index 0000000..81a9043 --- /dev/null +++ b/templates/contrib/patcher.rb.erb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +require_relative "../patcher" + +module Braintrust + module Contrib + module <%= module_name %> + # Patcher for <%= gem_names.join(", ") %> + class Patcher < Braintrust::Contrib::Patcher + class << self + # Check if this patcher should apply. 
+ # @return [Boolean] true if target library is available + def applicable? + # TODO: Update to check if target library classes are defined + # Example: defined?(::SomeLibrary::Client) + true + end + + # Perform the actual patching. + # @param options [Hash] Configuration options passed from integration + # @return [void] + def perform_patch(**options) + # TODO: Add your instrumentation here + # Example: ::SomeLibrary::Client.prepend(Instrumentation) + end + end + + # Example instrumentation module (uncomment and modify for your integration) + # module Instrumentation + # def some_method(*args, **kwargs, &block) + # Braintrust::Contrib.tracer_for(self).in_span("somelibrary.some_method") do + # super + # end + # end + # end + end + end + end +end diff --git a/templates/contrib/patcher_test.rb.erb b/templates/contrib/patcher_test.rb.erb new file mode 100644 index 0000000..89b43f8 --- /dev/null +++ b/templates/contrib/patcher_test.rb.erb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require "test_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/<%= snake_case_name %>/patcher" + +class Braintrust::Contrib::<%= module_name %>::PatcherTest < Minitest::Test + def setup + @patcher = Braintrust::Contrib::<%= module_name %>::Patcher + @patcher.reset! + end + + def teardown + @patcher.reset! + end + + def test_inherits_from_base_patcher + assert @patcher < Braintrust::Contrib::Patcher + end + + def test_implements_perform_patch + assert @patcher.respond_to?(:perform_patch) + end + + def test_applicable + # TODO: Update once applicable? checks for target library + assert @patcher.applicable? 
+ end + + # TODO: Add tests for instrumented code +end From d6c6d334a4f5574c3a06b4f35da6c8f048e34303 Mon Sep 17 00:00:00 2001 From: David Elner Date: Wed, 17 Dec 2025 14:59:52 -0500 Subject: [PATCH 4/6] Added: `test:contrib` task to run integration tests --- Rakefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Rakefile b/Rakefile index 4d86d97..3f515a0 100644 --- a/Rakefile +++ b/Rakefile @@ -99,6 +99,11 @@ task default: :ci # Test-related tasks namespace :test do + desc "Run only contrib framework tests" + task :contrib do + sh "bundle exec ruby -Ilib:test test/braintrust/contrib/*_test.rb" + end + desc "Run tests with verbose timing output" task :verbose do ENV["MT_VERBOSE"] = "1" From 53580f2279a441609a17b8ee4d7a796fc78021f8 Mon Sep 17 00:00:00 2001 From: David Elner Date: Thu, 18 Dec 2025 17:36:54 -0500 Subject: [PATCH 5/6] Added: minitest-stub-const gem for testing --- Gemfile.lock | 2 ++ braintrust.gemspec | 1 + test/test_helper.rb | 1 + 3 files changed, 4 insertions(+) diff --git a/Gemfile.lock b/Gemfile.lock index 7a03578..c999ae5 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -50,6 +50,7 @@ GEM builder minitest (>= 5.0) ruby-progressbar + minitest-stub-const (0.6) openssl (3.3.1) opentelemetry-api (1.7.0) opentelemetry-common (0.23.0) @@ -145,6 +146,7 @@ DEPENDENCIES kramdown (~> 2.0) minitest (~> 5.0) minitest-reporters (~> 1.6) + minitest-stub-const (~> 0.6) rake (~> 13.0) simplecov (~> 0.22) standard (~> 1.0) diff --git a/braintrust.gemspec b/braintrust.gemspec index 6f1fd68..cb2c4d0 100644 --- a/braintrust.gemspec +++ b/braintrust.gemspec @@ -40,6 +40,7 @@ Gem::Specification.new do |spec| # Development dependencies spec.add_development_dependency "minitest", "~> 5.0" + spec.add_development_dependency "minitest-stub-const", "~> 0.6" spec.add_development_dependency "rake", "~> 13.0" spec.add_development_dependency "standard", "~> 1.0" spec.add_development_dependency "simplecov", "~> 0.22" diff --git a/test/test_helper.rb b/test/test_helper.rb 
index 26514f4..861a67f 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -17,6 +17,7 @@ require "braintrust" require "minitest/autorun" +require "minitest/stub_const" # Show test timings when MT_VERBOSE is set if ENV["MT_VERBOSE"] From 104d0720c27da97b48c3528a9d53977f39b951da Mon Sep 17 00:00:00 2001 From: David Elner Date: Thu, 18 Dec 2025 19:47:11 -0500 Subject: [PATCH 6/6] Changed: Migrated OpenAI to new integration API --- README.md | 5 +- examples/openai_instrument.rb | 70 ++ examples/openai_instrument_target.rb | 72 ++ examples/{openai.rb => openai_wrap.rb} | 1 + lib/braintrust/contrib.rb | 5 +- .../contrib/openai/instrumentation/chat.rb | 323 +++++ .../contrib/openai/instrumentation/common.rb | 164 +++ .../openai/instrumentation/responses.rb | 166 +++ lib/braintrust/contrib/openai/integration.rb | 58 + lib/braintrust/contrib/openai/patcher.rb | 95 ++ lib/braintrust/trace/contrib/openai.rb | 607 +--------- .../contrib/openai/integration_test.rb | 140 +++ .../openai/patcher_feature_parity_test.rb | 1073 +++++++++++++++++ .../braintrust/contrib/openai/patcher_test.rb | 163 +++ test/braintrust/trace/openai_test.rb | 193 --- .../openai/streaming_chat_completions.yml | 130 ++ 16 files changed, 2473 insertions(+), 792 deletions(-) create mode 100644 examples/openai_instrument.rb create mode 100644 examples/openai_instrument_target.rb rename examples/{openai.rb => openai_wrap.rb} (96%) create mode 100644 lib/braintrust/contrib/openai/instrumentation/chat.rb create mode 100644 lib/braintrust/contrib/openai/instrumentation/common.rb create mode 100644 lib/braintrust/contrib/openai/instrumentation/responses.rb create mode 100644 lib/braintrust/contrib/openai/integration.rb create mode 100644 lib/braintrust/contrib/openai/patcher.rb create mode 100644 test/braintrust/contrib/openai/integration_test.rb create mode 100644 test/braintrust/contrib/openai/patcher_feature_parity_test.rb create mode 100644 test/braintrust/contrib/openai/patcher_test.rb create mode 
100644 test/fixtures/vcr_cassettes/openai/streaming_chat_completions.yml diff --git a/README.md b/README.md index 5da0707..c242a17 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,10 @@ Braintrust.init client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) -Braintrust::Trace::OpenAI.wrap(client) +# Instrument all clients +Braintrust::Contrib.instrument!(:openai) +# OR instrument a single client +Braintrust::Contrib.instrument!(:openai, target: client) tracer = OpenTelemetry.tracer_provider.tracer("openai-app") root_span = nil diff --git a/examples/openai_instrument.rb b/examples/openai_instrument.rb new file mode 100644 index 0000000..6a9b34a --- /dev/null +++ b/examples/openai_instrument.rb @@ -0,0 +1,70 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "opentelemetry/sdk" + +# Example: OpenAI chat completion with Braintrust tracing +# +# This example demonstrates how to automatically trace OpenAI API calls with Braintrust. +# +# Note: The openai gem is a development dependency. To run this example: +# 1. Install dependencies: bundle install +# 2. Run from the SDK root: bundle exec ruby examples/openai_instrument.rb +# +# Usage: +# OPENAI_API_KEY=your-openai-key bundle exec ruby examples/openai_instrument.rb + +# Check for API keys +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + puts "Get your API key from: https://platform.openai.com/api-keys" + exit 1 +end + +# Instrument OpenAI +Braintrust.init(blocking_login: true) +Braintrust::Contrib.instrument!(:openai) + +# Create OpenAI client +client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) + +# Create a root span to capture the entire operation +tracer = OpenTelemetry.tracer_provider.tracer("openai-example") +root_span = nil + +# Make a chat completion request (automatically traced!) +puts "Sending chat completion request to OpenAI..."
+response = tracer.in_span("examples/openai.rb") do |span| + root_span = span + + client.chat.completions.create( + messages: [ + {role: "system", content: "You are a helpful assistant."}, + {role: "user", content: "Say hello and tell me a short joke."} + ], + model: "gpt-4o-mini", + max_tokens: 100 + ) +end + +# Print the response +puts "\n✓ Response received!" +puts "\nAssistant: #{response.choices[0].message.content}" + +# Print usage stats +puts "\nToken usage:" +puts " Prompt tokens: #{response.usage.prompt_tokens}" +puts " Completion tokens: #{response.usage.completion_tokens}" +puts " Total tokens: #{response.usage.total_tokens}" + +# Print permalink to view this trace in Braintrust +puts "\n✓ View this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +# Shutdown to flush spans to Braintrust +OpenTelemetry.tracer_provider.shutdown + +puts "\n✓ Trace sent to Braintrust!" diff --git a/examples/openai_instrument_target.rb b/examples/openai_instrument_target.rb new file mode 100644 index 0000000..6fc2c47 --- /dev/null +++ b/examples/openai_instrument_target.rb @@ -0,0 +1,72 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "bundler/setup" +require "braintrust" +require "openai" +require "opentelemetry/sdk" + +# Example: OpenAI chat completion with Braintrust tracing +# +# This example demonstrates how to automatically trace OpenAI API calls with Braintrust. +# +# Note: The openai gem is a development dependency. To run this example: +# 1. Install dependencies: bundle install +# 2. 
Run from the SDK root: bundle exec ruby examples/openai.rb +# +# Usage: +# OPENAI_API_KEY=your-openai-key bundle exec ruby examples/openai.rb + +# Check for API keys +unless ENV["OPENAI_API_KEY"] + puts "Error: OPENAI_API_KEY environment variable is required" + puts "Get your API key from: https://platform.openai.com/api-keys" + exit 1 +end + +# Instrument OpenAI +Braintrust.init(blocking_login: true) + +# Create OpenAI client +client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) + +# Instrument the client with Braintrust tracing +Braintrust::Contrib.instrument!(:openai, target: client) + +# Create a root span to capture the entire operation +tracer = OpenTelemetry.tracer_provider.tracer("openai-example") +root_span = nil + +# Make a chat completion request (automatically traced!) +puts "Sending chat completion request to OpenAI..." +response = tracer.in_span("examples/openai.rb") do |span| + root_span = span + + client.chat.completions.create( + messages: [ + {role: "system", content: "You are a helpful assistant."}, + {role: "user", content: "Say hello and tell me a short joke."} + ], + model: "gpt-4o-mini", + max_tokens: 100 + ) +end + +# Print the response +puts "\n✓ Response received!" +puts "\nAssistant: #{response.choices[0].message.content}" + +# Print usage stats +puts "\nToken usage:" +puts " Prompt tokens: #{response.usage.prompt_tokens}" +puts " Completion tokens: #{response.usage.completion_tokens}" +puts " Total tokens: #{response.usage.total_tokens}" + +# Print permalink to view this trace in Braintrust +puts "\n✓ View this trace in Braintrust:" +puts " #{Braintrust::Trace.permalink(root_span)}" + +# Shutdown to flush spans to Braintrust +OpenTelemetry.tracer_provider.shutdown + +puts "\n✓ Trace sent to Braintrust!" 
diff --git a/examples/openai.rb b/examples/openai_wrap.rb similarity index 96% rename from examples/openai.rb rename to examples/openai_wrap.rb index 246ff20..5db5858 100644 --- a/examples/openai.rb +++ b/examples/openai_wrap.rb @@ -30,6 +30,7 @@ client = OpenAI::Client.new(api_key: ENV["OPENAI_API_KEY"]) # Wrap the client with Braintrust tracing +# DEPRECATED: Use `Braintrust::Contrib.instrument!(:openai, target: client)` instead Braintrust::Trace::OpenAI.wrap(client) # Create a root span to capture the entire operation diff --git a/lib/braintrust/contrib.rb b/lib/braintrust/contrib.rb index f6e059a..2ef99e4 100644 --- a/lib/braintrust/contrib.rb +++ b/lib/braintrust/contrib.rb @@ -80,4 +80,7 @@ def tracer_for(target, name: "braintrust") end # Load integration stubs (eager load minimal metadata). -# These will be added in subsequent milestones. +require_relative "contrib/openai/integration" + +# Register integrations +Braintrust::Contrib::OpenAI::Integration.register! diff --git a/lib/braintrust/contrib/openai/instrumentation/chat.rb b/lib/braintrust/contrib/openai/instrumentation/chat.rb new file mode 100644 index 0000000..ff59d61 --- /dev/null +++ b/lib/braintrust/contrib/openai/instrumentation/chat.rb @@ -0,0 +1,323 @@ +# frozen_string_literal: true + +require "opentelemetry/sdk" +require "json" + +require_relative "common" + +module Braintrust + module Contrib + module OpenAI + module Instrumentation + # Chat completions instrumentation for OpenAI. + # Provides modules that can be prepended to OpenAI::Client to instrument chat.completions API. + module Chat + # Module prepended to chat.completions to add tracing + module Completions + def self.included(base) + # Guard against double-wrapping for: Check if patch is already in the ancestor chain. + # This prevents double instrumentation if class-level patching was already applied, + # and this patch is being applied to a singleton-class. (Special case.) 
+ # + # Ruby's prepend() doesn't check the full inheritance chain, so without this guard, + # the instrumentation could be added twice. + base.prepend(InstanceMethods) unless applied?(base) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + module InstanceMethods + # Wrap create method for non-streaming completions + def create(**params) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + + tracer.in_span("Chat Completion") do |span| + # Track start time for time_to_first_token + start_time = Time.now + + # Initialize metadata hash + metadata = { + "provider" => "openai", + "endpoint" => "/v1/chat/completions" + } + + # Capture request metadata fields + metadata_fields = %i[ + model frequency_penalty logit_bias logprobs max_tokens n + presence_penalty response_format seed service_tier stop + stream stream_options temperature top_p top_logprobs + tools tool_choice parallel_tool_calls user functions function_call + ] + + metadata_fields.each do |field| + metadata[field.to_s] = params[field] if params.key?(field) + end + + # Set input messages as JSON + if params[:messages] + messages_array = params[:messages].map(&:to_h) + span.set_attribute("braintrust.input_json", JSON.generate(messages_array)) + end + + # Call the original method + response = super(**params) + + # Calculate time to first token + time_to_first_token = Time.now - start_time + + # Set output (choices) as JSON + if response.respond_to?(:choices) && response.choices&.any? 
+ choices_array = response.choices.map(&:to_h) + span.set_attribute("braintrust.output_json", JSON.generate(choices_array)) + end + + # Set metrics (token usage with advanced details) + metrics = {} + if response.respond_to?(:usage) && response.usage + metrics = Common.parse_usage_tokens(response.usage) + end + # Add time_to_first_token metric + metrics["time_to_first_token"] = time_to_first_token + span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty? + + # Add response metadata fields + metadata["id"] = response.id if response.respond_to?(:id) && response.id + metadata["created"] = response.created if response.respond_to?(:created) && response.created + metadata["system_fingerprint"] = response.system_fingerprint if response.respond_to?(:system_fingerprint) && response.system_fingerprint + metadata["service_tier"] = response.service_tier if response.respond_to?(:service_tier) && response.service_tier + + # Set metadata ONCE at the end with complete hash + span.set_attribute("braintrust.metadata", JSON.generate(metadata)) + + response + end + end + + # Wrap stream_raw for streaming chat completions + def stream_raw(**params) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + aggregated_chunks = [] + start_time = Time.now + time_to_first_token = nil + metadata = { + "provider" => "openai", + "endpoint" => "/v1/chat/completions" + } + + # Start span with proper context + span = tracer.start_span("Chat Completion") + + # Capture request metadata fields + metadata_fields = %i[ + model frequency_penalty logit_bias logprobs max_tokens n + presence_penalty response_format seed service_tier stop + stream stream_options temperature top_p top_logprobs + tools tool_choice parallel_tool_calls user functions function_call + ] + + metadata_fields.each do |field| + metadata[field.to_s] = params[field] if params.key?(field) + end + metadata["stream"] = true # Explicitly mark as streaming + + # Set input 
messages as JSON + if params[:messages] + messages_array = params[:messages].map(&:to_h) + span.set_attribute("braintrust.input_json", JSON.generate(messages_array)) + end + + # Set initial metadata + span.set_attribute("braintrust.metadata", JSON.generate(metadata)) + + # Call the original stream_raw method with error handling + begin + stream = super + rescue => e + # Record exception if stream creation fails + span.record_exception(e) + span.status = ::OpenTelemetry::Trace::Status.error("OpenAI API error: #{e.message}") + span.finish + raise + end + + # Wrap the stream to aggregate chunks + original_each = stream.method(:each) + stream.define_singleton_method(:each) do |&block| + original_each.call do |chunk| + # Capture time to first token on first chunk + time_to_first_token ||= Time.now - start_time + aggregated_chunks << chunk.to_h + block&.call(chunk) + end + rescue => e + # Record exception if streaming fails + span.record_exception(e) + span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") + raise + ensure + # Always aggregate whatever chunks we collected and finish span + unless aggregated_chunks.empty? + aggregated_output = Common.aggregate_streaming_chunks(aggregated_chunks) + Common.set_json_attr(span, "braintrust.output_json", aggregated_output[:choices]) + + # Set metrics if usage is included + metrics = {} + if aggregated_output[:usage] + metrics = Common.parse_usage_tokens(aggregated_output[:usage]) + end + # Add time_to_first_token metric + metrics["time_to_first_token"] = time_to_first_token || 0.0 + Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? 
+ + # Update metadata with response fields + metadata["id"] = aggregated_output[:id] if aggregated_output[:id] + metadata["created"] = aggregated_output[:created] if aggregated_output[:created] + metadata["model"] = aggregated_output[:model] if aggregated_output[:model] + metadata["system_fingerprint"] = aggregated_output[:system_fingerprint] if aggregated_output[:system_fingerprint] + Common.set_json_attr(span, "braintrust.metadata", metadata) + end + + span.finish + end + + stream + end + + # Wrap stream for streaming chat completions (returns ChatCompletionStream with convenience methods) + def stream(**params) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + start_time = Time.now + time_to_first_token = nil + metadata = { + "provider" => "openai", + "endpoint" => "/v1/chat/completions" + } + + # Start span with proper context + span = tracer.start_span("Chat Completion") + + # Capture request metadata fields + metadata_fields = %i[ + model frequency_penalty logit_bias logprobs max_tokens n + presence_penalty response_format seed service_tier stop + stream stream_options temperature top_p top_logprobs + tools tool_choice parallel_tool_calls user functions function_call + ] + + metadata_fields.each do |field| + metadata[field.to_s] = params[field] if params.key?(field) + end + metadata["stream"] = true # Explicitly mark as streaming + + # Set input messages as JSON + if params[:messages] + messages_array = params[:messages].map(&:to_h) + span.set_attribute("braintrust.input_json", JSON.generate(messages_array)) + end + + # Set initial metadata + span.set_attribute("braintrust.metadata", JSON.generate(metadata)) + + # Call the original stream method with error handling + begin + stream = super + rescue => e + # Record exception if stream creation fails + span.record_exception(e) + span.status = ::OpenTelemetry::Trace::Status.error("OpenAI API error: #{e.message}") + span.finish + raise + end + + # Local helper for 
setting JSON attributes + set_json_attr = ->(attr_name, obj) { Common.set_json_attr(span, attr_name, obj) } + + # Helper to extract metadata from SDK's internal snapshot + extract_stream_metadata = lambda do + # Access the SDK's internal accumulated completion snapshot + snapshot = stream.current_completion_snapshot + return unless snapshot + + # Set output from accumulated choices + if snapshot.choices&.any? + choices_array = snapshot.choices.map(&:to_h) + set_json_attr.call("braintrust.output_json", choices_array) + end + + # Set metrics if usage is available + metrics = {} + if snapshot.usage + metrics = Common.parse_usage_tokens(snapshot.usage) + end + # Add time_to_first_token metric + metrics["time_to_first_token"] = time_to_first_token || 0.0 + set_json_attr.call("braintrust.metrics", metrics) unless metrics.empty? + + # Update metadata with response fields + metadata["id"] = snapshot.id if snapshot.respond_to?(:id) && snapshot.id + metadata["created"] = snapshot.created if snapshot.respond_to?(:created) && snapshot.created + metadata["model"] = snapshot.model if snapshot.respond_to?(:model) && snapshot.model + metadata["system_fingerprint"] = snapshot.system_fingerprint if snapshot.respond_to?(:system_fingerprint) && snapshot.system_fingerprint + set_json_attr.call("braintrust.metadata", metadata) + end + + # Prevent double-finish of span + finish_braintrust_span = lambda do + return if stream.instance_variable_get(:@braintrust_span_finished) + stream.instance_variable_set(:@braintrust_span_finished, true) + extract_stream_metadata.call + span.finish + end + + # Wrap .each() method - this is the core consumption method + original_each = stream.method(:each) + stream.define_singleton_method(:each) do |&block| + original_each.call do |chunk| + # Capture time to first token on first chunk + time_to_first_token ||= Time.now - start_time + block&.call(chunk) + end + rescue => e + span.record_exception(e) + span.status = 
::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") + raise + ensure + finish_braintrust_span.call + end + + # Wrap .text() method - returns enumerable for text deltas + original_text = stream.method(:text) + stream.define_singleton_method(:text) do + text_enum = original_text.call + # Wrap the returned enumerable's .each method + original_text_each = text_enum.method(:each) + text_enum.define_singleton_method(:each) do |&block| + original_text_each.call do |delta| + # Capture time to first token on first delta + time_to_first_token ||= Time.now - start_time + block&.call(delta) + end + rescue => e + span.record_exception(e) + span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") + raise + ensure + finish_braintrust_span.call + end + text_enum + end + + stream + end + end + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/openai/instrumentation/common.rb b/lib/braintrust/contrib/openai/instrumentation/common.rb new file mode 100644 index 0000000..15c167c --- /dev/null +++ b/lib/braintrust/contrib/openai/instrumentation/common.rb @@ -0,0 +1,164 @@ +# frozen_string_literal: true + +require "json" + +require_relative "../../../trace/tokens" + +module Braintrust + module Contrib + module OpenAI + module Instrumentation + # Chat completions instrumentation for OpenAI. + # Provides modules that can be prepended to OpenAI::Client to instrument chat.completions API. 
+ module Common + # Helper to safely set a JSON attribute on a span + # Only sets the attribute if obj is present + # @param span [OpenTelemetry::Trace::Span] the span to set attribute on + # @param attr_name [String] the attribute name (e.g., "braintrust.output_json") + # @param obj [Object] the object to serialize to JSON + # @return [void] + def self.set_json_attr(span, attr_name, obj) + return unless obj + span.set_attribute(attr_name, JSON.generate(obj)) + end + + # Parse usage tokens from OpenAI API response + # @param usage [Hash, Object] usage object from OpenAI response + # @return [Hash] metrics hash with normalized names + def self.parse_usage_tokens(usage) + Braintrust::Trace.parse_openai_usage_tokens(usage) + end + + # Aggregate streaming chunks into a single response structure + # Follows the Go SDK logic for aggregating deltas + # @param chunks [Array] array of chunk hashes from stream + # @return [Hash] aggregated response with choices, usage, etc. + def self.aggregate_streaming_chunks(chunks) + return {} if chunks.empty? 
+ + # Initialize aggregated structure + aggregated = { + id: nil, + created: nil, + model: nil, + system_fingerprint: nil, + choices: [], + usage: nil + } + + # Track aggregated content and tool_calls for each choice index + choice_data = {} + + chunks.each do |chunk| + # Capture top-level fields from any chunk that has them + aggregated[:id] ||= chunk[:id] + aggregated[:created] ||= chunk[:created] + aggregated[:model] ||= chunk[:model] + aggregated[:system_fingerprint] ||= chunk[:system_fingerprint] + + # Aggregate usage (usually only in last chunk if stream_options.include_usage is set) + if chunk[:usage] + aggregated[:usage] = chunk[:usage] + end + + # Process choices + next unless chunk[:choices].is_a?(Array) + chunk[:choices].each do |choice| + index = choice[:index] || 0 + choice_data[index] ||= { + index: index, + role: nil, + content: +"", + tool_calls: [], + finish_reason: nil + } + + delta = choice[:delta] || {} + + # Aggregate role (set once from first delta that has it) + choice_data[index][:role] ||= delta[:role] + + # Aggregate content + if delta[:content] + choice_data[index][:content] << delta[:content] + end + + # Aggregate tool_calls (similar to Go SDK logic) + if delta[:tool_calls].is_a?(Array) && delta[:tool_calls].any? + delta[:tool_calls].each do |tool_call_delta| + # Check if this is a new tool call or continuation + if tool_call_delta[:id] && !tool_call_delta[:id].empty? + # New tool call + choice_data[index][:tool_calls] << { + id: tool_call_delta[:id], + type: tool_call_delta[:type], + function: { + name: tool_call_delta.dig(:function, :name) || +"", + arguments: tool_call_delta.dig(:function, :arguments) || +"" + } + } + elsif choice_data[index][:tool_calls].any? 
+ # Continuation - append arguments to last tool call + last_tool_call = choice_data[index][:tool_calls].last + if tool_call_delta.dig(:function, :arguments) + last_tool_call[:function][:arguments] << tool_call_delta[:function][:arguments] + end + end + end + end + + # Capture finish_reason + if choice[:finish_reason] + choice_data[index][:finish_reason] = choice[:finish_reason] + end + end + end + + # Build final choices array + aggregated[:choices] = choice_data.values.sort_by { |c| c[:index] }.map do |choice| + message = { + role: choice[:role], + content: choice[:content].empty? ? nil : choice[:content] + } + + # Add tool_calls to message if any + message[:tool_calls] = choice[:tool_calls] if choice[:tool_calls].any? + + { + index: choice[:index], + message: message, + finish_reason: choice[:finish_reason] + } + end + + aggregated + end + + # Aggregate responses streaming events into a single response structure + # Follows similar logic to Python SDK's _postprocess_streaming_results + # @param events [Array] array of event objects from stream + # @return [Hash] aggregated response with output, usage, etc. + def self.aggregate_responses_events(events) + return {} if events.empty? + + # Find the response.completed event which has the final response + completed_event = events.find { |e| e.respond_to?(:type) && e.type == :"response.completed" } + + if completed_event&.respond_to?(:response) + response = completed_event.response + # Convert the response object to a hash-like structure for logging + return { + id: response.respond_to?(:id) ? response.id : nil, + output: response.respond_to?(:output) ? response.output : nil, + usage: response.respond_to?(:usage) ? 
response.usage : nil + } + end + + # Fallback if no completed event found + {} + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/openai/instrumentation/responses.rb b/lib/braintrust/contrib/openai/instrumentation/responses.rb new file mode 100644 index 0000000..dcbd30b --- /dev/null +++ b/lib/braintrust/contrib/openai/instrumentation/responses.rb @@ -0,0 +1,166 @@ +# frozen_string_literal: true + +require "opentelemetry/sdk" +require "json" + +require_relative "common" + +module Braintrust + module Contrib + module OpenAI + module Instrumentation + # Responses API instrumentation for OpenAI. + # Provides modules that can be prepended to OpenAI::Client to instrument responses API. + module Responses + def self.included(base) + # Guard against double-wrapping for: Check if patch is already in the ancestor chain. + # This prevents double instrumentation if class-level patching was already applied, + # and this patch is being applied to a singleton-class. (Special case.) + # + # Ruby's prepend() doesn't check the full inheritance chain, so without this guard, + # the instrumentation could be added twice. 
+ base.prepend(InstanceMethods) unless base.ancestors.include?(InstanceMethods) + end + + def self.applied?(base) + base.ancestors.include?(InstanceMethods) + end + + module InstanceMethods + # Wrap non-streaming create method + def create(**params) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + + tracer.in_span("openai.responses.create") do |span| + # Initialize metadata hash + metadata = { + "provider" => "openai", + "endpoint" => "/v1/responses" + } + + # Capture request metadata fields + metadata_fields = %i[ + model instructions modalities tools parallel_tool_calls + tool_choice temperature max_tokens top_p frequency_penalty + presence_penalty seed user metadata store response_format + ] + + metadata_fields.each do |field| + metadata[field.to_s] = params[field] if params.key?(field) + end + + # Set input as JSON + if params[:input] + span.set_attribute("braintrust.input_json", JSON.generate(params[:input])) + end + + # Call the original method + response = super(**params) + + # Set output as JSON + if response.respond_to?(:output) && response.output + span.set_attribute("braintrust.output_json", JSON.generate(response.output)) + end + + # Set metrics (token usage) + if response.respond_to?(:usage) && response.usage + metrics = Common.parse_usage_tokens(response.usage) + span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty? 
+ end + + # Add response metadata fields + metadata["id"] = response.id if response.respond_to?(:id) && response.id + + # Set metadata ONCE at the end with complete hash + span.set_attribute("braintrust.metadata", JSON.generate(metadata)) + + response + end + end + + # Wrap streaming method + def stream(**params) + client = instance_variable_get(:@client) + tracer = Braintrust::Contrib.tracer_for(client) + aggregated_events = [] + metadata = { + "provider" => "openai", + "endpoint" => "/v1/responses", + "stream" => true + } + + # Start span with proper context + span = tracer.start_span("openai.responses.create") + + # Capture request metadata fields + metadata_fields = %i[ + model instructions modalities tools parallel_tool_calls + tool_choice temperature max_tokens top_p frequency_penalty + presence_penalty seed user metadata store response_format + ] + + metadata_fields.each do |field| + metadata[field.to_s] = params[field] if params.key?(field) + end + + # Set input as JSON + if params[:input] + span.set_attribute("braintrust.input_json", JSON.generate(params[:input])) + end + + # Set initial metadata + span.set_attribute("braintrust.metadata", JSON.generate(metadata)) + + # Call the original stream method with error handling + begin + stream = super + rescue => e + # Record exception if stream creation fails + span.record_exception(e) + span.status = ::OpenTelemetry::Trace::Status.error("OpenAI API error: #{e.message}") + span.finish + raise + end + + # Wrap the stream to aggregate events + original_each = stream.method(:each) + stream.define_singleton_method(:each) do |&block| + original_each.call do |event| + # Store the actual event object (not converted to hash) + aggregated_events << event + block&.call(event) + end + rescue => e + # Record exception if streaming fails + span.record_exception(e) + span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") + raise + ensure + # Always aggregate whatever events we collected and 
finish span + unless aggregated_events.empty? + aggregated_output = Common.aggregate_responses_events(aggregated_events) + Common.set_json_attr(span, "braintrust.output_json", aggregated_output[:output]) if aggregated_output[:output] + + # Set metrics if usage is included + if aggregated_output[:usage] + metrics = Common.parse_usage_tokens(aggregated_output[:usage]) + Common.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? + end + + # Update metadata with response fields + metadata["id"] = aggregated_output[:id] if aggregated_output[:id] + Common.set_json_attr(span, "braintrust.metadata", metadata) + end + + span.finish + end + + stream + end + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/openai/integration.rb b/lib/braintrust/contrib/openai/integration.rb new file mode 100644 index 0000000..083f635 --- /dev/null +++ b/lib/braintrust/contrib/openai/integration.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +require_relative "../integration" + +module Braintrust + module Contrib + module OpenAI + # OpenAI integration for automatic instrumentation. + # Instruments the official openai gem (not ruby-openai). + class Integration + include Braintrust::Contrib::Integration + + # @return [Symbol] Unique identifier for this integration + def self.integration_name + :openai + end + + # @return [Array] Gem names this integration supports + def self.gem_names + ["openai"] + end + + # @return [Array] Require paths for auto-instrument detection + def self.require_paths + ["openai"] + end + + # @return [String] Minimum compatible version + def self.minimum_version + "0.1.0" + end + + # Check if the official openai gem is loaded (not ruby-openai). + # The ruby-openai gem also uses "require 'openai'", so we need to distinguish them. + # @return [Boolean] true if official openai gem is available + def self.available? 
+ # Check if "openai" gem is in loaded specs (official gem name) + return true if Gem.loaded_specs.key?("openai") + + # Also check $LOADED_FEATURES for files ending with /openai.rb + # and containing /openai- in the path (gem version in path) + # This helps distinguish from ruby-openai which has /ruby-openai-/ in path + $LOADED_FEATURES.any? do |feature| + feature.end_with?("/openai.rb") && feature.include?("/openai-") + end + end + + # Lazy-load the patcher only when actually patching. + # This keeps the integration stub lightweight. + # @return [Class] The patcher class + def self.patchers + require_relative "patcher" + [ChatPatcher, ResponsesPatcher] + end + end + end + end +end diff --git a/lib/braintrust/contrib/openai/patcher.rb b/lib/braintrust/contrib/openai/patcher.rb new file mode 100644 index 0000000..4d2b90b --- /dev/null +++ b/lib/braintrust/contrib/openai/patcher.rb @@ -0,0 +1,95 @@ +# frozen_string_literal: true + +require_relative "../patcher" +require_relative "instrumentation/chat" +require_relative "instrumentation/responses" + +module Braintrust + module Contrib + module OpenAI + # Patcher for OpenAI integration - implements class-level patching. + # All new OpenAI::Client instances created after patch! will be automatically instrumented. + class ChatPatcher < Braintrust::Contrib::Patcher + class << self + def applicable? + defined?(::OpenAI::Client) + end + + def patched?(**options) + # Use the target's singleton class if provided, otherwise check the base class. + target_class = get_singleton_class(options[:target]) || ::OpenAI::Resources::Chat::Completions + + Instrumentation::Chat::Completions.applied?(target_class) + end + + # Perform the actual patching. 
+ # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + def perform_patch(**options) + return unless applicable? + + if options[:target] + # Instance-level (for only this client) + raise ArgumentError, "target must be a kind of ::OpenAI::Client" unless options[:target].is_a?(::OpenAI::Client) + + get_singleton_class(options[:target]).include(Instrumentation::Chat::Completions) + else + # Class-level (for all clients) + ::OpenAI::Resources::Chat::Completions.include(Instrumentation::Chat::Completions) + end + end + + private + + def get_singleton_class(client) + client&.chat&.completions&.singleton_class + end + end + end + + # Patcher for OpenAI integration - implements class-level patching. + # All new OpenAI::Client instances created after patch! will be automatically instrumented. + class ResponsesPatcher < Braintrust::Contrib::Patcher + class << self + def applicable? + defined?(::OpenAI::Client) && ::OpenAI::Client.instance_methods.include?(:responses) + end + + def patched?(**options) + # Use the target's singleton class if provided, otherwise check the base class. + target_class = get_singleton_class(options[:target]) || ::OpenAI::Resources::Responses + + Instrumentation::Responses.applied?(target_class) + end + + # Perform the actual patching. + # @param options [Hash] Configuration options passed from integration + # @option options [Object] :target Optional target instance to patch + # @option options [OpenTelemetry::SDK::Trace::TracerProvider] :tracer_provider Optional tracer provider + # @return [void] + def perform_patch(**options) + return unless applicable? 
+ + if options[:target] + # Instance-level (for only this client) + raise ArgumentError, "target must be a kind of ::OpenAI::Client" unless options[:target].is_a?(::OpenAI::Client) + + get_singleton_class(options[:target]).include(Instrumentation::Responses) + else + # Class-level (for all clients) + ::OpenAI::Resources::Responses.include(Instrumentation::Responses) + end + end + + private + + def get_singleton_class(client) + client&.responses&.singleton_class + end + end + end + end + end +end diff --git a/lib/braintrust/trace/contrib/openai.rb b/lib/braintrust/trace/contrib/openai.rb index 5eb0a29..872f446 100644 --- a/lib/braintrust/trace/contrib/openai.rb +++ b/lib/braintrust/trace/contrib/openai.rb @@ -1,611 +1,24 @@ # frozen_string_literal: true -require "opentelemetry/sdk" -require "json" -require_relative "../tokens" +# Backward compatibility shim for the old OpenAI integration API. +# All instrumentation logic has been moved to lib/braintrust/contrib/openai/ +# This file now just delegates to the new API. 
+ +require_relative "../../contrib" module Braintrust module Trace module OpenAI - # Helper to safely set a JSON attribute on a span - # Only sets the attribute if obj is present - # @param span [OpenTelemetry::Trace::Span] the span to set attribute on - # @param attr_name [String] the attribute name (e.g., "braintrust.output_json") - # @param obj [Object] the object to serialize to JSON - # @return [void] - def self.set_json_attr(span, attr_name, obj) - return unless obj - span.set_attribute(attr_name, JSON.generate(obj)) - end - - # Parse usage tokens from OpenAI API response - # @param usage [Hash, Object] usage object from OpenAI response - # @return [Hash] metrics hash with normalized names - def self.parse_usage_tokens(usage) - Braintrust::Trace.parse_openai_usage_tokens(usage) - end - - # Aggregate streaming chunks into a single response structure - # Follows the Go SDK logic for aggregating deltas - # @param chunks [Array] array of chunk hashes from stream - # @return [Hash] aggregated response with choices, usage, etc. - def self.aggregate_streaming_chunks(chunks) - return {} if chunks.empty? 
- - # Initialize aggregated structure - aggregated = { - id: nil, - created: nil, - model: nil, - system_fingerprint: nil, - choices: [], - usage: nil - } - - # Track aggregated content and tool_calls for each choice index - choice_data = {} - - chunks.each do |chunk| - # Capture top-level fields from any chunk that has them - aggregated[:id] ||= chunk[:id] - aggregated[:created] ||= chunk[:created] - aggregated[:model] ||= chunk[:model] - aggregated[:system_fingerprint] ||= chunk[:system_fingerprint] - - # Aggregate usage (usually only in last chunk if stream_options.include_usage is set) - if chunk[:usage] - aggregated[:usage] = chunk[:usage] - end - - # Process choices - next unless chunk[:choices].is_a?(Array) - chunk[:choices].each do |choice| - index = choice[:index] || 0 - choice_data[index] ||= { - index: index, - role: nil, - content: +"", - tool_calls: [], - finish_reason: nil - } - - delta = choice[:delta] || {} - - # Aggregate role (set once from first delta that has it) - choice_data[index][:role] ||= delta[:role] - - # Aggregate content - if delta[:content] - choice_data[index][:content] << delta[:content] - end - - # Aggregate tool_calls (similar to Go SDK logic) - if delta[:tool_calls].is_a?(Array) && delta[:tool_calls].any? - delta[:tool_calls].each do |tool_call_delta| - # Check if this is a new tool call or continuation - if tool_call_delta[:id] && !tool_call_delta[:id].empty? - # New tool call - choice_data[index][:tool_calls] << { - id: tool_call_delta[:id], - type: tool_call_delta[:type], - function: { - name: tool_call_delta.dig(:function, :name) || +"", - arguments: tool_call_delta.dig(:function, :arguments) || +"" - } - } - elsif choice_data[index][:tool_calls].any? 
- # Continuation - append arguments to last tool call - last_tool_call = choice_data[index][:tool_calls].last - if tool_call_delta.dig(:function, :arguments) - last_tool_call[:function][:arguments] << tool_call_delta[:function][:arguments] - end - end - end - end - - # Capture finish_reason - if choice[:finish_reason] - choice_data[index][:finish_reason] = choice[:finish_reason] - end - end - end - - # Build final choices array - aggregated[:choices] = choice_data.values.sort_by { |c| c[:index] }.map do |choice| - message = { - role: choice[:role], - content: choice[:content].empty? ? nil : choice[:content] - } - - # Add tool_calls to message if any - message[:tool_calls] = choice[:tool_calls] if choice[:tool_calls].any? - - { - index: choice[:index], - message: message, - finish_reason: choice[:finish_reason] - } - end - - aggregated - end - - # Wrap an OpenAI::Client to automatically create spans for chat completions and responses - # Supports both synchronous and streaming requests + # Wrap an OpenAI::Client to automatically create spans for chat completions and responses. + # This is the legacy API - delegates to the new contrib framework. 
+ # # @param client [OpenAI::Client] the OpenAI client to wrap # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider (defaults to global) + # @return [OpenAI::Client] the wrapped client def self.wrap(client, tracer_provider: nil) - tracer_provider ||= ::OpenTelemetry.tracer_provider - - # Wrap chat completions - wrap_chat_completions(client, tracer_provider) - - # Wrap responses API if available - wrap_responses(client, tracer_provider) if client.respond_to?(:responses) - + Braintrust::Contrib.instrument!(:openai, target: client, tracer_provider: tracer_provider) client end - - # Wrap chat completions API - # @param client [OpenAI::Client] the OpenAI client - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider - def self.wrap_chat_completions(client, tracer_provider) - # Create a wrapper module that intercepts chat.completions.create - wrapper = Module.new do - define_method(:create) do |**params| - tracer = tracer_provider.tracer("braintrust") - - tracer.in_span("Chat Completion") do |span| - # Track start time for time_to_first_token - start_time = Time.now - - # Initialize metadata hash - metadata = { - "provider" => "openai", - "endpoint" => "/v1/chat/completions" - } - - # Capture request metadata fields - metadata_fields = %i[ - model frequency_penalty logit_bias logprobs max_tokens n - presence_penalty response_format seed service_tier stop - stream stream_options temperature top_p top_logprobs - tools tool_choice parallel_tool_calls user functions function_call - ] - - metadata_fields.each do |field| - metadata[field.to_s] = params[field] if params.key?(field) - end - - # Set input messages as JSON - # Pass through all message fields to preserve tool_calls, tool_call_id, name, etc. 
- if params[:messages] - messages_array = params[:messages].map(&:to_h) - span.set_attribute("braintrust.input_json", JSON.generate(messages_array)) - end - - # Call the original method - response = super(**params) - - # Calculate time to first token - time_to_first_token = Time.now - start_time - - # Set output (choices) as JSON - # Use to_h to get the raw structure with all fields (including tool_calls) - if response.respond_to?(:choices) && response.choices&.any? - choices_array = response.choices.map(&:to_h) - span.set_attribute("braintrust.output_json", JSON.generate(choices_array)) - end - - # Set metrics (token usage with advanced details) - metrics = {} - if response.respond_to?(:usage) && response.usage - metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(response.usage) - end - # Add time_to_first_token metric - metrics["time_to_first_token"] = time_to_first_token - span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty? - - # Add response metadata fields - metadata["id"] = response.id if response.respond_to?(:id) && response.id - metadata["created"] = response.created if response.respond_to?(:created) && response.created - metadata["system_fingerprint"] = response.system_fingerprint if response.respond_to?(:system_fingerprint) && response.system_fingerprint - metadata["service_tier"] = response.service_tier if response.respond_to?(:service_tier) && response.service_tier - - # Set metadata ONCE at the end with complete hash - span.set_attribute("braintrust.metadata", JSON.generate(metadata)) - - response - end - end - - # Wrap stream_raw for streaming chat completions - define_method(:stream_raw) do |**params| - tracer = tracer_provider.tracer("braintrust") - aggregated_chunks = [] - start_time = Time.now - time_to_first_token = nil - metadata = { - "provider" => "openai", - "endpoint" => "/v1/chat/completions" - } - - # Start span with proper context (will be child of current span if any) - span = 
tracer.start_span("Chat Completion") - - # Capture request metadata fields - metadata_fields = %i[ - model frequency_penalty logit_bias logprobs max_tokens n - presence_penalty response_format seed service_tier stop - stream stream_options temperature top_p top_logprobs - tools tool_choice parallel_tool_calls user functions function_call - ] - - metadata_fields.each do |field| - metadata[field.to_s] = params[field] if params.key?(field) - end - metadata["stream"] = true # Explicitly mark as streaming - - # Set input messages as JSON - if params[:messages] - messages_array = params[:messages].map(&:to_h) - span.set_attribute("braintrust.input_json", JSON.generate(messages_array)) - end - - # Set initial metadata - span.set_attribute("braintrust.metadata", JSON.generate(metadata)) - - # Call the original stream_raw method with error handling - begin - stream = super(**params) - rescue => e - # Record exception if stream creation fails - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("OpenAI API error: #{e.message}") - span.finish - raise - end - - # Wrap the stream to aggregate chunks - original_each = stream.method(:each) - stream.define_singleton_method(:each) do |&block| - original_each.call do |chunk| - # Capture time to first token on first chunk - time_to_first_token ||= Time.now - start_time - aggregated_chunks << chunk.to_h - block&.call(chunk) - end - rescue => e - # Record exception if streaming fails - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") - raise - ensure - # Always aggregate whatever chunks we collected and finish span - # This runs on normal completion, break, or exception - unless aggregated_chunks.empty? 
- aggregated_output = Braintrust::Trace::OpenAI.aggregate_streaming_chunks(aggregated_chunks) - Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.output_json", aggregated_output[:choices]) - - # Set metrics if usage is included (requires stream_options.include_usage) - metrics = {} - if aggregated_output[:usage] - metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(aggregated_output[:usage]) - end - # Add time_to_first_token metric - metrics["time_to_first_token"] = time_to_first_token || 0.0 - Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? - - # Update metadata with response fields - metadata["id"] = aggregated_output[:id] if aggregated_output[:id] - metadata["created"] = aggregated_output[:created] if aggregated_output[:created] - metadata["model"] = aggregated_output[:model] if aggregated_output[:model] - metadata["system_fingerprint"] = aggregated_output[:system_fingerprint] if aggregated_output[:system_fingerprint] - Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metadata", metadata) - end - - span.finish - end - - stream - end - - # Wrap stream for streaming chat completions (returns ChatCompletionStream with convenience methods) - define_method(:stream) do |**params| - tracer = tracer_provider.tracer("braintrust") - start_time = Time.now - time_to_first_token = nil - metadata = { - "provider" => "openai", - "endpoint" => "/v1/chat/completions" - } - - # Start span with proper context (will be child of current span if any) - span = tracer.start_span("Chat Completion") - - # Capture request metadata fields - metadata_fields = %i[ - model frequency_penalty logit_bias logprobs max_tokens n - presence_penalty response_format seed service_tier stop - stream stream_options temperature top_p top_logprobs - tools tool_choice parallel_tool_calls user functions function_call - ] - - metadata_fields.each do |field| - metadata[field.to_s] = params[field] if params.key?(field) - end - 
metadata["stream"] = true # Explicitly mark as streaming - - # Set input messages as JSON - if params[:messages] - messages_array = params[:messages].map(&:to_h) - span.set_attribute("braintrust.input_json", JSON.generate(messages_array)) - end - - # Set initial metadata - span.set_attribute("braintrust.metadata", JSON.generate(metadata)) - - # Call the original stream method with error handling - begin - stream = super(**params) - rescue => e - # Record exception if stream creation fails - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("OpenAI API error: #{e.message}") - span.finish - raise - end - - # Local helper for setting JSON attributes - set_json_attr = ->(attr_name, obj) { Braintrust::Trace::OpenAI.set_json_attr(span, attr_name, obj) } - - # Helper to extract metadata from SDK's internal snapshot - extract_stream_metadata = lambda do - # Access the SDK's internal accumulated completion snapshot - snapshot = stream.current_completion_snapshot - return unless snapshot - - # Set output from accumulated choices - if snapshot.choices&.any? - choices_array = snapshot.choices.map(&:to_h) - set_json_attr.call("braintrust.output_json", choices_array) - end - - # Set metrics if usage is available - metrics = {} - if snapshot.usage - metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(snapshot.usage) - end - # Add time_to_first_token metric - metrics["time_to_first_token"] = time_to_first_token || 0.0 - set_json_attr.call("braintrust.metrics", metrics) unless metrics.empty? 
- - # Update metadata with response fields - metadata["id"] = snapshot.id if snapshot.respond_to?(:id) && snapshot.id - metadata["created"] = snapshot.created if snapshot.respond_to?(:created) && snapshot.created - metadata["model"] = snapshot.model if snapshot.respond_to?(:model) && snapshot.model - metadata["system_fingerprint"] = snapshot.system_fingerprint if snapshot.respond_to?(:system_fingerprint) && snapshot.system_fingerprint - set_json_attr.call("braintrust.metadata", metadata) - end - - # Prevent double-finish of span - finish_braintrust_span = lambda do - return if stream.instance_variable_get(:@braintrust_span_finished) - stream.instance_variable_set(:@braintrust_span_finished, true) - extract_stream_metadata.call - span.finish - end - - # Wrap .each() method - this is the core consumption method - original_each = stream.method(:each) - stream.define_singleton_method(:each) do |&block| - original_each.call do |chunk| - # Capture time to first token on first chunk - time_to_first_token ||= Time.now - start_time - block&.call(chunk) - end - rescue => e - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") - raise - ensure - finish_braintrust_span.call - end - - # Wrap .text() method - returns enumerable for text deltas - original_text = stream.method(:text) - stream.define_singleton_method(:text) do - text_enum = original_text.call - # Wrap the returned enumerable's .each method - original_text_each = text_enum.method(:each) - text_enum.define_singleton_method(:each) do |&block| - original_text_each.call do |delta| - # Capture time to first token on first delta - time_to_first_token ||= Time.now - start_time - block&.call(delta) - end - rescue => e - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") - raise - ensure - finish_braintrust_span.call - end - text_enum - end - - stream - end - end - - # Prepend the wrapper to the completions 
resource - client.chat.completions.singleton_class.prepend(wrapper) - end - - # Wrap responses API - # @param client [OpenAI::Client] the OpenAI client - # @param tracer_provider [OpenTelemetry::SDK::Trace::TracerProvider] the tracer provider - def self.wrap_responses(client, tracer_provider) - # Create a wrapper module that intercepts responses.create and responses.stream - wrapper = Module.new do - # Wrap non-streaming create method - define_method(:create) do |**params| - tracer = tracer_provider.tracer("braintrust") - - tracer.in_span("openai.responses.create") do |span| - # Initialize metadata hash - metadata = { - "provider" => "openai", - "endpoint" => "/v1/responses" - } - - # Capture request metadata fields - metadata_fields = %i[ - model instructions modalities tools parallel_tool_calls - tool_choice temperature max_tokens top_p frequency_penalty - presence_penalty seed user metadata store response_format - ] - - metadata_fields.each do |field| - metadata[field.to_s] = params[field] if params.key?(field) - end - - # Set input as JSON - if params[:input] - span.set_attribute("braintrust.input_json", JSON.generate(params[:input])) - end - - # Call the original method - response = super(**params) - - # Set output as JSON - if response.respond_to?(:output) && response.output - span.set_attribute("braintrust.output_json", JSON.generate(response.output)) - end - - # Set metrics (token usage) - if response.respond_to?(:usage) && response.usage - metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(response.usage) - span.set_attribute("braintrust.metrics", JSON.generate(metrics)) unless metrics.empty? 
- end - - # Add response metadata fields - metadata["id"] = response.id if response.respond_to?(:id) && response.id - - # Set metadata ONCE at the end with complete hash - span.set_attribute("braintrust.metadata", JSON.generate(metadata)) - - response - end - end - - # Wrap streaming method - define_method(:stream) do |**params| - tracer = tracer_provider.tracer("braintrust") - aggregated_events = [] - metadata = { - "provider" => "openai", - "endpoint" => "/v1/responses", - "stream" => true - } - - # Start span with proper context - span = tracer.start_span("openai.responses.create") - - # Capture request metadata fields - metadata_fields = %i[ - model instructions modalities tools parallel_tool_calls - tool_choice temperature max_tokens top_p frequency_penalty - presence_penalty seed user metadata store response_format - ] - - metadata_fields.each do |field| - metadata[field.to_s] = params[field] if params.key?(field) - end - - # Set input as JSON - if params[:input] - span.set_attribute("braintrust.input_json", JSON.generate(params[:input])) - end - - # Set initial metadata - span.set_attribute("braintrust.metadata", JSON.generate(metadata)) - - # Call the original stream method with error handling - begin - stream = super(**params) - rescue => e - # Record exception if stream creation fails - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("OpenAI API error: #{e.message}") - span.finish - raise - end - - # Wrap the stream to aggregate events - original_each = stream.method(:each) - stream.define_singleton_method(:each) do |&block| - original_each.call do |event| - # Store the actual event object (not converted to hash) - aggregated_events << event - block&.call(event) - end - rescue => e - # Record exception if streaming fails - span.record_exception(e) - span.status = ::OpenTelemetry::Trace::Status.error("Streaming error: #{e.message}") - raise - ensure - # Always aggregate whatever events we collected and finish span - unless 
aggregated_events.empty? - aggregated_output = Braintrust::Trace::OpenAI.aggregate_responses_events(aggregated_events) - Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.output_json", aggregated_output[:output]) if aggregated_output[:output] - - # Set metrics if usage is included - if aggregated_output[:usage] - metrics = Braintrust::Trace::OpenAI.parse_usage_tokens(aggregated_output[:usage]) - Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metrics", metrics) unless metrics.empty? - end - - # Update metadata with response fields - metadata["id"] = aggregated_output[:id] if aggregated_output[:id] - Braintrust::Trace::OpenAI.set_json_attr(span, "braintrust.metadata", metadata) - end - - span.finish - end - - stream - end - end - - # Prepend the wrapper to the responses resource - client.responses.singleton_class.prepend(wrapper) - end - - # Aggregate responses streaming events into a single response structure - # Follows similar logic to Python SDK's _postprocess_streaming_results - # @param events [Array] array of event objects from stream - # @return [Hash] aggregated response with output, usage, etc. - def self.aggregate_responses_events(events) - return {} if events.empty? - - # Find the response.completed event which has the final response - completed_event = events.find { |e| e.respond_to?(:type) && e.type == :"response.completed" } - - if completed_event&.respond_to?(:response) - response = completed_event.response - # Convert the response object to a hash-like structure for logging - return { - id: response.respond_to?(:id) ? response.id : nil, - output: response.respond_to?(:output) ? response.output : nil, - usage: response.respond_to?(:usage) ? 
response.usage : nil - } - end - - # Fallback if no completed event found - {} - end end end end diff --git a/test/braintrust/contrib/openai/integration_test.rb b/test/braintrust/contrib/openai/integration_test.rb new file mode 100644 index 0000000..289a454 --- /dev/null +++ b/test/braintrust/contrib/openai/integration_test.rb @@ -0,0 +1,140 @@ +# frozen_string_literal: true + +require "test_helper" + +class Braintrust::Contrib::OpenAI::IntegrationTest < Minitest::Test + def setup + @integration = Braintrust::Contrib::OpenAI::Integration + end + + def test_integration_name + assert_equal :openai, @integration.integration_name + end + + def test_gem_names + assert_equal ["openai"], @integration.gem_names + end + + def test_require_paths + assert_equal ["openai"], @integration.require_paths + end + + def test_minimum_version + assert_equal "0.1.0", @integration.minimum_version + end + + def test_maximum_version + assert_nil @integration.maximum_version + end + + def test_available_when_openai_gem_loaded + # Mock Gem.loaded_specs to include "openai" gem + original_specs = Gem.loaded_specs.dup + begin + Gem.loaded_specs["openai"] = Gem::Specification.new("openai", "1.0.0") + assert @integration.available?, "Should be available when openai gem is in loaded_specs" + ensure + Gem.loaded_specs.replace(original_specs) + end + end + + def test_available_with_loaded_features + # Skip this test if openai gem is actually loaded (we can't properly mock it) + skip "OpenAI gem is loaded, can't test $LOADED_FEATURES path in isolation" if Gem.loaded_specs.key?("openai") + + # Mock $LOADED_FEATURES to include openai gem path + # This simulates the gem being loaded via require 'openai' + original_features = $LOADED_FEATURES.dup + begin + $LOADED_FEATURES.replace(["/path/to/gems/openai-1.0.0/lib/openai.rb"]) + + assert @integration.available?, "Should be available when openai.rb is in $LOADED_FEATURES with openai- in path" + ensure + $LOADED_FEATURES.replace(original_features) + end + 
end + + def test_not_available_when_ruby_openai_gem_loaded + # The ruby-openai gem also uses 'openai' in require path but has different gem name + # It should NOT match because: + # 1. Gem.loaded_specs won't have "openai" key (it has "ruby-openai") + # 2. $LOADED_FEATURES will have /ruby-openai-/ in path, not /openai-/ + original_specs = Gem.loaded_specs.dup + original_features = $LOADED_FEATURES.dup + begin + # Clear any openai gem + Gem.loaded_specs.delete("openai") + # Add ruby-openai gem + Gem.loaded_specs["ruby-openai"] = Gem::Specification.new("ruby-openai", "1.0.0") + # Add to $LOADED_FEATURES with ruby-openai path + $LOADED_FEATURES.replace(["/path/to/gems/ruby-openai-1.0.0/lib/openai.rb"]) + + refute @integration.available?, "Should NOT be available when only ruby-openai gem is loaded" + ensure + Gem.loaded_specs.replace(original_specs) + $LOADED_FEATURES.replace(original_features) + end + end + + def test_not_available_when_no_gem_loaded + # Mock Gem.loaded_specs and $LOADED_FEATURES to have no openai + original_specs = Gem.loaded_specs.dup + original_features = $LOADED_FEATURES.dup + begin + Gem.loaded_specs.delete("openai") + $LOADED_FEATURES.replace(["/some/other/gem.rb"]) + + refute @integration.available?, "Should not be available when openai gem is not loaded" + ensure + Gem.loaded_specs.replace(original_specs) + $LOADED_FEATURES.replace(original_features) + end + end + + def test_compatible_when_available + skip "OpenAI gem not available" unless defined?(::OpenAI) + skip "ruby-openai gem loaded instead" if Gem.loaded_specs["ruby-openai"] + + # If openai gem is actually loaded in test environment + if @integration.available? 
+ assert @integration.compatible?, "Should be compatible when available and version is acceptable" + end + end + + def test_patchers_lazy_loads + # The patchers should not be loaded until we call patchers method + # We can't easily test this without unloading the constants, so we'll just + # verify that patchers returns an array of classes + patcher_classes = @integration.patchers + assert patcher_classes.is_a?(Array), "patchers should return an Array" + assert patcher_classes.length > 0, "patchers should return at least one patcher" + patcher_classes.each do |patcher_class| + assert patcher_class.is_a?(Class), "each patcher should be a Class" + assert patcher_class < Braintrust::Contrib::Patcher, "each patcher should inherit from Patcher" + end + end + + def test_patch_returns_false_when_not_available + with_stubbed_singleton_method(@integration, :available?, -> { false }) do + result = @integration.patch!(tracer_provider: nil) + refute result, "patch! should return false when not available" + end + end + + def test_register_adds_to_registry + # Clear registry for clean test + registry = Braintrust::Contrib::Registry.instance + registry.clear! + + # Register the integration + @integration.register! + + # Verify it's in the registry + assert_equal @integration, registry[:openai] + assert registry.all.include?(@integration) + ensure + registry.clear! + # Re-register for other tests + @integration.register! 
+ end +end diff --git a/test/braintrust/contrib/openai/patcher_feature_parity_test.rb b/test/braintrust/contrib/openai/patcher_feature_parity_test.rb new file mode 100644 index 0000000..0c6d20c --- /dev/null +++ b/test/braintrust/contrib/openai/patcher_feature_parity_test.rb @@ -0,0 +1,1073 @@ +# frozen_string_literal: true + +require "test_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/openai/patcher" + +class Braintrust::Contrib::OpenAI::PatcherFeatureParityTest < Minitest::Test + def setup + # Skip all OpenAI tests if the gem is not available + skip "OpenAI gem not available" unless defined?(OpenAI) + + # Check which gem is loaded by looking at Gem.loaded_specs + # ruby-openai has gem name "ruby-openai" + # official openai gem has gem name "openai" + if Gem.loaded_specs["ruby-openai"] + skip "openai gem not available (found ruby-openai gem instead)" + elsif !Gem.loaded_specs["openai"] + skip "Could not determine which OpenAI gem is loaded" + end + end + + # No teardown needed - patchers are idempotent + + def test_wrap_creates_span_for_chat_completions + VCR.use_cassette("openai/chat_completions") do + require "openai" + + # Set up test rig (includes Braintrust processor) + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + # Patch OpenAI at class level (all new clients will be auto-traced) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + # Create OpenAI client AFTER patching + client = OpenAI::Client.new(api_key: get_openai_key) + + # Make a simple chat completion request with additional params to test metadata capture + response = client.chat.completions.create( + messages: [ + {role: "system", content: "You are a test assistant."}, + {role: "user", content: "Say 'test'"} + ], + model: "gpt-4o-mini", + max_tokens: 10, + temperature: 0.5 + ) + + # Verify response + refute_nil response + refute_nil response.choices[0].message.content + + # Drain and verify span + span = rig.drain_one + + # Verify span name matches Go SDK + assert_equal "Chat Completion", span.name + + # Verify braintrust.input_json contains messages + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal 2, input.length + assert_equal "system", input[0]["role"] + assert_equal "You are a test assistant.", input[0]["content"] + assert_equal "user", input[1]["role"] + assert_equal "Say 'test'", input[1]["content"] + + # Verify braintrust.output_json contains choices + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert_equal 1, output.length + assert_equal 0, output[0]["index"] + assert_equal "assistant", output[0]["message"]["role"] + refute_nil output[0]["message"]["content"] + refute_nil output[0]["finish_reason"] + + # Verify braintrust.metadata contains request and response metadata + assert span.attributes.key?("braintrust.metadata") + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "openai", metadata["provider"] + assert_equal "/v1/chat/completions", metadata["endpoint"] + assert_equal "gpt-4o-mini", metadata["model"] + assert_equal 10, metadata["max_tokens"] + assert_equal 0.5, metadata["temperature"] + refute_nil metadata["id"] + refute_nil metadata["created"] + + # Verify braintrust.metrics contains token usage + assert 
span.attributes.key?("braintrust.metrics") + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics["prompt_tokens"] > 0 + assert metrics["completion_tokens"] > 0 + assert metrics["tokens"] > 0 + assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] + + # Verify time_to_first_token metric is present + assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" + assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be >= 0" + end + end + + def test_wrap_handles_vision_messages_with_image_url + VCR.use_cassette("openai/vision") do + require "openai" + + # Set up test rig + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + # Patch OpenAI at class level (all new clients will be auto-traced) + Braintrust::Contrib::OpenAI::Integration.patch! + + # Create OpenAI client AFTER patching + client = OpenAI::Client.new(api_key: get_openai_key) + + # Make a vision request with content array (image_url + text) + response = client.chat.completions.create( + messages: [ + { + role: "user", + content: [ + {type: "text", text: "What color is this image?"}, + { + type: "image_url", + image_url: { + url: "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/320px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" + } + } + ] + } + ], + model: "gpt-4o-mini", + max_tokens: 50 + ) + + # Verify response + refute_nil response + refute_nil response.choices[0].message.content + + # Drain and verify span + span = rig.drain_one + + # Verify span name + assert_equal "Chat Completion", span.name + + # Verify braintrust.input_json contains messages with content array + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal 1, input.length + assert_equal "user", input[0]["role"] + + # Content should be an array, not a string + 
assert_instance_of Array, input[0]["content"] + assert_equal 2, input[0]["content"].length + + # First element should be text + assert_equal "text", input[0]["content"][0]["type"] + assert_equal "What color is this image?", input[0]["content"][0]["text"] + + # Second element should be image_url + assert_equal "image_url", input[0]["content"][1]["type"] + assert input[0]["content"][1]["image_url"].key?("url") + assert_match(/wikimedia/, input[0]["content"][1]["image_url"]["url"]) + + # Verify output still works + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert_equal 1, output.length + refute_nil output[0]["message"]["content"] + end + end + + def test_wrap_handles_tool_messages_with_tool_call_id + VCR.use_cassette("openai/tool_messages") do + require "openai" + + # Set up test rig + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + # Patch OpenAI at class level (all new clients will be auto-traced) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + # Create OpenAI client AFTER patching + client = OpenAI::Client.new(api_key: get_openai_key) + + # First request - model will use a tool + tools = [ + { + type: "function", + function: { + name: "get_weather", + description: "Get the current weather", + parameters: { + type: "object", + properties: { + location: {type: "string", description: "City name"} + }, + required: ["location"] + } + } + } + ] + + first_response = client.chat.completions.create( + messages: [ + {role: "user", content: "What's the weather in Paris?"} + ], + model: "gpt-4o-mini", + tools: tools, + max_tokens: 100 + ) + + # Get the tool call from response + tool_call = first_response.choices[0].message.tool_calls&.first + skip "Model didn't call tool" unless tool_call + + # Second request - provide tool result with tool_call_id + second_response = client.chat.completions.create( + messages: [ + {role: "user", content: "What's the weather in Paris?"}, + { + role: "assistant", + content: nil, + tool_calls: [ + { + id: tool_call.id, + type: "function", + function: { + name: tool_call.function.name, + arguments: tool_call.function.arguments + } + } + ] + }, + { + role: "tool", + tool_call_id: tool_call.id, + content: "Sunny, 22°C" + } + ], + model: "gpt-4o-mini", + tools: tools, + max_tokens: 100 + ) + + # Verify response + refute_nil second_response + refute_nil second_response.choices[0].message.content + + # Drain all spans (we have 2: first request + second request) + spans = rig.drain + assert_equal 2, spans.length, "Should have 2 spans (one for each request)" + + # We're testing the second span (second request) + span = spans[1] + + # Verify braintrust.input_json contains all message fields + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal 3, input.length + + # First message: user + assert_equal "user", input[0]["role"] + assert_equal "What's the weather in Paris?", input[0]["content"] + + # Second message: 
assistant with tool_calls + assert_equal "assistant", input[1]["role"] + assert input[1].key?("tool_calls"), "assistant message should have tool_calls" + assert_equal 1, input[1]["tool_calls"].length + assert_equal tool_call.id, input[1]["tool_calls"][0]["id"] + assert_equal "function", input[1]["tool_calls"][0]["type"] + assert_equal tool_call.function.name, input[1]["tool_calls"][0]["function"]["name"] + + # Third message: tool response with tool_call_id + assert_equal "tool", input[2]["role"] + assert_equal tool_call.id, input[2]["tool_call_id"], "tool message should preserve tool_call_id" + assert_equal "Sunny, 22°C", input[2]["content"] + + # Verify the output (final assistant message) was captured + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert_equal 1, output.length + refute_nil output[0]["message"]["content"] + end + end + + def test_wrap_parses_advanced_token_metrics + VCR.use_cassette("openai/advanced_tokens") do + require "openai" + + # This test verifies that we properly parse token_details fields + # Note: We make a real (VCR-recorded) request since we can't control what OpenAI returns + + # Set up test rig + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + # Patch OpenAI at class level (all new clients will be auto-traced) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + # Create OpenAI client AFTER patching + client = OpenAI::Client.new(api_key: get_openai_key) + + # Make a request (ideally with a model that returns detailed metrics) + # For now, we'll just make a normal request and verify the metrics structure + response = client.chat.completions.create( + messages: [ + {role: "user", content: "test"} + ], + model: "gpt-4o-mini", + max_tokens: 10 + ) + + # Verify response + refute_nil response + + # Drain and verify span + span = rig.drain_one + + # Verify braintrust.metrics exists + assert span.attributes.key?("braintrust.metrics") + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + + # Basic metrics should always be present + assert metrics["prompt_tokens"] > 0 + assert metrics["completion_tokens"] > 0 + assert metrics["tokens"] > 0 + + # If the response includes token_details, they should be parsed with correct naming + # The response.usage object may have: + # - prompt_tokens_details.cached_tokens → prompt_cached_tokens + # - prompt_tokens_details.audio_tokens → prompt_audio_tokens + # - completion_tokens_details.reasoning_tokens → completion_reasoning_tokens + # - completion_tokens_details.audio_tokens → completion_audio_tokens + # + # We can't force OpenAI to return these, but if they exist, we verify the naming + + if response.usage.respond_to?(:prompt_tokens_details) && response.usage.prompt_tokens_details + details = response.usage.prompt_tokens_details + if details.respond_to?(:cached_tokens) && details.cached_tokens + assert metrics.key?("prompt_cached_tokens"), "Should have prompt_cached_tokens" + assert_equal details.cached_tokens, metrics["prompt_cached_tokens"] + end + end + + if response.usage.respond_to?(:completion_tokens_details) && response.usage.completion_tokens_details + details = response.usage.completion_tokens_details + if details.respond_to?(:reasoning_tokens) && details.reasoning_tokens + assert metrics.key?("completion_reasoning_tokens"), "Should have completion_reasoning_tokens" + 
assert_equal details.reasoning_tokens, metrics["completion_reasoning_tokens"] + end + end + end + end + + def test_wrap_handles_streaming_chat_completions + VCR.use_cassette("openai/streaming") do + require "openai" + + # Set up test rig + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + # Patch OpenAI at class level (all new clients will be auto-traced) + Braintrust::Contrib::OpenAI::Integration.patch! + + # Create OpenAI client AFTER patching + client = OpenAI::Client.new(api_key: get_openai_key) + + # Make a streaming request + stream = client.chat.completions.stream_raw( + messages: [ + {role: "user", content: "Count from 1 to 3"} + ], + model: "gpt-4o-mini", + max_tokens: 50, + stream_options: { + include_usage: true # Request usage stats in stream + } + ) + + # Consume the stream + full_content = "" + stream.each do |chunk| + delta_content = chunk.choices[0]&.delta&.content + full_content += delta_content if delta_content + end + + # Verify we got content + refute_empty full_content + + # Drain and verify span + span = rig.drain_one + + # Verify span name + assert_equal "Chat Completion", span.name + + # Verify input was captured + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal 1, input.length + assert_equal "user", input[0]["role"] + assert_equal "Count from 1 to 3", input[0]["content"] + + # Verify output was aggregated from stream + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert_equal 1, output.length + assert_equal 0, output[0]["index"] + assert_equal "assistant", output[0]["message"]["role"] + assert output[0]["message"]["content"], "Should have aggregated content" + assert output[0]["message"]["content"].length > 0, "Content should not be empty" + + # Verify metadata includes stream flag + assert span.attributes.key?("braintrust.metadata") + 
metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "openai", metadata["provider"] + assert_equal true, metadata["stream"] + assert_match(/gpt-4o-mini/, metadata["model"]) # Model may include version suffix + + # Verify metrics include time_to_first_token and usage tokens + assert span.attributes.key?("braintrust.metrics"), "Should have braintrust.metrics" + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" + assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be >= 0" + + # Verify usage metrics are present (when stream_options.include_usage is set) + assert metrics.key?("prompt_tokens"), "Should have prompt_tokens metric" + assert metrics["prompt_tokens"] > 0, "prompt_tokens should be > 0" + assert metrics.key?("completion_tokens"), "Should have completion_tokens metric" + assert metrics["completion_tokens"] > 0, "completion_tokens should be > 0" + assert metrics.key?("tokens"), "Should have tokens metric" + assert metrics["tokens"] > 0, "tokens should be > 0" + assert_equal metrics["prompt_tokens"] + metrics["completion_tokens"], metrics["tokens"] + end + end + + def test_wrap_closes_span_for_partially_consumed_stream + VCR.use_cassette("openai/partial_stream") do + require "openai" + + # Set up test rig + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + # Patch OpenAI at class level (all new clients will be auto-traced) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + # Create OpenAI client AFTER patching + client = OpenAI::Client.new(api_key: get_openai_key) + + # Make a streaming request + stream = client.chat.completions.stream_raw( + messages: [ + {role: "user", content: "Count from 1 to 10"} + ], + model: "gpt-4o-mini", + max_tokens: 50 + ) + + # Consume only part of the stream + chunk_count = 0 + begin + stream.each do |chunk| + chunk_count += 1 + break if chunk_count >= 2 # Stop after 2 chunks + end + rescue StopIteration + # Expected when breaking out of iteration + end + + # Span should be finished even though we didn't consume all chunks + span = rig.drain_one + + # Verify span name + assert_equal "Chat Completion", span.name + + # Verify input was captured + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal 1, input.length + + # Note: output will be partially aggregated + end + end + + def test_wrap_records_exception_for_create_errors + VCR.use_cassette("openai/create_error") do + require "openai" + + # Set up test rig + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + # Create OpenAI client with invalid API key to trigger an error + client = OpenAI::Client.new(api_key: "invalid_key") + Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) + + # Make a request that will fail + error = assert_raises do + client.chat.completions.create( + messages: [ + {role: "user", content: "test"} + ], + model: "gpt-4o-mini" + ) + end + + # Verify an error was raised + refute_nil error + + # Drain and verify span was created with error information + span = rig.drain_one + + # Verify span name + assert_equal "Chat Completion", span.name + + # Verify span status indicates an error + assert_equal OpenTelemetry::Trace::Status::ERROR, span.status.code + + # Verify error message is captured in status description + refute_nil span.status.description + assert span.status.description.length 
> 0 + + # Verify exception event was recorded + assert span.events.any? { |event| event.name == "exception" }, "Should have an exception event" + + exception_event = span.events.find { |event| event.name == "exception" } + assert exception_event.attributes.key?("exception.type"), "Should have exception type" + assert exception_event.attributes.key?("exception.message"), "Should have exception message" + end + end + + def test_wrap_records_exception_for_stream_errors + VCR.use_cassette("openai/stream_error") do + require "openai" + + # Set up test rig + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + # Create OpenAI client with invalid API key to trigger an error + client = OpenAI::Client.new(api_key: "invalid_key") + Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) + + # Make a streaming request that will fail + error = assert_raises do + stream = client.chat.completions.stream_raw( + messages: [ + {role: "user", content: "test"} + ], + model: "gpt-4o-mini" + ) + + # Error occurs when we try to consume the stream + stream.each do |chunk| + # Won't get here + end + end + + # Verify an error was raised + refute_nil error + + # Drain and verify span was created with error information + span = rig.drain_one + + # Verify span name + assert_equal "Chat Completion", span.name + + # Verify span status indicates an error + assert_equal OpenTelemetry::Trace::Status::ERROR, span.status.code + + # Verify error message is captured in status description + refute_nil span.status.description + + # Verify exception event was recorded + assert span.events.any? 
{ |event| event.name == "exception" }, "Should have an exception event" + end + end + + def test_wrap_responses_create_non_streaming + require "openai" + + VCR.use_cassette("openai_responses_create_non_streaming") do + # Set up test rig + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + # Patch OpenAI at class level (all new clients will be auto-traced) + Braintrust::Contrib::OpenAI::Integration.patch! + + # Create OpenAI client AFTER patching + client = OpenAI::Client.new(api_key: get_openai_key) + + # Skip if responses API not available + skip "Responses API not available in this OpenAI gem version" unless client.respond_to?(:responses) + + # Make a non-streaming responses.create request + response = client.responses.create( + model: "gpt-4o-mini", + instructions: "You are a helpful assistant.", + input: "What is 2+2?" + ) + + # Verify response + refute_nil response + refute_nil response.output + + # Drain and verify span + span = rig.drain_one + + # Verify span name + assert_equal "openai.responses.create", span.name + + # Verify braintrust.input_json contains input + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal "What is 2+2?", input + + # Verify braintrust.output_json contains output + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + refute_nil output + + # Verify braintrust.metadata contains request metadata + assert span.attributes.key?("braintrust.metadata") + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "openai", metadata["provider"] + assert_equal "/v1/responses", metadata["endpoint"] + assert_equal "gpt-4o-mini", metadata["model"] + assert_equal "You are a helpful assistant.", metadata["instructions"] + + # Verify braintrust.metrics contains token usage + assert span.attributes.key?("braintrust.metrics") + metrics = 
JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics["tokens"] > 0 if metrics["tokens"] + end + end + + def test_wrap_responses_create_streaming + require "openai" + + VCR.use_cassette("openai_responses_create_streaming") do + # Set up test rig + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + # Patch OpenAI at class level (all new clients will be auto-traced) + Braintrust::Contrib::OpenAI::Integration.patch! + + # Create OpenAI client AFTER patching + client = OpenAI::Client.new(api_key: get_openai_key) + + # Skip if responses API not available + skip "Responses API not available in this OpenAI gem version" unless client.respond_to?(:responses) + + # Make a streaming responses request using .stream method + stream = client.responses.stream( + model: "gpt-4o-mini", + input: "Count from 1 to 3" + ) + + # Consume the stream + event_count = 0 + stream.each do |event| + event_count += 1 + end + + # Verify we got events + assert event_count > 0, "Should have received streaming events" + + # Drain and verify span + span = rig.drain_one + + # Verify span name + assert_equal "openai.responses.create", span.name + + # Verify input was captured + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal "Count from 1 to 3", input + + # Verify output was aggregated from stream + assert span.attributes.key?("braintrust.output_json"), "Missing braintrust.output_json. 
Keys: #{span.attributes.keys}" + output = JSON.parse(span.attributes["braintrust.output_json"]) + refute_nil output, "Output is nil: #{output.inspect}" + + # Verify metadata includes stream flag + assert span.attributes.key?("braintrust.metadata") + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "openai", metadata["provider"] + assert_equal "/v1/responses", metadata["endpoint"] + assert_equal true, metadata["stream"] + + # Verify metrics were captured if available + if span.attributes.key?("braintrust.metrics") + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics["tokens"] > 0 if metrics["tokens"] + end + end + end + + def test_wrap_responses_stream_partial_consumption + require "openai" + + VCR.use_cassette("openai_responses_stream_partial") do + # Set up test rig + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + # Patch OpenAI at class level (all new clients will be auto-traced) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + # Create OpenAI client AFTER patching + client = OpenAI::Client.new(api_key: get_openai_key) + + # Skip if responses API not available + skip "Responses API not available in this OpenAI gem version" unless client.respond_to?(:responses) + + # Make a streaming request + stream = client.responses.stream( + model: "gpt-4o-mini", + input: "Count from 1 to 10" + ) + + # Consume only part of the stream + event_count = 0 + begin + stream.each do |event| + event_count += 1 + break if event_count >= 3 # Stop after 3 events + end + rescue StopIteration + # Expected when breaking out of iteration + end + + # Span should be finished even though we didn't consume all events + span = rig.drain_one + + # Verify span name + assert_equal "openai.responses.create", span.name + + # Verify input was captured + assert span.attributes.key?("braintrust.input_json") + end + end + + def test_chat_and_responses_do_not_interfere + require "openai" + + # This test verifies that chat completions and responses API can coexist + # without interfering with each other when both wrappers are active + VCR.use_cassette("openai_chat_and_responses_no_interference") do + # Set up test rig + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + # Create OpenAI client and wrap it (wraps BOTH chat and responses) + client = OpenAI::Client.new(api_key: get_openai_key) + Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) + + # Skip if responses API not available + skip "Responses API not available in this OpenAI gem version" unless client.respond_to?(:responses) + + # First, make a chat completion request + chat_response = client.chat.completions.create( + messages: [{role: "user", content: "Say hello"}], + model: "gpt-4o-mini", + max_tokens: 10 + ) + refute_nil chat_response + + # Then, make a responses API request + # This is where the bug would manifest if the wrappers interfere + responses_response = client.responses.create( + model: 
"gpt-4o-mini", + instructions: "You are a helpful assistant.", + input: "Say goodbye" + ) + refute_nil responses_response + refute_nil responses_response.output + + # Drain both spans + spans = rig.drain + assert_equal 2, spans.length, "Should have 2 spans (chat + responses)" + + # Verify first span is for chat completions + chat_span = spans[0] + assert_equal "Chat Completion", chat_span.name + chat_metadata = JSON.parse(chat_span.attributes["braintrust.metadata"]) + assert_equal "/v1/chat/completions", chat_metadata["endpoint"] + assert_equal "gpt-4o-mini", chat_metadata["model"] + + # Verify input is messages array (chat API structure) + chat_input = JSON.parse(chat_span.attributes["braintrust.input_json"]) + assert_instance_of Array, chat_input + assert_equal "user", chat_input[0]["role"] + assert_equal "Say hello", chat_input[0]["content"] + + responses_span = spans[1] + assert_equal "openai.responses.create", responses_span.name + + responses_metadata = JSON.parse(responses_span.attributes["braintrust.metadata"]) + assert_equal "/v1/responses", responses_metadata["endpoint"] + assert_equal "gpt-4o-mini", responses_metadata["model"] + assert_equal "You are a helpful assistant.", responses_metadata["instructions"] + + responses_input = JSON.parse(responses_span.attributes["braintrust.input_json"]) + assert_equal "Say goodbye", responses_input + end + end + + def test_streaming_chat_and_responses_do_not_interfere + require "openai" + + # This test verifies that streaming for both chat completions and responses API + # work correctly without interfering when both streaming wrappers are active. + # This is critical because streaming uses different aggregation mechanisms. 
+ VCR.use_cassette("openai_streaming_chat_and_responses_no_interference") do + # Set up test rig + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + # Create OpenAI client and wrap it (wraps BOTH chat and responses) + client = OpenAI::Client.new(api_key: get_openai_key) + Braintrust::Trace::OpenAI.wrap(client, tracer_provider: rig.tracer_provider) + + # Skip if responses API not available + skip "Responses API not available in this OpenAI gem version" unless client.respond_to?(:responses) + + # First, make a STREAMING chat completion request + chat_content = "" + stream = client.chat.completions.stream_raw( + messages: [{role: "user", content: "Count from 1 to 3"}], + model: "gpt-4o-mini", + max_tokens: 50, + stream_options: {include_usage: true} + ) + stream.each do |chunk| + delta_content = chunk.choices[0]&.delta&.content + chat_content += delta_content if delta_content + end + refute_empty chat_content + + # Then, make a STREAMING responses API request + # This is where the bug would manifest if streaming wrappers interfere + responses_event_count = 0 + responses_stream = client.responses.stream( + model: "gpt-4o-mini", + instructions: "You are a helpful assistant.", + input: "Say hello" + ) + responses_stream.each do |event| + responses_event_count += 1 + end + assert responses_event_count > 0, "Should have received streaming events from responses API" + + # Drain both spans + spans = rig.drain + assert_equal 2, spans.length, "Should have 2 spans (chat streaming + responses streaming)" + + # Verify first span is for STREAMING chat completions + chat_span = spans[0] + assert_equal "Chat Completion", chat_span.name + chat_metadata = JSON.parse(chat_span.attributes["braintrust.metadata"]) + assert_equal "/v1/chat/completions", chat_metadata["endpoint"] + assert_equal true, chat_metadata["stream"], "Chat span should have stream flag" + assert_match(/gpt-4o-mini/, chat_metadata["model"]) + + # Verify chat input is 
messages array (chat API structure) + chat_input = JSON.parse(chat_span.attributes["braintrust.input_json"]) + assert_instance_of Array, chat_input + assert_equal "user", chat_input[0]["role"] + assert_equal "Count from 1 to 3", chat_input[0]["content"] + + # Verify chat output was aggregated from stream chunks + chat_output = JSON.parse(chat_span.attributes["braintrust.output_json"]) + assert_equal 1, chat_output.length + assert_equal "assistant", chat_output[0]["message"]["role"] + refute_nil chat_output[0]["message"]["content"] + assert chat_output[0]["message"]["content"].length > 0, "Chat content should be aggregated" + + responses_span = spans[1] + assert_equal "openai.responses.create", responses_span.name + + responses_metadata = JSON.parse(responses_span.attributes["braintrust.metadata"]) + assert_equal "/v1/responses", responses_metadata["endpoint"] + assert_equal true, responses_metadata["stream"] + assert_match(/gpt-4o-mini/, responses_metadata["model"]) + assert_equal "You are a helpful assistant.", responses_metadata["instructions"] + + responses_input = JSON.parse(responses_span.attributes["braintrust.input_json"]) + assert_equal "Say hello", responses_input + + assert responses_span.attributes.key?("braintrust.output_json") + responses_output = JSON.parse(responses_span.attributes["braintrust.output_json"]) + refute_nil responses_output + end + end + + def test_wrap_handles_streaming_with_text + VCR.use_cassette("openai/streaming_text") do + require "openai" + + # Set up test rig + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + # Patch OpenAI at class level (all new clients will be auto-traced) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + # Create OpenAI client AFTER patching + client = OpenAI::Client.new(api_key: get_openai_key) + + # Make a streaming request using .stream (not .stream_raw) + stream = client.chat.completions.stream( + messages: [ + {role: "user", content: "Count from 1 to 3"} + ], + model: "gpt-4o-mini", + max_tokens: 50, + stream_options: { + include_usage: true + } + ) + + # Consume the stream using .text() method + full_text = "" + stream.text.each do |delta| + full_text += delta + end + + # Verify we got content + refute_empty full_text + + # Drain and verify span + span = rig.drain_one + + # Verify span name + assert_equal "Chat Completion", span.name + + # Verify input was captured + assert span.attributes.key?("braintrust.input_json") + input = JSON.parse(span.attributes["braintrust.input_json"]) + assert_equal 1, input.length + assert_equal "user", input[0]["role"] + assert_equal "Count from 1 to 3", input[0]["content"] + + # Verify output was aggregated from stream + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert_equal 1, output.length + assert_equal 0, output[0]["index"] + assert_equal "assistant", output[0]["message"]["role"] + assert output[0]["message"]["content"], "Should have aggregated content" + assert output[0]["message"]["content"].length > 0, "Content should not be empty" + + # Verify metadata includes stream flag + assert span.attributes.key?("braintrust.metadata") + metadata = JSON.parse(span.attributes["braintrust.metadata"]) + assert_equal "openai", metadata["provider"] + assert_equal true, metadata["stream"] + assert_match(/gpt-4o-mini/, metadata["model"]) + + # Verify metrics were captured (if include_usage was respected) + assert span.attributes.key?("braintrust.metrics"), "Should have braintrust.metrics" + metrics = JSON.parse(span.attributes["braintrust.metrics"]) + assert metrics["tokens"] > 0 if metrics["tokens"] + + # Verify time_to_first_token metric is present + 
assert metrics.key?("time_to_first_token"), "Should have time_to_first_token metric" + assert metrics["time_to_first_token"] >= 0, "time_to_first_token should be >= 0" + end + end + + def test_wrap_handles_streaming_with_get_final_completion + VCR.use_cassette("openai/streaming_get_final_completion") do + require "openai" + + # Set up test rig + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + # Patch OpenAI at class level (all new clients will be auto-traced) + Braintrust::Contrib::OpenAI::Integration.patch! + + # Create OpenAI client AFTER patching + client = OpenAI::Client.new(api_key: get_openai_key) + + # Make a streaming request using .stream + stream = client.chat.completions.stream( + messages: [ + {role: "user", content: "Say hello"} + ], + model: "gpt-4o-mini", + max_tokens: 20, + stream_options: { + include_usage: true + } + ) + + # Use .get_final_completion() to block and get final result + completion = stream.get_final_completion + + # Verify we got a completion + refute_nil completion + refute_nil completion.choices + assert completion.choices.length > 0 + refute_nil completion.choices[0].message.content + + # Drain and verify span + span = rig.drain_one + + # Verify span name + assert_equal "Chat Completion", span.name + + # Verify output was captured + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert_equal 1, output.length + assert output[0]["message"]["content"], "Should have captured content" + end + end + + def test_wrap_handles_streaming_with_get_output_text + VCR.use_cassette("openai/streaming_get_output_text") do + require "openai" + + # Set up test rig + rig = setup_otel_test_rig + Braintrust::Contrib.init(tracer_provider: rig.tracer_provider) + + # Patch OpenAI at class level (all new clients will be auto-traced) + Braintrust::Contrib::OpenAI::Integration.patch! 
+ + # Create OpenAI client AFTER patching + client = OpenAI::Client.new(api_key: get_openai_key) + + # Make a streaming request using .stream + stream = client.chat.completions.stream( + messages: [ + {role: "user", content: "Say hello"} + ], + model: "gpt-4o-mini", + max_tokens: 20, + stream_options: { + include_usage: true + } + ) + + # Use .get_output_text() to block and get final text + output_text = stream.get_output_text + + # Verify we got text + refute_nil output_text + refute_empty output_text + + # Drain and verify span + span = rig.drain_one + + # Verify span name + assert_equal "Chat Completion", span.name + + # Verify output was captured + assert span.attributes.key?("braintrust.output_json") + output = JSON.parse(span.attributes["braintrust.output_json"]) + assert_equal 1, output.length + assert output[0]["message"]["content"], "Should have captured content" + end + end +end diff --git a/test/braintrust/contrib/openai/patcher_test.rb b/test/braintrust/contrib/openai/patcher_test.rb new file mode 100644 index 0000000..81493e1 --- /dev/null +++ b/test/braintrust/contrib/openai/patcher_test.rb @@ -0,0 +1,163 @@ +# frozen_string_literal: true + +require "test_helper" + +# Explicitly load the patcher (lazy-loaded by integration) +require "braintrust/contrib/openai/patcher" + +class Braintrust::Contrib::OpenAI::PatcherTest < Minitest::Test + def setup + # Skip all tests if the OpenAI gem is not available + skip "OpenAI gem not available" unless defined?(OpenAI) + + # Check which gem is loaded + if Gem.loaded_specs["ruby-openai"] + skip "openai gem not available (found ruby-openai gem instead)" + elsif !Gem.loaded_specs["openai"] + skip "Could not determine which OpenAI gem is loaded" + end + end + + # ChatPatcher tests + + def test_chat_patcher_applicable_returns_true + assert Braintrust::Contrib::OpenAI::ChatPatcher.applicable? 
+ end + + def test_chat_patcher_includes_correct_module_for_class_level + fake_completions = Minitest::Mock.new + fake_completions.expect(:include, true, [Braintrust::Contrib::OpenAI::Instrumentation::Chat::Completions]) + + OpenAI::Resources::Chat.stub_const(:Completions, fake_completions) do + Braintrust::Contrib::OpenAI::ChatPatcher.perform_patch + fake_completions.verify + end + end + + def test_chat_patcher_includes_correct_module_for_instance_level + fake_singleton_class = Minitest::Mock.new + fake_singleton_class.expect(:include, true, [Braintrust::Contrib::OpenAI::Instrumentation::Chat::Completions]) + + fake_completions = Minitest::Mock.new + fake_completions.expect(:singleton_class, fake_singleton_class) + + fake_chat = Minitest::Mock.new + fake_chat.expect(:completions, fake_completions) + + fake_client = Minitest::Mock.new + fake_client.expect(:is_a?, true, [::OpenAI::Client]) + fake_client.expect(:chat, fake_chat) + + Braintrust::Contrib::OpenAI::ChatPatcher.perform_patch(target: fake_client) + + fake_singleton_class.verify + fake_completions.verify + fake_chat.verify + fake_client.verify + end + + def test_chat_patcher_patched_returns_false_when_not_patched + fake_completions = Class.new + + OpenAI::Resources::Chat.stub_const(:Completions, fake_completions) do + refute Braintrust::Contrib::OpenAI::ChatPatcher.patched? + end + end + + def test_chat_patcher_patched_returns_true_after_patching + # Use real classes for this test since we're actually including modules + Braintrust::Contrib::OpenAI::ChatPatcher.perform_patch + + assert Braintrust::Contrib::OpenAI::ChatPatcher.patched? + end + + # ResponsesPatcher tests + + def test_responses_patcher_applicable_with_responses_method + assert Braintrust::Contrib::OpenAI::ResponsesPatcher.applicable? 
+ end + + def test_responses_patcher_includes_correct_module_for_class_level + skip "Responses API not available" unless OpenAI::Client.instance_methods.include?(:responses) + + fake_responses = Minitest::Mock.new + fake_responses.expect(:include, true, [Braintrust::Contrib::OpenAI::Instrumentation::Responses]) + + OpenAI::Resources.stub_const(:Responses, fake_responses) do + Braintrust::Contrib::OpenAI::ResponsesPatcher.perform_patch + fake_responses.verify + end + end + + def test_responses_patcher_includes_correct_module_for_instance_level + skip "Responses API not available" unless OpenAI::Client.instance_methods.include?(:responses) + + fake_singleton_class = Minitest::Mock.new + fake_singleton_class.expect(:include, true, [Braintrust::Contrib::OpenAI::Instrumentation::Responses]) + + fake_responses = Minitest::Mock.new + fake_responses.expect(:singleton_class, fake_singleton_class) + + fake_client = Minitest::Mock.new + fake_client.expect(:is_a?, true, [::OpenAI::Client]) + fake_client.expect(:responses, fake_responses) + + Braintrust::Contrib::OpenAI::ResponsesPatcher.perform_patch(target: fake_client) + + fake_singleton_class.verify + fake_responses.verify + fake_client.verify + end + + def test_responses_patcher_patched_returns_false_when_not_patched + skip "Responses API not available" unless OpenAI::Client.instance_methods.include?(:responses) + + fake_responses = Class.new + + OpenAI::Resources.stub_const(:Responses, fake_responses) do + refute Braintrust::Contrib::OpenAI::ResponsesPatcher.patched? + end + end + + def test_responses_patcher_patched_returns_true_after_patching + skip "Responses API not available" unless OpenAI::Client.instance_methods.include?(:responses) + + # Use real classes for this test since we're actually including modules + Braintrust::Contrib::OpenAI::ResponsesPatcher.perform_patch + + assert Braintrust::Contrib::OpenAI::ResponsesPatcher.patched? + end + + # Integration patch! 
method tests (these test the Integration layer, not instrumentation) + + def test_integration_patch_applies_all_applicable_patchers + fake_chat = Minitest::Mock.new + # ancestors is called twice per patcher: once for fast-path patched? check, once under lock + 2.times { fake_chat.expect(:ancestors, []) } + fake_chat.expect(:include, true, [Braintrust::Contrib::OpenAI::Instrumentation::Chat::Completions]) + + fake_responses = Minitest::Mock.new + 2.times { fake_responses.expect(:ancestors, []) } + fake_responses.expect(:include, true, [Braintrust::Contrib::OpenAI::Instrumentation::Responses]) + + OpenAI::Resources::Chat.stub_const(:Completions, fake_chat) do + OpenAI::Resources.stub_const(:Responses, fake_responses) do + result = Braintrust::Contrib::OpenAI::Integration.patch! + + assert result, "patch! should return true when patchers succeed" + fake_chat.verify + fake_responses.verify if OpenAI::Client.instance_methods.include?(:responses) + end + end + end + + def test_integration_patch_is_idempotent + # First patch + result1 = Braintrust::Contrib::OpenAI::Integration.patch! + assert result1, "First patch should succeed" + + # Second patch should also succeed (idempotent) + result2 = Braintrust::Contrib::OpenAI::Integration.patch! 
+ assert result2, "Second patch should also succeed (idempotent)" + end +end diff --git a/test/braintrust/trace/openai_test.rb b/test/braintrust/trace/openai_test.rb index 22e3233..989d633 100644 --- a/test/braintrust/trace/openai_test.rb +++ b/test/braintrust/trace/openai_test.rb @@ -864,199 +864,6 @@ def test_streaming_chat_and_responses_do_not_interfere end end - def test_traced_vs_raw_chat_completions_non_streaming - require "openai" - - # This test verifies that tracing doesn't mutate the response - # by comparing output from a raw client vs a traced client - VCR.use_cassette("openai_traced_vs_raw_chat_non_streaming") do - # Set up test rig - rig = setup_otel_test_rig - - # Create two clients: one raw, one traced - raw_client = OpenAI::Client.new(api_key: get_openai_key) - traced_client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(traced_client, tracer_provider: rig.tracer_provider) - - # Make identical requests - params = { - messages: [{role: "user", content: "Say hello"}], - model: "gpt-4o-mini", - max_tokens: 10 - } - - raw_response = raw_client.chat.completions.create(**params) - traced_response = traced_client.chat.completions.create(**params) - - assert_match(/gpt-4o-mini/, raw_response.model) - assert_match(/gpt-4o-mini/, traced_response.model) - assert_equal raw_response.choices.length, traced_response.choices.length - assert_equal raw_response.choices[0].message.role, traced_response.choices[0].message.role - refute_nil raw_response.choices[0].message.content - refute_nil traced_response.choices[0].message.content - assert_equal raw_response.choices[0].finish_reason, traced_response.choices[0].finish_reason - assert_operator raw_response.usage.total_tokens, :>, 0 - assert_operator traced_response.usage.total_tokens, :>, 0 - - spans = rig.drain - assert_equal 1, spans.length - assert_equal "Chat Completion", spans[0].name - end - end - - def test_traced_vs_raw_chat_completions_streaming - require "openai" - - # This 
test verifies that tracing doesn't mutate streaming responses - VCR.use_cassette("openai_traced_vs_raw_chat_streaming") do - # Set up test rig - rig = setup_otel_test_rig - - # Create two clients: one raw, one traced - raw_client = OpenAI::Client.new(api_key: get_openai_key) - traced_client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(traced_client, tracer_provider: rig.tracer_provider) - - # Make identical streaming requests - params = { - messages: [{role: "user", content: "Count from 1 to 3"}], - model: "gpt-4o-mini", - max_tokens: 50, - stream_options: {include_usage: true} - } - - raw_chunks = [] - raw_stream = raw_client.chat.completions.stream_raw(**params) - raw_stream.each do |chunk| - raw_chunks << chunk - end - - traced_chunks = [] - traced_stream = traced_client.chat.completions.stream_raw(**params) - traced_stream.each do |chunk| - traced_chunks << chunk - end - - assert_operator (raw_chunks.length - traced_chunks.length).abs, :<=, 2 - if raw_chunks[0].respond_to?(:model) && traced_chunks[0].respond_to?(:model) - assert_match(/gpt-4o-mini/, raw_chunks[0].model) - assert_match(/gpt-4o-mini/, traced_chunks[0].model) - end - - raw_content = raw_chunks.map { |c| c.choices[0]&.delta&.content }.compact.join - traced_content = traced_chunks.map { |c| c.choices[0]&.delta&.content }.compact.join - refute_empty raw_content - refute_empty traced_content - - spans = rig.drain - assert_equal 1, spans.length - assert_equal "Chat Completion", spans[0].name - end - end - - def test_traced_vs_raw_responses_non_streaming - require "openai" - - # This test verifies that tracing doesn't mutate responses API output - VCR.use_cassette("openai_traced_vs_raw_responses_non_streaming") do - # Set up test rig - rig = setup_otel_test_rig - - # Create two clients: one raw, one traced - raw_client = OpenAI::Client.new(api_key: get_openai_key) - traced_client = OpenAI::Client.new(api_key: get_openai_key) - 
Braintrust::Trace::OpenAI.wrap(traced_client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this OpenAI gem version" unless raw_client.respond_to?(:responses) - - # Make identical requests - params = { - model: "gpt-4o-mini", - instructions: "You are a helpful assistant.", - input: "Say hello" - } - - raw_response = raw_client.responses.create(**params) - traced_response = traced_client.responses.create(**params) - - assert_match(/gpt-4o-mini/, raw_response.model) - assert_match(/gpt-4o-mini/, traced_response.model) - assert_equal raw_response.output.length, traced_response.output.length - - raw_output = raw_response.output.first.content.first - traced_output = traced_response.output.first.content.first - assert_equal raw_output[:type], traced_output[:type] - refute_nil raw_output[:text] - refute_nil traced_output[:text] - assert_operator raw_output[:text].length, :>, 0 - assert_operator traced_output[:text].length, :>, 0 - - assert_operator raw_response.usage.total_tokens, :>, 0 - assert_operator traced_response.usage.total_tokens, :>, 0 - - spans = rig.drain - assert_equal 1, spans.length - assert_equal "openai.responses.create", spans[0].name - end - end - - def test_traced_vs_raw_responses_streaming - require "openai" - - # This test verifies that tracing doesn't mutate responses API streaming - VCR.use_cassette("openai_traced_vs_raw_responses_streaming") do - # Set up test rig - rig = setup_otel_test_rig - - # Create two clients: one raw, one traced - raw_client = OpenAI::Client.new(api_key: get_openai_key) - traced_client = OpenAI::Client.new(api_key: get_openai_key) - Braintrust::Trace::OpenAI.wrap(traced_client, tracer_provider: rig.tracer_provider) - - # Skip if responses API not available - skip "Responses API not available in this OpenAI gem version" unless raw_client.respond_to?(:responses) - - params = { - model: "gpt-4o-mini", - instructions: "You are a helpful assistant.", - 
input: "Count from 1 to 3" - } - - raw_events = [] - raw_stream = raw_client.responses.stream(**params) - raw_stream.each do |event| - raw_events << event - end - - traced_events = [] - traced_stream = traced_client.responses.stream(**params) - traced_stream.each do |event| - traced_events << event - end - - assert_equal raw_events.length, traced_events.length - - raw_event_types = raw_events.map(&:type) - traced_event_types = traced_events.map(&:type) - assert_equal raw_event_types, traced_event_types - - raw_completed = raw_events.find { |e| e.type == "response.completed" } - traced_completed = traced_events.find { |e| e.type == "response.completed" } - - if raw_completed && traced_completed - assert_match(/gpt-4o-mini/, raw_completed.response.model) - assert_match(/gpt-4o-mini/, traced_completed.response.model) - assert_operator raw_completed.response.usage.total_tokens, :>, 0 - assert_operator traced_completed.response.usage.total_tokens, :>, 0 - end - - spans = rig.drain - assert_equal 1, spans.length - assert_equal "openai.responses.create", spans[0].name - end - end - def test_wrap_handles_streaming_with_text VCR.use_cassette("openai/streaming_text") do require "openai" diff --git a/test/fixtures/vcr_cassettes/openai/streaming_chat_completions.yml b/test/fixtures/vcr_cassettes/openai/streaming_chat_completions.yml new file mode 100644 index 0000000..8701555 --- /dev/null +++ b/test/fixtures/vcr_cassettes/openai/streaming_chat_completions.yml @@ -0,0 +1,130 @@ +--- +http_interactions: +- request: + method: post + uri: https://api.openai.com/v1/chat/completions + body: + encoding: UTF-8 + string: '{"messages":[{"role":"user","content":"Say hello"}],"model":"gpt-4o-mini","max_tokens":20,"stream_options":{"include_usage":true},"stream":true}' + headers: + Accept-Encoding: + - gzip;q=1.0,deflate;q=0.6,identity;q=0.3 + Accept: + - text/event-stream + User-Agent: + - OpenAI::Client/Ruby 0.39.0 + Host: + - api.openai.com + X-Stainless-Arch: + - x64 + 
X-Stainless-Lang: + - ruby + X-Stainless-Os: + - Linux + X-Stainless-Package-Version: + - 0.39.0 + X-Stainless-Runtime: + - ruby + X-Stainless-Runtime-Version: + - 3.2.9 + Content-Type: + - application/json + Authorization: + - Bearer + X-Stainless-Retry-Count: + - '0' + X-Stainless-Timeout: + - '600.0' + Content-Length: + - '144' + response: + status: + code: 200 + message: OK + headers: + Date: + - Wed, 17 Dec 2025 21:39:31 GMT + Content-Type: + - text/event-stream; charset=utf-8 + Transfer-Encoding: + - chunked + Connection: + - keep-alive + Access-Control-Expose-Headers: + - X-Request-ID + Openai-Organization: + - braintrust-data + Openai-Processing-Ms: + - '770' + Openai-Project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + Openai-Version: + - '2020-10-01' + X-Envoy-Upstream-Service-Time: + - '783' + X-Ratelimit-Limit-Requests: + - '30000' + X-Ratelimit-Limit-Tokens: + - '150000000' + X-Ratelimit-Remaining-Requests: + - '29999' + X-Ratelimit-Remaining-Tokens: + - '149999995' + X-Ratelimit-Reset-Requests: + - 2ms + X-Ratelimit-Reset-Tokens: + - 0s + X-Request-Id: + - req_ec79554aeb544044b6c2cacda6bf5deb + X-Openai-Proxy-Wasm: + - v0.1 + Cf-Cache-Status: + - DYNAMIC + Set-Cookie: + - __cf_bm=SULTPs8wSyJcfdzyW7AfYgAxNgFQhBWBh.cql5YgWK0-1766007571-1.0.1.1-dQJHZ9TQEcAnlZRHvdg1XfsuT75SbqxOy7CshY.xN_PW51.1TgtNQ9fBUxZb6_8NQvDeFYe3EjwY3BuSvB4Tx.xZYCofVzH1yOnByqls5oU; + path=/; expires=Wed, 17-Dec-25 22:09:31 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=jTnwp.LHHLW48BnvmEnO2LcqYuzDdW.89mY1nRyPvtU-1766007571885-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + Server: + - cloudflare + Cf-Ray: + - 9af992d469010010-ORD + Alt-Svc: + - h3=":443"; ma=86400 + body: + encoding: UTF-8 + string: |+ + data: 
{"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"KaaErWmpF"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Hf0SpV"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"GGciGwweQs"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":" How"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"KOLqgQ8"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":" can"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"xKM2bpu"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":" 
I"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"o0w7wEHVB"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":" assist"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"pIQQ"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":" you"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"0PBQOon"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":" today"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"gtUVW"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"tvwmRpNgk7"} + + data: {"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"72xRr"} + + data: 
{"id":"chatcmpl-CntX14GGp6AdBZQ0SPoDV0yLpoZuW","object":"chat.completion.chunk","created":1766007571,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ee69c2ef48","choices":[],"usage":{"prompt_tokens":9,"completion_tokens":9,"total_tokens":18,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"NF9h1hYqo9hk"} + + data: [DONE] + + recorded_at: Wed, 17 Dec 2025 21:39:32 GMT +recorded_with: VCR 6.3.1 +...