From a586263d3f4908ddc2b795f196e034c4d2c86010 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 02:58:53 +0000
Subject: [PATCH 01/17] Extract run_benchmarks to its own object,
 BenchmarkSuite

---
 lib/benchmark_runner.rb       |  55 ----
 lib/benchmark_suite.rb        | 188 +++++++++++++
 run_benchmarks.rb             | 131 +--------
 test/benchmark_runner_test.rb | 110 --------
 test/benchmark_suite_test.rb  | 488 ++++++++++++++++++++++++++++++++++
 5 files changed, 680 insertions(+), 292 deletions(-)
 create mode 100644 lib/benchmark_suite.rb
 create mode 100644 test/benchmark_suite_test.rb

diff --git a/lib/benchmark_runner.rb b/lib/benchmark_runner.rb
index 4cc4a886..e3ea26de 100644
--- a/lib/benchmark_runner.rb
+++ b/lib/benchmark_runner.rb
@@ -16,31 +16,6 @@ def free_file_no(directory)
     end
   end
 
-  # Resolve the pre_init file path into a form that can be required
-  def expand_pre_init(path)
-    require 'pathname'
-
-    path = Pathname.new(path)
-
-    unless path.exist?
-      puts "--with-pre-init called with non-existent file!"
-      exit(-1)
-    end
-
-    if path.directory?
-      puts "--with-pre-init called with a directory, please pass a .rb file"
-      exit(-1)
-    end
-
-    library_name = path.basename(path.extname)
-    load_path = path.parent.expand_path
-
-    [
-      "-I", load_path,
-      "-r", library_name
-    ]
-  end
-
   # Sort benchmarks with headlines first, then others, then micro
   def sort_benchmarks(bench_names, metadata)
     headline_benchmarks = metadata.select { |_, meta| meta['category'] == 'headline' }.keys
@@ -51,36 +26,6 @@ def sort_benchmarks(bench_names, metadata)
     headline_names.sort + other_names.sort + micro_names.sort
   end
 
-  # Check which OS we are running
-  def os
-    @os ||= (
-      host_os = RbConfig::CONFIG['host_os']
-      case host_os
-      when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
-        :windows
-      when /darwin|mac os/
-        :macosx
-      when /linux/
-        :linux
-      when /solaris|bsd/
-        :unix
-      else
-        raise "unknown os: #{host_os.inspect}"
-      end
-    )
-  end
-
-  # Generate setarch prefix for Linux
-  def setarch_prefix
-    # Disable address space randomization (for determinism)
-    prefix = ["setarch", `uname -m`.strip, "-R"]
-
-    # Abort if we don't have permission (perhaps in a docker container).
-    return [] unless system(*prefix, "true", out: File::NULL, err: File::NULL)
-
-    prefix
-  end
-
   # Checked system - error or return info if the command fails
   def check_call(command, env: {}, raise_error: true, quiet: false)
     puts("+ #{command}") unless quiet
diff --git a/lib/benchmark_suite.rb b/lib/benchmark_suite.rb
new file mode 100644
index 00000000..5b7ceabc
--- /dev/null
+++ b/lib/benchmark_suite.rb
@@ -0,0 +1,188 @@
+# frozen_string_literal: true
+
+require 'json'
+require 'pathname'
+require 'fileutils'
+require 'shellwords'
+require 'etc'
+require 'yaml'
+require 'rbconfig'
+require_relative 'benchmark_filter'
+
+# BenchmarkSuite runs a collection of benchmarks and collects their results
+class BenchmarkSuite
+  attr_reader :ruby, :ruby_description, :categories, :name_filters, :out_path, :harness, :pre_init, :no_pinning
+
+  def initialize(ruby:, ruby_description:, categories:, name_filters:, out_path:, harness:, pre_init: nil, no_pinning: false)
+    @ruby = ruby
+    @ruby_description = ruby_description
+    @categories = categories
+    @name_filters = name_filters
+    @out_path = out_path
+    @harness = harness
+    @pre_init = pre_init ? expand_pre_init(pre_init) : nil
+    @no_pinning = no_pinning
+  end
+
+  # Run all the benchmarks and record execution times
+  # Returns [bench_data, bench_failures]
+  def run
+    bench_data = {}
+    bench_failures = {}
+
+    bench_dir = "benchmarks"
+    ractor_bench_dir = "benchmarks-ractor"
+
+    if categories == ["ractor-only"]
+      bench_dir = ractor_bench_dir
+      @harness = "harness-ractor"
+      @categories = []
+    end
+
+    bench_file_grouping = {}
+
+    # Get the list of benchmark files/directories matching name filters
+    filter = benchmark_filter(categories: categories, name_filters: name_filters)
+    bench_file_grouping[bench_dir] = Dir.children(bench_dir).sort.filter do |entry|
+      filter.match?(entry)
+    end
+
+    if categories == ["ractor"]
+      # We ignore the category filter here because everything in the
+      # benchmarks-ractor directory should be included when we're benchmarking the
+      # Ractor category
+      ractor_filter = benchmark_filter(categories: [], name_filters: name_filters)
+      bench_file_grouping[ractor_bench_dir] = Dir.children(ractor_bench_dir).sort.filter do |entry|
+        ractor_filter.match?(entry)
+      end
+    end
+
+    bench_file_grouping.each do |bench_dir, bench_files|
+      bench_files.each_with_index do |entry, idx|
+        bench_name = entry.gsub('.rb', '')
+
+        puts("Running benchmark \"#{bench_name}\" (#{idx+1}/#{bench_files.length})")
+
+        # Path to the benchmark runner script
+        script_path = File.join(bench_dir, entry)
+
+        if !script_path.end_with?('.rb')
+          script_path = File.join(script_path, 'benchmark.rb')
+        end
+
+        # Set up the environment for the benchmarking command
+        result_json_path = File.join(out_path, "temp#{Process.pid}.json")
+        ENV["RESULT_JSON_PATH"] = result_json_path
+
+        # Set up the benchmarking command
+        cmd = []
+        if linux?
+          cmd += setarch_prefix
+
+          # Pin the process to one given core to improve caching and reduce variance on CRuby
+          # Other Rubies need to use multiple cores, e.g., for JIT threads
+          if ruby_description.start_with?('ruby ') && !no_pinning
+            # The last few cores of Intel CPU may be slow E-Cores, so avoid using the last one.
+            cpu = [(Etc.nprocessors / 2) - 1, 0].max
+            cmd += ["taskset", "-c", "#{cpu}"]
+          end
+        end
+
+        # Fix for jruby/jruby#7394 in JRuby 9.4.2.0
+        script_path = File.expand_path(script_path)
+
+        cmd += [
+          *ruby,
+          "-I", harness,
+          *pre_init,
+          script_path,
+        ].compact
+
+        # When the Ruby running this script is not the first Ruby in PATH, shell commands
+        # like `bundle install` in a child process will not use the Ruby being benchmarked.
+        # It overrides PATH to guarantee the commands of the benchmarked Ruby will be used.
+        env = {}
+        ruby_path = `#{ruby.shelljoin} -e 'print RbConfig.ruby' 2> #{File::NULL}`
+        if ruby_path != RbConfig.ruby
+          env["PATH"] = "#{File.dirname(ruby_path)}:#{ENV["PATH"]}"
+
+          # chruby sets GEM_HOME and GEM_PATH in your shell. We have to unset it in the child
+          # process to avoid installing gems to the version that is running run_benchmarks.rb.
+          ["GEM_HOME", "GEM_PATH"].each do |var|
+            env[var] = nil if ENV.key?(var)
+          end
+        end
+
+        # Do the benchmarking
+        result = BenchmarkRunner.check_call(cmd.shelljoin, env: env, raise_error: false)
+
+        if result[:success]
+          bench_data[bench_name] = JSON.parse(File.read(result_json_path)).tap do |json|
+            json["command_line"] = cmd.shelljoin
+            File.unlink(result_json_path)
+          end
+        else
+          bench_failures[bench_name] = result[:status].exitstatus
+        end
+
+      end
+    end
+
+    [bench_data, bench_failures]
+  end
+
+  private
+
+  def benchmark_filter(categories:, name_filters:)
+    @benchmark_filter ||= {}
+    key = [categories, name_filters]
+    @benchmark_filter[key] ||= BenchmarkFilter.new(
+      categories: categories,
+      name_filters: name_filters,
+      metadata: benchmarks_metadata
+    )
+  end
+
+  def benchmarks_metadata
+    @benchmarks_metadata ||= YAML.load_file('benchmarks.yml')
+  end
+
+  # Check if running on Linux
+  def linux?
+    RbConfig::CONFIG['host_os'] =~ /linux/
+  end
+
+  # Generate setarch prefix for Linux
+  def setarch_prefix
+    # Disable address space randomization (for determinism)
+    prefix = ["setarch", `uname -m`.strip, "-R"]
+
+    # Abort if we don't have permission (perhaps in a docker container).
+    return [] unless system(*prefix, "true", out: File::NULL, err: File::NULL)
+
+    prefix
+  end
+
+  # Resolve the pre_init file path into a form that can be required
+  def expand_pre_init(path)
+    path = Pathname.new(path)
+
+    unless path.exist?
+      puts "--with-pre-init called with non-existent file!"
+      exit(-1)
+    end
+
+    if path.directory?
+      puts "--with-pre-init called with a directory, please pass a .rb file"
+      exit(-1)
+    end
+
+    library_name = path.basename(path.extname)
+    load_path = path.parent.expand_path
+
+    [
+      "-I", load_path,
+      "-r", library_name
+    ]
+  end
+end
diff --git a/run_benchmarks.rb b/run_benchmarks.rb
index 9c7038d5..f4991686 100755
--- a/run_benchmarks.rb
+++ b/run_benchmarks.rb
@@ -11,8 +11,8 @@
 require_relative 'misc/stats'
 require_relative 'lib/cpu_config'
 require_relative 'lib/benchmark_runner'
+require_relative 'lib/benchmark_suite'
 require_relative 'lib/table_formatter'
-require_relative 'lib/benchmark_filter'
 require_relative 'lib/argument_parser'
 
 def mean(values)
@@ -23,135 +23,11 @@ def stddev(values)
   Stats.new(values).stddev
 end
 
-def benchmark_filter(categories:, name_filters:)
-  @benchmark_filter ||= {}
-  key = [categories, name_filters]
-  @benchmark_filter[key] ||= BenchmarkFilter.new(
-    categories: categories,
-    name_filters: name_filters,
-    metadata: benchmarks_metadata
-  )
-end
-
-def benchmarks_metadata
-  @benchmarks_metadata ||= YAML.load_file('benchmarks.yml')
-end
-
 def sort_benchmarks(bench_names)
+  benchmarks_metadata = YAML.load_file('benchmarks.yml')
   BenchmarkRunner.sort_benchmarks(bench_names, benchmarks_metadata)
 end
 
-# Run all the benchmarks and record execution times
-def run_benchmarks(ruby:, ruby_description:, categories:, name_filters:, out_path:, harness:, pre_init:, no_pinning:)
-  bench_data = {}
-  bench_failures = {}
-
-  bench_dir = "benchmarks"
-  ractor_bench_dir = "benchmarks-ractor"
-
-  if categories == ["ractor-only"]
-    bench_dir = ractor_bench_dir
-    harness = "harness-ractor"
-    categories = []
-  end
-
-  bench_file_grouping = {}
-
-  # Get the list of benchmark files/directories matching name filters
-  filter = benchmark_filter(categories: categories, name_filters: name_filters)
-  bench_file_grouping[bench_dir] = Dir.children(bench_dir).sort.filter do |entry|
-    filter.match?(entry)
-  end
-
-  if categories == ["ractor"]
-    # We ignore the category filter here because everything in the
-    # benchmarks-ractor directory should be included when we're benchmarking the
-    # Ractor category
-    ractor_filter = benchmark_filter(categories: [], name_filters: name_filters)
-    bench_file_grouping[ractor_bench_dir] = Dir.children(ractor_bench_dir).sort.filter do |entry|
-      ractor_filter.match?(entry)
-    end
-  end
-
-  if pre_init
-    pre_init = BenchmarkRunner.expand_pre_init(pre_init)
-  end
-
-
-  bench_file_grouping.each do |bench_dir, bench_files|
-    bench_files.each_with_index do |entry, idx|
-      bench_name = entry.gsub('.rb', '')
-
-      puts("Running benchmark \"#{bench_name}\" (#{idx+1}/#{bench_files.length})")
-
-      # Path to the benchmark runner script
-      script_path = File.join(bench_dir, entry)
-
-      if !script_path.end_with?('.rb')
-        script_path = File.join(script_path, 'benchmark.rb')
-      end
-
-      # Set up the environment for the benchmarking command
-      result_json_path = File.join(out_path, "temp#{Process.pid}.json")
-      ENV["RESULT_JSON_PATH"] = result_json_path
-
-      # Set up the benchmarking command
-      cmd = []
-      if BenchmarkRunner.os == :linux
-        cmd += BenchmarkRunner.setarch_prefix
-
-        # Pin the process to one given core to improve caching and reduce variance on CRuby
-        # Other Rubies need to use multiple cores, e.g., for JIT threads
-        if ruby_description.start_with?('ruby ') && !no_pinning
-          # The last few cores of Intel CPU may be slow E-Cores, so avoid using the last one.
-          cpu = [(Etc.nprocessors / 2) - 1, 0].max
-          cmd += ["taskset", "-c", "#{cpu}"]
-        end
-      end
-
-      # Fix for jruby/jruby#7394 in JRuby 9.4.2.0
-      script_path = File.expand_path(script_path)
-
-      cmd += [
-        *ruby,
-        "-I", harness,
-        *pre_init,
-        script_path,
-      ].compact
-
-      # When the Ruby running this script is not the first Ruby in PATH, shell commands
-      # like `bundle install` in a child process will not use the Ruby being benchmarked.
-      # It overrides PATH to guarantee the commands of the benchmarked Ruby will be used.
-      env = {}
-      ruby_path = `#{ruby.shelljoin} -e 'print RbConfig.ruby' 2> #{File::NULL}`
-      if ruby_path != RbConfig.ruby
-        env["PATH"] = "#{File.dirname(ruby_path)}:#{ENV["PATH"]}"
-
-        # chruby sets GEM_HOME and GEM_PATH in your shell. We have to unset it in the child
-        # process to avoid installing gems to the version that is running run_benchmarks.rb.
-        ["GEM_HOME", "GEM_PATH"].each do |var|
-          env[var] = nil if ENV.key?(var)
-        end
-      end
-
-      # Do the benchmarking
-      result = BenchmarkRunner.check_call(cmd.shelljoin, env: env, raise_error: false)
-
-      if result[:success]
-        bench_data[bench_name] = JSON.parse(File.read(result_json_path)).tap do |json|
-          json["command_line"] = cmd.shelljoin
-          File.unlink(result_json_path)
-        end
-      else
-        bench_failures[bench_name] = result[:status].exitstatus
-      end
-
-    end
-  end
-
-  [bench_data, bench_failures]
-end
-
 args = ArgumentParser.parse(ARGV)
 
 CPUConfig.configure_for_benchmarking(turbo: args.turbo)
@@ -169,7 +45,7 @@ def run_benchmarks(ruby:, ruby_description:, categories:, name_filters:, out_pat
 bench_data = {}
 bench_failures = {}
 args.executables.each do |name, executable|
-  bench_data[name], failures = run_benchmarks(
+  suite = BenchmarkSuite.new(
     ruby: executable,
     ruby_description: ruby_descriptions[name],
     categories: args.categories,
@@ -179,6 +55,7 @@ def run_benchmarks(ruby:, ruby_description:, categories:, name_filters:, out_pat
     pre_init: args.with_pre_init,
     no_pinning: args.no_pinning
   )
+  bench_data[name], failures = suite.run
   # Make it easier to query later.
   bench_failures[name] = failures unless failures.empty?
 end
diff --git a/test/benchmark_runner_test.rb b/test/benchmark_runner_test.rb
index 3ecd592f..20a7b615 100644
--- a/test/benchmark_runner_test.rb
+++ b/test/benchmark_runner_test.rb
@@ -49,64 +49,6 @@
     end
   end
 
-  describe '.expand_pre_init' do
-    it 'returns load path and require options for valid file' do
-      Dir.mktmpdir do |dir|
-        file = File.join(dir, 'pre_init.rb')
-        FileUtils.touch(file)
-
-        result = BenchmarkRunner.expand_pre_init(file)
-
-        assert_equal 4, result.length
-        assert_equal '-I', result[0]
-        assert_equal dir, result[1].to_s
-        assert_equal '-r', result[2]
-        assert_equal 'pre_init', result[3].to_s
-      end
-    end
-
-    it 'handles files with different extensions' do
-      Dir.mktmpdir do |dir|
-        file = File.join(dir, 'my_config.rb')
-        FileUtils.touch(file)
-
-        result = BenchmarkRunner.expand_pre_init(file)
-
-        assert_equal 'my_config', result[3].to_s
-      end
-    end
-
-    it 'handles nested directories' do
-      Dir.mktmpdir do |dir|
-        subdir = File.join(dir, 'config', 'initializers')
-        FileUtils.mkdir_p(subdir)
-        file = File.join(subdir, 'setup.rb')
-        FileUtils.touch(file)
-
-        result = BenchmarkRunner.expand_pre_init(file)
-
-        assert_equal subdir, result[1].to_s
-        assert_equal 'setup', result[3].to_s
-      end
-    end
-
-    it 'exits when file does not exist' do
-      out = capture_io do
-        assert_raises(SystemExit) { BenchmarkRunner.expand_pre_init('/nonexistent/file.rb') }
-      end
-      assert_includes out, "--with-pre-init called with non-existent file!\n"
-    end
-
-    it 'exits when path is a directory' do
-      Dir.mktmpdir do |dir|
-        out = capture_io do
-          assert_raises(SystemExit) { BenchmarkRunner.expand_pre_init(dir) }
-        end
-        assert_includes out, "--with-pre-init called with a directory, please pass a .rb file\n"
-      end
-    end
-  end
-
   describe '.sort_benchmarks' do
     before do
       @metadata = {
@@ -158,24 +100,6 @@
     end
   end
 
-  describe '.os' do
-    it 'detects the operating system' do
-      result = BenchmarkRunner.os
-      assert_includes [:linux, :macosx, :windows, :unix], result
-    end
-
-    it 'caches the os result' do
-      first_call = BenchmarkRunner.os
-      second_call = BenchmarkRunner.os
-      assert_equal second_call, first_call
-    end
-
-    it 'returns a symbol' do
-      result = BenchmarkRunner.os
-      assert_instance_of Symbol, result
-    end
-  end
-
   describe '.check_call' do
     it 'runs a successful command and returns success status' do
       result = nil
@@ -252,40 +176,6 @@
     end
   end
 
-  describe '.setarch_prefix' do
-    it 'returns an array' do
-      result = BenchmarkRunner.setarch_prefix
-      assert_instance_of Array, result
-    end
-
-    it 'returns setarch command on Linux with proper permissions' do
-      skip 'Not on Linux' unless BenchmarkRunner.os == :linux
-
-      prefix = BenchmarkRunner.setarch_prefix
-
-      # Should either return the prefix or empty array if no permission
-      assert_includes [0, 3], prefix.length
-
-      if prefix.length == 3
-        assert_equal 'setarch', prefix[0]
-        assert_equal '-R', prefix[2]
-      end
-    end
-
-    it 'returns empty array when setarch fails' do
-      skip 'Test requires Linux' unless BenchmarkRunner.os == :linux
-
-      # If we don't have permissions, it should return empty array
-      prefix = BenchmarkRunner.setarch_prefix
-      if prefix.empty?
-        assert_equal [], prefix
-      else
-        # If we do have permissions, verify the structure
-        assert_equal 3, prefix.length
-      end
-    end
-  end
-
   describe 'Stats integration' do
     it 'calculates mean correctly' do
       values = [1, 2, 3, 4, 5]
diff --git a/test/benchmark_suite_test.rb b/test/benchmark_suite_test.rb
new file mode 100644
index 00000000..483c2b5b
--- /dev/null
+++ b/test/benchmark_suite_test.rb
@@ -0,0 +1,488 @@
+require_relative 'test_helper'
+require_relative '../lib/benchmark_suite'
+require 'tempfile'
+require 'tmpdir'
+require 'fileutils'
+require 'json'
+require 'yaml'
+
+describe BenchmarkSuite do
+  before do
+    @original_dir = Dir.pwd
+    @temp_dir = Dir.mktmpdir
+    Dir.chdir(@temp_dir)
+
+    # Create mock benchmarks directory structure
+    FileUtils.mkdir_p('benchmarks')
+    FileUtils.mkdir_p('benchmarks-ractor')
+    FileUtils.mkdir_p('harness')
+
+    # Create a simple benchmark file
+    File.write('benchmarks/simple.rb', <<~RUBY)
+      require 'json'
+      result = {
+        'warmup' => [0.001],
+        'bench' => [0.001, 0.0009, 0.0011],
+        'rss' => 10485760
+      }
+      File.write(ENV['RESULT_JSON_PATH'], JSON.generate(result))
+    RUBY
+
+    # Create benchmarks metadata
+    @metadata = {
+      'simple' => { 'category' => 'micro' },
+      'fib' => { 'category' => 'micro' }
+    }
+    File.write('benchmarks.yml', YAML.dump(@metadata))
+
+    @out_path = File.join(@temp_dir, 'output')
+    FileUtils.mkdir_p(@out_path)
+  end
+
+  after do
+    Dir.chdir(@original_dir)
+    FileUtils.rm_rf(@temp_dir)
+  end
+
+  describe '#initialize' do
+    it 'sets all required attributes' do
+      suite = BenchmarkSuite.new(
+        ruby: ['ruby'],
+        ruby_description: 'ruby 3.2.0',
+        categories: ['micro'],
+        name_filters: [],
+        out_path: @out_path,
+        harness: 'harness'
+      )
+
+      assert_equal ['ruby'], suite.ruby
+      assert_equal 'ruby 3.2.0', suite.ruby_description
+      assert_equal ['micro'], suite.categories
+      assert_equal [], suite.name_filters
+      assert_equal @out_path, suite.out_path
+      assert_equal 'harness', suite.harness
+      assert_nil suite.pre_init
+      assert_equal false, suite.no_pinning
+    end
+
+    it 'accepts optional parameters' do
+      suite = BenchmarkSuite.new(
+        ruby: ['ruby'],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: [],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      assert_equal true, suite.no_pinning
+    end
+  end
+
+  describe '#run' do
+    it 'returns bench_data and bench_failures as a tuple' do
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['simple'],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      result = nil
+      capture_io do
+        result = suite.run
+      end
+
+      assert_instance_of Array, result
+      assert_equal 2, result.length
+
+      bench_data, bench_failures = result
+      assert_instance_of Hash, bench_data
+      assert_instance_of Hash, bench_failures
+    end
+
+    it 'runs matching benchmarks and collects results' do
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['simple'],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      bench_data, bench_failures = nil
+      capture_io do
+        bench_data, bench_failures = suite.run
+      end
+
+      assert_includes bench_data, 'simple'
+      assert_includes bench_data['simple'], 'warmup'
+      assert_includes bench_data['simple'], 'bench'
+      assert_includes bench_data['simple'], 'rss'
+      assert_includes bench_data['simple'], 'command_line'
+
+      assert_empty bench_failures
+    end
+
+    it 'prints progress messages while running' do
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['simple'],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      output = capture_io do
+        suite.run
+      end
+
+      assert_includes output[0], 'Running benchmark "simple"'
+    end
+
+    it 'records failures when benchmark script fails' do
+      # Create a failing benchmark
+      File.write('benchmarks/failing.rb', <<~RUBY)
+        exit(1)
+      RUBY
+
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['failing'],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      bench_data, bench_failures = nil
+      capture_io do
+        bench_data, bench_failures = suite.run
+      end
+
+      assert_empty bench_data
+      assert_includes bench_failures, 'failing'
+      assert_equal 1, bench_failures['failing']
+    end
+
+    it 'handles benchmarks in subdirectories' do
+      # Create a benchmark in a subdirectory
+      FileUtils.mkdir_p('benchmarks/subdir')
+      File.write('benchmarks/subdir/benchmark.rb', <<~RUBY)
+        require 'json'
+        result = {
+          'warmup' => [0.001],
+          'bench' => [0.001],
+          'rss' => 10485760
+        }
+        File.write(ENV['RESULT_JSON_PATH'], JSON.generate(result))
+      RUBY
+
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['subdir'],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      bench_data, bench_failures = nil
+      capture_io do
+        bench_data, bench_failures = suite.run
+      end
+
+      assert_includes bench_data, 'subdir'
+      assert_empty bench_failures
+    end
+
+    it 'handles ractor-only category' do
+      # Create a ractor benchmark
+      File.write('benchmarks-ractor/ractor_test.rb', <<~RUBY)
+        require 'json'
+        result = {
+          'warmup' => [0.001],
+          'bench' => [0.001],
+          'rss' => 10485760
+        }
+        File.write(ENV['RESULT_JSON_PATH'], JSON.generate(result))
+      RUBY
+
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: ['ractor-only'],
+        name_filters: [],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      bench_data, bench_failures = nil
+      capture_io do
+        bench_data, bench_failures = suite.run
+      end
+
+      # When ractor-only is specified, it should use benchmarks-ractor directory
+      assert_includes bench_data, 'ractor_test'
+      assert_empty bench_failures
+
+      # harness should be updated to harness-ractor
+      assert_equal 'harness-ractor', suite.harness
+    end
+
+    it 'includes both regular and ractor benchmarks with ractor category' do
+      File.write('benchmarks-ractor/ractor_bench.rb', <<~RUBY)
+        require 'json'
+        result = {
+          'warmup' => [0.001],
+          'bench' => [0.001],
+          'rss' => 10485760
+        }
+        File.write(ENV['RESULT_JSON_PATH'], JSON.generate(result))
+      RUBY
+
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: ['ractor'],
+        name_filters: [],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      bench_data = nil
+      capture_io do
+        bench_data, _ = suite.run
+      end
+
+      # With ractor category, both directories should be scanned
+      # but we need appropriate filters
+      assert_instance_of Hash, bench_data
+    end
+
+    it 'expands pre_init when provided' do
+      # Create a pre_init file
+      pre_init_file = File.join(@temp_dir, 'pre_init.rb')
+      File.write(pre_init_file, "# Pre-initialization code\n")
+
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['simple'],
+        out_path: @out_path,
+        harness: 'harness',
+        pre_init: pre_init_file,
+        no_pinning: true
+      )
+
+      assert_instance_of Array, suite.pre_init
+      assert_equal 4, suite.pre_init.length
+      assert_equal '-I', suite.pre_init[0]
+      assert_equal @temp_dir, suite.pre_init[1].to_s
+      assert_equal '-r', suite.pre_init[2]
+      assert_equal 'pre_init', suite.pre_init[3].to_s
+    end
+
+    it 'handles pre_init with different file extensions' do
+      # Create a pre_init file with a different name
+      pre_init_file = File.join(@temp_dir, 'my_config.rb')
+      File.write(pre_init_file, "# Config code\n")
+
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['simple'],
+        out_path: @out_path,
+        harness: 'harness',
+        pre_init: pre_init_file,
+        no_pinning: true
+      )
+
+      # Should extract filename without extension
+      assert_equal 'my_config', suite.pre_init[3].to_s
+    end
+
+    it 'handles pre_init in nested directories' do
+      # Create a pre_init file in nested directory
+      subdir = File.join(@temp_dir, 'config', 'initializers')
+      FileUtils.mkdir_p(subdir)
+      pre_init_file = File.join(subdir, 'setup.rb')
+      File.write(pre_init_file, "# Setup code\n")
+
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['simple'],
+        out_path: @out_path,
+        harness: 'harness',
+        pre_init: pre_init_file,
+        no_pinning: true
+      )
+
+      # Should use the nested directory as load path
+      assert_equal subdir, suite.pre_init[1].to_s
+      assert_equal 'setup', suite.pre_init[3].to_s
+    end
+
+    it 'exits when pre_init file does not exist' do
+      output = capture_io do
+        assert_raises(SystemExit) do
+          BenchmarkSuite.new(
+            ruby: [RbConfig.ruby],
+            ruby_description: 'ruby 3.2.0',
+            categories: [],
+            name_filters: ['simple'],
+            out_path: @out_path,
+            harness: 'harness',
+            pre_init: '/nonexistent/file.rb',
+            no_pinning: true
+          )
+        end
+      end
+      assert_includes output[0], '--with-pre-init called with non-existent file!'
+    end
+
+    it 'exits when pre_init path is a directory' do
+      output = capture_io do
+        assert_raises(SystemExit) do
+          BenchmarkSuite.new(
+            ruby: [RbConfig.ruby],
+            ruby_description: 'ruby 3.2.0',
+            categories: [],
+            name_filters: ['simple'],
+            out_path: @out_path,
+            harness: 'harness',
+            pre_init: @temp_dir,
+            no_pinning: true
+          )
+        end
+      end
+      assert_includes output[0], '--with-pre-init called with a directory'
+    end
+
+    it 'stores command_line in benchmark results' do
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['simple'],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      bench_data, _ = nil
+      capture_io do
+        bench_data, _ = suite.run
+      end
+
+      assert_includes bench_data['simple'], 'command_line'
+      assert_instance_of String, bench_data['simple']['command_line']
+      assert_includes bench_data['simple']['command_line'], 'simple.rb'
+    end
+
+    it 'cleans up temporary JSON files after successful run' do
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['simple'],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      capture_io do
+        suite.run
+      end
+
+      # Temporary files should be cleaned up
+      temp_files = Dir.glob(File.join(@out_path, 'temp*.json'))
+      assert_empty temp_files
+    end
+
+    it 'filters benchmarks by name_filters' do
+      # Create multiple benchmarks
+      File.write('benchmarks/bench_a.rb', <<~RUBY)
+        require 'json'
+        result = { 'warmup' => [0.001], 'bench' => [0.001], 'rss' => 10485760 }
+        File.write(ENV['RESULT_JSON_PATH'], JSON.generate(result))
+      RUBY
+
+      File.write('benchmarks/bench_b.rb', <<~RUBY)
+        require 'json'
+        result = { 'warmup' => [0.001], 'bench' => [0.001], 'rss' => 10485760 }
+        File.write(ENV['RESULT_JSON_PATH'], JSON.generate(result))
+      RUBY
+
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: [],
+        name_filters: ['bench_a'],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      bench_data = nil
+      capture_io do
+        bench_data, _ = suite.run
+      end
+
+      assert_includes bench_data, 'bench_a'
+      refute_includes bench_data, 'bench_b'
+    end
+  end
+
+  describe 'integration with BenchmarkFilter' do
+    it 'uses BenchmarkFilter to match benchmarks' do
+      # Create benchmarks with different categories
+      File.write('benchmarks/micro_bench.rb', <<~RUBY)
+        require 'json'
+        result = { 'warmup' => [0.001], 'bench' => [0.001], 'rss' => 10485760 }
+        File.write(ENV['RESULT_JSON_PATH'], JSON.generate(result))
+      RUBY
+
+      metadata = {
+        'micro_bench' => { 'category' => 'micro' },
+        'simple' => { 'category' => 'other' }
+      }
+      File.write('benchmarks.yml', YAML.dump(metadata))
+
+      suite = BenchmarkSuite.new(
+        ruby: [RbConfig.ruby],
+        ruby_description: 'ruby 3.2.0',
+        categories: ['micro'],
+        name_filters: [],
+        out_path: @out_path,
+        harness: 'harness',
+        no_pinning: true
+      )
+
+      bench_data = nil
+      capture_io do
+        bench_data, _ = suite.run
+      end
+
+      # Should only include micro category benchmarks
+      assert_includes bench_data, 'micro_bench'
+      refute_includes bench_data, 'simple'
+    end
+  end
+end

From f0da3c3b0750a5a7d025e0b94c40dcca84f0b329 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 03:01:00 +0000
Subject: [PATCH 02/17] Extract a few constants

---
 lib/benchmark_suite.rb | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/lib/benchmark_suite.rb b/lib/benchmark_suite.rb
index 5b7ceabc..41276931 100644
--- a/lib/benchmark_suite.rb
+++ b/lib/benchmark_suite.rb
@@ -11,6 +11,12 @@
 
 # BenchmarkSuite runs a collection of benchmarks and collects their results
 class BenchmarkSuite
+  BENCHMARKS_DIR = "benchmarks"
+  RACTOR_BENCHMARKS_DIR = "benchmarks-ractor"
+  RACTOR_ONLY_CATEGORY = "ractor-only"
+  RACTOR_CATEGORY = "ractor"
+  RACTOR_HARNESS = "harness-ractor"
+
   attr_reader :ruby, :ruby_description, :categories, :name_filters, :out_path, :harness, :pre_init, :no_pinning
 
   def initialize(ruby:, ruby_description:, categories:, name_filters:, out_path:, harness:, pre_init: nil, no_pinning: false)
@@ -30,12 +36,12 @@ def run
     bench_data = {}
     bench_failures = {}
 
-    bench_dir = "benchmarks"
-    ractor_bench_dir = "benchmarks-ractor"
+    bench_dir = BENCHMARKS_DIR
+    ractor_bench_dir = RACTOR_BENCHMARKS_DIR
 
-    if categories == ["ractor-only"]
+    if categories == [RACTOR_ONLY_CATEGORY]
       bench_dir = ractor_bench_dir
-      @harness = "harness-ractor"
+      @harness = RACTOR_HARNESS
       @categories = []
     end
 
@@ -47,7 +53,7 @@ def run
       filter.match?(entry)
     end
 
-    if categories == ["ractor"]
+    if categories == [RACTOR_CATEGORY]
       # We ignore the category filter here because everything in the
       # benchmarks-ractor directory should be included when we're benchmarking the
       # Ractor category

From 7550537d0586f72c133a74d3a03685fb32914da7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 03:06:38 +0000
Subject: [PATCH 03/17] Determine if the suite is ractor-only in the
 initializer

---
 lib/benchmark_suite.rb | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lib/benchmark_suite.rb b/lib/benchmark_suite.rb
index 41276931..8d643e44 100644
--- a/lib/benchmark_suite.rb
+++ b/lib/benchmark_suite.rb
@@ -13,7 +13,7 @@
 class BenchmarkSuite
   BENCHMARKS_DIR = "benchmarks"
   RACTOR_BENCHMARKS_DIR = "benchmarks-ractor"
-  RACTOR_ONLY_CATEGORY = "ractor-only"
+  RACTOR_ONLY_CATEGORY = ["ractor-only"].freeze
   RACTOR_CATEGORY = "ractor"
   RACTOR_HARNESS = "harness-ractor"
 
@@ -28,6 +28,7 @@ def initialize(ruby:, ruby_description:, categories:, name_filters:, out_path:,
     @harness = harness
     @pre_init = pre_init ? expand_pre_init(pre_init) : nil
     @no_pinning = no_pinning
+    @ractor_only = (categories == RACTOR_ONLY_CATEGORY)
   end
 
   # Run all the benchmarks and record execution times
@@ -39,7 +40,7 @@ def run
     bench_dir = BENCHMARKS_DIR
     ractor_bench_dir = RACTOR_BENCHMARKS_DIR
 
-    if categories == [RACTOR_ONLY_CATEGORY]
+    if @racktor_only
       bench_dir = ractor_bench_dir
       @harness = RACTOR_HARNESS
       @categories = []

From 97cf092dd3a9bffcdabd485bf265e4c42f8b9ad6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 03:16:28 +0000
Subject: [PATCH 04/17] Simplify the logic in run by determining benchmark
 directories during initialization

---
 lib/benchmark_suite.rb       | 63 ++++++++++++++++++++----------------
 test/benchmark_suite_test.rb | 48 +++++++++++++++++++++++++++
 2 files changed, 83 insertions(+), 28 deletions(-)

diff --git a/lib/benchmark_suite.rb b/lib/benchmark_suite.rb
index 8d643e44..50a87925 100644
--- a/lib/benchmark_suite.rb
+++ b/lib/benchmark_suite.rb
@@ -8,6 +8,7 @@
 require 'yaml'
 require 'rbconfig'
 require_relative 'benchmark_filter'
+require_relative 'benchmark_runner'
 
 # BenchmarkSuite runs a collection of benchmarks and collects their results
 class BenchmarkSuite
@@ -17,7 +18,7 @@ class BenchmarkSuite
   RACTOR_CATEGORY = "ractor"
   RACTOR_HARNESS = "harness-ractor"
 
-  attr_reader :ruby, :ruby_description, :categories, :name_filters, :out_path, :harness, :pre_init, :no_pinning
+  attr_reader :ruby, :ruby_description, :categories, :name_filters, :out_path, :harness, :pre_init, :no_pinning, :bench_dir, :ractor_bench_dir
 
   def initialize(ruby:, ruby_description:, categories:, name_filters:, out_path:, harness:, pre_init: nil, no_pinning: false)
     @ruby = ruby
@@ -29,6 +30,15 @@ def initialize(ruby:, ruby_description:, categories:, name_filters:, out_path:,
     @pre_init = pre_init ? expand_pre_init(pre_init) : nil
     @no_pinning = no_pinning
     @ractor_only = (categories == RACTOR_ONLY_CATEGORY)
+
+    @bench_dir = BENCHMARKS_DIR
+    @ractor_bench_dir = RACTOR_BENCHMARKS_DIR
+
+    if @ractor_only
+      @bench_dir = @ractor_bench_dir
+      @harness = RACTOR_HARNESS
+      @categories = []
+    end
   end
 
   # Run all the benchmarks and record execution times
@@ -37,33 +47,6 @@ def run
     bench_data = {}
     bench_failures = {}
 
-    bench_dir = BENCHMARKS_DIR
-    ractor_bench_dir = RACTOR_BENCHMARKS_DIR
-
-    if @racktor_only
-      bench_dir = ractor_bench_dir
-      @harness = RACTOR_HARNESS
-      @categories = []
-    end
-
-    bench_file_grouping = {}
-
-    # Get the list of benchmark files/directories matching name filters
-    filter = benchmark_filter(categories: categories, name_filters: name_filters)
-    bench_file_grouping[bench_dir] = Dir.children(bench_dir).sort.filter do |entry|
-      filter.match?(entry)
-    end
-
-    if categories == [RACTOR_CATEGORY]
-      # We ignore the category filter here because everything in the
-      # benchmarks-ractor directory should be included when we're benchmarking the
-      # Ractor category
-      ractor_filter = benchmark_filter(categories: [], name_filters: name_filters)
-      bench_file_grouping[ractor_bench_dir] = Dir.children(ractor_bench_dir).sort.filter do |entry|
-        ractor_filter.match?(entry)
-      end
-    end
-
     bench_file_grouping.each do |bench_dir, bench_files|
       bench_files.each_with_index do |entry, idx|
         bench_name = entry.gsub('.rb', '')
@@ -140,6 +123,30 @@ def run
 
   private
 
+  def bench_file_grouping
+    bench_file_grouping = {}
+
+    # Get the list of benchmark files/directories matching name filters
+    filter = benchmark_filter(categories: categories, name_filters: name_filters)
+    bench_file_grouping[bench_dir] = filtered_bench_entries(bench_dir, filter)
+
+    if categories == [RACTOR_CATEGORY]
+      # We ignore the category filter here because everything in the
+      # benchmarks-ractor directory should be included when we're benchmarking the
+      # Ractor category
+      ractor_filter = benchmark_filter(categories: [], name_filters: name_filters)
+      bench_file_grouping[ractor_bench_dir] = filtered_bench_entries(ractor_bench_dir, ractor_filter)
+    end
+
+    bench_file_grouping
+  end
+
+  def filtered_bench_entries(dir, filter)
+    Dir.children(dir).sort.filter do |entry|
+      filter.match?(entry)
+    end
+  end
+
   def benchmark_filter(categories:, name_filters:)
     @benchmark_filter ||= {}
     key = [categories, name_filters]
diff --git a/test/benchmark_suite_test.rb b/test/benchmark_suite_test.rb
index 483c2b5b..ce953b91 100644
--- a/test/benchmark_suite_test.rb
+++ b/test/benchmark_suite_test.rb
@@ -78,6 +78,54 @@
 
       assert_equal true, suite.no_pinning
     end
+
+    it 'sets bench_dir to BENCHMARKS_DIR by default' do
+      suite = BenchmarkSuite.new(
+        ruby: ['ruby'],
+        ruby_description: 'ruby 3.2.0',
+        categories: ['micro'],
+        name_filters: [],
+        out_path: @out_path,
+        harness: 'harness'
+      )
+
+      assert_equal 'benchmarks', suite.bench_dir
+      assert_equal 'benchmarks-ractor', suite.ractor_bench_dir
+      assert_equal 'harness', suite.harness
+      assert_equal ['micro'], suite.categories
+    end
+
+    it 'sets bench_dir to ractor directory and updates harness when ractor-only category is used' do
+      suite = BenchmarkSuite.new(
+        ruby: ['ruby'],
+        ruby_description: 'ruby 3.2.0',
+        categories: ['ractor-only'],
+        name_filters: [],
+        out_path: @out_path,
+        harness: 'harness'
+      )
+
+      assert_equal 'benchmarks-ractor', suite.bench_dir
+      assert_equal 'benchmarks-ractor', suite.ractor_bench_dir
+      assert_equal 'harness-ractor', suite.harness
+      assert_equal [], suite.categories
+    end
+
+    it 'keeps bench_dir as BENCHMARKS_DIR when ractor category is used' do
+      suite = BenchmarkSuite.new(
+        ruby: ['ruby'],
+        ruby_description: 'ruby 3.2.0',
+        categories: ['ractor'],
+        name_filters: [],
+        out_path: @out_path,
+        harness: 'harness'
+      )
+
+      assert_equal 'benchmarks', suite.bench_dir
+      assert_equal 'benchmarks-ractor', suite.ractor_bench_dir
+      assert_equal 'harness', suite.harness
+      assert_equal ['ractor'], suite.categories
+    end
   end
 
   describe '#run' do

From 18384bc8a07bfc7764a97cd28bf4378a42efd750 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 03:29:31 +0000
Subject: [PATCH 05/17] Give meaning to the category checking

---
 lib/benchmark_suite.rb | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/lib/benchmark_suite.rb b/lib/benchmark_suite.rb
index 50a87925..6a7c2a3e 100644
--- a/lib/benchmark_suite.rb
+++ b/lib/benchmark_suite.rb
@@ -15,7 +15,7 @@ class BenchmarkSuite
   BENCHMARKS_DIR = "benchmarks"
   RACTOR_BENCHMARKS_DIR = "benchmarks-ractor"
   RACTOR_ONLY_CATEGORY = ["ractor-only"].freeze
-  RACTOR_CATEGORY = "ractor"
+  RACTOR_CATEGORY = ["ractor"].freeze
   RACTOR_HARNESS = "harness-ractor"
 
   attr_reader :ruby, :ruby_description, :categories, :name_filters, :out_path, :harness, :pre_init, :no_pinning, :bench_dir, :ractor_bench_dir
@@ -130,7 +130,7 @@ def bench_file_grouping
     filter = benchmark_filter(categories: categories, name_filters: name_filters)
     bench_file_grouping[bench_dir] = filtered_bench_entries(bench_dir, filter)
 
-    if categories == [RACTOR_CATEGORY]
+    if benchmark_ractor_directory?
       # We ignore the category filter here because everything in the
       # benchmarks-ractor directory should be included when we're benchmarking the
       # Ractor category
@@ -141,12 +141,6 @@ def bench_file_grouping
     bench_file_grouping
   end
 
-  def filtered_bench_entries(dir, filter)
-    Dir.children(dir).sort.filter do |entry|
-      filter.match?(entry)
-    end
-  end
-
   def benchmark_filter(categories:, name_filters:)
     @benchmark_filter ||= {}
     key = [categories, name_filters]
@@ -161,6 +155,16 @@ def benchmarks_metadata
     @benchmarks_metadata ||= YAML.load_file('benchmarks.yml')
   end
 
+  def filtered_bench_entries(dir, filter)
+    Dir.children(dir).sort.filter do |entry|
+      filter.match?(entry)
+    end
+  end
+
+  def benchmark_ractor_directory?
+    categories == RACTOR_CATEGORY
+  end
+
   # Check if running on Linux
   def linux?
     RbConfig::CONFIG['host_os'] =~ /linux/

From 51e532c7e4abc36f4c237e573f5ac719c47b65ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 03:31:37 +0000
Subject: [PATCH 06/17] name_filters is always the same inside the instance

---
 lib/benchmark_suite.rb | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/lib/benchmark_suite.rb b/lib/benchmark_suite.rb
index 6a7c2a3e..c6c0e2a5 100644
--- a/lib/benchmark_suite.rb
+++ b/lib/benchmark_suite.rb
@@ -127,24 +127,23 @@ def bench_file_grouping
     bench_file_grouping = {}
 
     # Get the list of benchmark files/directories matching name filters
-    filter = benchmark_filter(categories: categories, name_filters: name_filters)
+    filter = benchmark_filter(categories: categories)
     bench_file_grouping[bench_dir] = filtered_bench_entries(bench_dir, filter)
 
     if benchmark_ractor_directory?
       # We ignore the category filter here because everything in the
       # benchmarks-ractor directory should be included when we're benchmarking the
       # Ractor category
-      ractor_filter = benchmark_filter(categories: [], name_filters: name_filters)
+      ractor_filter = benchmark_filter(categories: [])
       bench_file_grouping[ractor_bench_dir] = filtered_bench_entries(ractor_bench_dir, ractor_filter)
     end
 
     bench_file_grouping
   end
 
-  def benchmark_filter(categories:, name_filters:)
+  def benchmark_filter(categories:)
     @benchmark_filter ||= {}
-    key = [categories, name_filters]
-    @benchmark_filter[key] ||= BenchmarkFilter.new(
+    @benchmark_filter[categories] ||= BenchmarkFilter.new(
       categories: categories,
       name_filters: name_filters,
       metadata: benchmarks_metadata

From b335cbe0a2da644576953c01b8822eda074bb1b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 03:34:18 +0000
Subject: [PATCH 07/17] Memoize linux check

The check is performed inside the benchmark loop
---
 lib/benchmark_suite.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/benchmark_suite.rb b/lib/benchmark_suite.rb
index c6c0e2a5..5ee3bfa0 100644
--- a/lib/benchmark_suite.rb
+++ b/lib/benchmark_suite.rb
@@ -166,7 +166,7 @@ def benchmark_ractor_directory?
 
   # Check if running on Linux
   def linux?
-    RbConfig::CONFIG['host_os'] =~ /linux/
+    @linux ||= RbConfig::CONFIG['host_os'] =~ /linux/
   end
 
   # Generate setarch prefix for Linux

From cc01d0d58ee04c8d911d2eed766e6951769fbc4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 03:43:16 +0000
Subject: [PATCH 08/17] Extract CPU pinning logic into a separate method

This avoids recalculating it on every iteration of the loop.
---
 lib/benchmark_suite.rb | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/lib/benchmark_suite.rb b/lib/benchmark_suite.rb
index 5ee3bfa0..5a4e68da 100644
--- a/lib/benchmark_suite.rb
+++ b/lib/benchmark_suite.rb
@@ -65,18 +65,7 @@ def run
         ENV["RESULT_JSON_PATH"] = result_json_path
 
         # Set up the benchmarking command
-        cmd = []
-        if linux?
-          cmd += setarch_prefix
-
-          # Pin the process to one given core to improve caching and reduce variance on CRuby
-          # Other Rubies need to use multiple cores, e.g., for JIT threads
-          if ruby_description.start_with?('ruby ') && !no_pinning
-            # The last few cores of Intel CPU may be slow E-Cores, so avoid using the last one.
-            cpu = [(Etc.nprocessors / 2) - 1, 0].max
-            cmd += ["taskset", "-c", "#{cpu}"]
-          end
-        end
+        cmd = base_cmd
 
         # Fix for jruby/jruby#7394 in JRuby 9.4.2.0
         script_path = File.expand_path(script_path)
@@ -169,6 +158,27 @@ def linux?
     @linux ||= RbConfig::CONFIG['host_os'] =~ /linux/
   end
 
+  # Set up the base command with CPU pinning if needed
+  def base_cmd
+    @base_cmd ||= begin
+      cmd = []
+
+      if linux?
+        cmd += setarch_prefix
+
+        # Pin the process to one given core to improve caching and reduce variance on CRuby
+        # Other Rubies need to use multiple cores, e.g., for JIT threads
+        if ruby_description.start_with?('ruby ') && !no_pinning
+          # The last few cores of Intel CPU may be slow E-Cores, so avoid using the last one.
+          cpu = [(Etc.nprocessors / 2) - 1, 0].max
+          cmd += ["taskset", "-c", "#{cpu}"]
+        end
+      end
+
+      cmd
+    end
+  end
+
   # Generate setarch prefix for Linux
   def setarch_prefix
     # Disable address space randomization (for determinism)

From 027d36c7ecd0c9e109df4af932a6618dc129157e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 03:43:53 +0000
Subject: [PATCH 09/17] Use delete_suffix instead of gsub to remove file
 extension

---
 lib/benchmark_suite.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/benchmark_suite.rb b/lib/benchmark_suite.rb
index 5a4e68da..73be73ac 100644
--- a/lib/benchmark_suite.rb
+++ b/lib/benchmark_suite.rb
@@ -49,7 +49,7 @@ def run
 
     bench_file_grouping.each do |bench_dir, bench_files|
       bench_files.each_with_index do |entry, idx|
-        bench_name = entry.gsub('.rb', '')
+        bench_name = entry.delete_suffix('.rb')
 
         puts("Running benchmark \"#{bench_name}\" (#{idx+1}/#{bench_files.length})")
 

From 4ede817c28c8c49e59514a4297bcae29ef6ffa5c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 03:46:54 +0000
Subject: [PATCH 10/17] Extract benchmark_env since it doesn't change inside
 the loop

---
 lib/benchmark_suite.rb | 39 +++++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/lib/benchmark_suite.rb b/lib/benchmark_suite.rb
index 73be73ac..cec4a5a7 100644
--- a/lib/benchmark_suite.rb
+++ b/lib/benchmark_suite.rb
@@ -77,23 +77,8 @@ def run
           script_path,
         ].compact
 
-        # When the Ruby running this script is not the first Ruby in PATH, shell commands
-        # like `bundle install` in a child process will not use the Ruby being benchmarked.
-        # It overrides PATH to guarantee the commands of the benchmarked Ruby will be used.
-        env = {}
-        ruby_path = `#{ruby.shelljoin} -e 'print RbConfig.ruby' 2> #{File::NULL}`
-        if ruby_path != RbConfig.ruby
-          env["PATH"] = "#{File.dirname(ruby_path)}:#{ENV["PATH"]}"
-
-          # chruby sets GEM_HOME and GEM_PATH in your shell. We have to unset it in the child
-          # process to avoid installing gems to the version that is running run_benchmarks.rb.
-          ["GEM_HOME", "GEM_PATH"].each do |var|
-            env[var] = nil if ENV.key?(var)
-          end
-        end
-
         # Do the benchmarking
-        result = BenchmarkRunner.check_call(cmd.shelljoin, env: env, raise_error: false)
+        result = BenchmarkRunner.check_call(cmd.shelljoin, env: benchmark_env, raise_error: false)
 
         if result[:success]
           bench_data[bench_name] = JSON.parse(File.read(result_json_path)).tap do |json|
@@ -112,6 +97,28 @@ def run
 
   private
 
+  def benchmark_env
+    @benchmark_env ||= begin
+      # When the Ruby running this script is not the first Ruby in PATH, shell commands
+      # like `bundle install` in a child process will not use the Ruby being benchmarked.
+      # It overrides PATH to guarantee the commands of the benchmarked Ruby will be used.
+      env = {}
+      ruby_path = `#{ruby.shelljoin} -e 'print RbConfig.ruby' 2> #{File::NULL}`
+
+      if ruby_path != RbConfig.ruby
+        env["PATH"] = "#{File.dirname(ruby_path)}:#{ENV["PATH"]}"
+
+        # chruby sets GEM_HOME and GEM_PATH in your shell. We have to unset it in the child
+        # process to avoid installing gems to the version that is running run_benchmarks.rb.
+        ["GEM_HOME", "GEM_PATH"].each do |var|
+          env[var] = nil if ENV.key?(var)
+        end
+      end
+
+      env
+    end
+  end
+
   def bench_file_grouping
     bench_file_grouping = {}
 

From 312aa93be79bcabde1bdfe562e80998773c67fa0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 03:49:57 +0000
Subject: [PATCH 11/17] Extract method to run single benchmark

---
 lib/benchmark_suite.rb | 57 ++++++++++++++++++++++--------------------
 1 file changed, 30 insertions(+), 27 deletions(-)

diff --git a/lib/benchmark_suite.rb b/lib/benchmark_suite.rb
index cec4a5a7..b5519c59 100644
--- a/lib/benchmark_suite.rb
+++ b/lib/benchmark_suite.rb
@@ -53,42 +53,17 @@ def run
 
         puts("Running benchmark \"#{bench_name}\" (#{idx+1}/#{bench_files.length})")
 
-        # Path to the benchmark runner script
-        script_path = File.join(bench_dir, entry)
-
-        if !script_path.end_with?('.rb')
-          script_path = File.join(script_path, 'benchmark.rb')
-        end
-
-        # Set up the environment for the benchmarking command
         result_json_path = File.join(out_path, "temp#{Process.pid}.json")
-        ENV["RESULT_JSON_PATH"] = result_json_path
-
-        # Set up the benchmarking command
-        cmd = base_cmd
-
-        # Fix for jruby/jruby#7394 in JRuby 9.4.2.0
-        script_path = File.expand_path(script_path)
-
-        cmd += [
-          *ruby,
-          "-I", harness,
-          *pre_init,
-          script_path,
-        ].compact
-
-        # Do the benchmarking
-        result = BenchmarkRunner.check_call(cmd.shelljoin, env: benchmark_env, raise_error: false)
+        result = run_single_benchmark(bench_dir, entry, result_json_path)
 
         if result[:success]
           bench_data[bench_name] = JSON.parse(File.read(result_json_path)).tap do |json|
-            json["command_line"] = cmd.shelljoin
+            json["command_line"] = result[:command]
             File.unlink(result_json_path)
           end
         else
           bench_failures[bench_name] = result[:status].exitstatus
         end
-
       end
     end
 
@@ -97,6 +72,34 @@ def run
 
   private
 
+  def run_single_benchmark(bench_dir, entry, result_json_path)
+    # Path to the benchmark runner script
+    script_path = File.join(bench_dir, entry)
+
+    if !script_path.end_with?('.rb')
+      script_path = File.join(script_path, 'benchmark.rb')
+    end
+
+    # Fix for jruby/jruby#7394 in JRuby 9.4.2.0
+    script_path = File.expand_path(script_path)
+
+    # Set up the environment for the benchmarking command
+    ENV["RESULT_JSON_PATH"] = result_json_path
+
+    # Set up the benchmarking command
+    cmd = base_cmd + [
+      *ruby,
+      "-I", harness,
+      *pre_init,
+      script_path,
+    ].compact
+
+    # Do the benchmarking
+    result = BenchmarkRunner.check_call(cmd.shelljoin, env: benchmark_env, raise_error: false)
+    result[:command] = cmd.shelljoin
+    result
+  end
+
   def benchmark_env
     @benchmark_env ||= begin
       # When the Ruby running this script is not the first Ruby in PATH, shell commands

From 223c21d578556814541fb030fd1d67bf44c507d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 03:50:44 +0000
Subject: [PATCH 12/17] Use unless instead of if !

---
 lib/benchmark_suite.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/benchmark_suite.rb b/lib/benchmark_suite.rb
index b5519c59..0db2c890 100644
--- a/lib/benchmark_suite.rb
+++ b/lib/benchmark_suite.rb
@@ -76,7 +76,7 @@ def run_single_benchmark(bench_dir, entry, result_json_path)
     # Path to the benchmark runner script
     script_path = File.join(bench_dir, entry)
 
-    if !script_path.end_with?('.rb')
+    unless script_path.end_with?('.rb')
       script_path = File.join(script_path, 'benchmark.rb')
     end
 

From 613ca2be205986e346c649b48f6a7064a605a1ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 03:54:09 +0000
Subject: [PATCH 13/17] Extract benchmark result processing

---
 lib/benchmark_suite.rb | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/lib/benchmark_suite.rb b/lib/benchmark_suite.rb
index 0db2c890..9ea92832 100644
--- a/lib/benchmark_suite.rb
+++ b/lib/benchmark_suite.rb
@@ -57,10 +57,7 @@ def run
         result = run_single_benchmark(bench_dir, entry, result_json_path)
 
         if result[:success]
-          bench_data[bench_name] = JSON.parse(File.read(result_json_path)).tap do |json|
-            json["command_line"] = result[:command]
-            File.unlink(result_json_path)
-          end
+          bench_data[bench_name] = process_benchmark_result(result_json_path, result[:command])
         else
           bench_failures[bench_name] = result[:status].exitstatus
         end
@@ -72,6 +69,13 @@ def run
 
   private
 
+  def process_benchmark_result(result_json_path, command)
+    JSON.parse(File.read(result_json_path)).tap do |json|
+      json["command_line"] = command
+      File.unlink(result_json_path)
+    end
+  end
+
   def run_single_benchmark(bench_dir, entry, result_json_path)
     # Path to the benchmark runner script
     script_path = File.join(bench_dir, entry)

From 281ece3c066be1593db7b6936b50621c139deb34 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 03:56:03 +0000
Subject: [PATCH 14/17] Simplify memoization of benchmark filters in
 BenchmarkSuite

---
 lib/benchmark_suite.rb | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/lib/benchmark_suite.rb b/lib/benchmark_suite.rb
index 9ea92832..af7447ef 100644
--- a/lib/benchmark_suite.rb
+++ b/lib/benchmark_suite.rb
@@ -127,32 +127,37 @@ def benchmark_env
   end
 
   def bench_file_grouping
-    bench_file_grouping = {}
+    grouping = {}
 
     # Get the list of benchmark files/directories matching name filters
-    filter = benchmark_filter(categories: categories)
-    bench_file_grouping[bench_dir] = filtered_bench_entries(bench_dir, filter)
+    grouping[bench_dir] = filtered_bench_entries(bench_dir, main_benchmark_filter)
 
     if benchmark_ractor_directory?
       # We ignore the category filter here because everything in the
       # benchmarks-ractor directory should be included when we're benchmarking the
       # Ractor category
-      ractor_filter = benchmark_filter(categories: [])
-      bench_file_grouping[ractor_bench_dir] = filtered_bench_entries(ractor_bench_dir, ractor_filter)
+      grouping[ractor_bench_dir] = filtered_bench_entries(ractor_bench_dir, ractor_benchmark_filter)
     end
 
-    bench_file_grouping
+    grouping
   end
 
-  def benchmark_filter(categories:)
-    @benchmark_filter ||= {}
-    @benchmark_filter[categories] ||= BenchmarkFilter.new(
+  def main_benchmark_filter
+    @main_benchmark_filter ||= BenchmarkFilter.new(
       categories: categories,
       name_filters: name_filters,
       metadata: benchmarks_metadata
     )
   end
 
+  def ractor_benchmark_filter
+    @ractor_benchmark_filter ||= BenchmarkFilter.new(
+      categories: [],
+      name_filters: name_filters,
+      metadata: benchmarks_metadata
+    )
+  end
+
   def benchmarks_metadata
     @benchmarks_metadata ||= YAML.load_file('benchmarks.yml')
   end

From f38b0f695a39dafd4b2c078c70159e461108f544 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 03:58:34 +0000
Subject: [PATCH 15/17] Build the hash with values since there is no
 conditional

---
 lib/benchmark_suite.rb | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/lib/benchmark_suite.rb b/lib/benchmark_suite.rb
index af7447ef..5235645d 100644
--- a/lib/benchmark_suite.rb
+++ b/lib/benchmark_suite.rb
@@ -127,10 +127,7 @@ def benchmark_env
   end
 
   def bench_file_grouping
-    grouping = {}
-
-    # Get the list of benchmark files/directories matching name filters
-    grouping[bench_dir] = filtered_bench_entries(bench_dir, main_benchmark_filter)
+    grouping = { bench_dir => filtered_bench_entries(bench_dir, main_benchmark_filter) }
 
     if benchmark_ractor_directory?
       # We ignore the category filter here because everything in the

From b572f109fea41117f3e6edd2ebb83c2c390cf0f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 04:02:52 +0000
Subject: [PATCH 16/17] Simplify base_cmd

---
 lib/benchmark_suite.rb | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/lib/benchmark_suite.rb b/lib/benchmark_suite.rb
index 5235645d..e9b0a763 100644
--- a/lib/benchmark_suite.rb
+++ b/lib/benchmark_suite.rb
@@ -176,22 +176,20 @@ def linux?
 
   # Set up the base command with CPU pinning if needed
   def base_cmd
-    @base_cmd ||= begin
-      cmd = []
-
-      if linux?
-        cmd += setarch_prefix
-
-        # Pin the process to one given core to improve caching and reduce variance on CRuby
-        # Other Rubies need to use multiple cores, e.g., for JIT threads
-        if ruby_description.start_with?('ruby ') && !no_pinning
-          # The last few cores of Intel CPU may be slow E-Cores, so avoid using the last one.
-          cpu = [(Etc.nprocessors / 2) - 1, 0].max
-          cmd += ["taskset", "-c", "#{cpu}"]
-        end
+    @base_cmd ||= if linux?
+      cmd = setarch_prefix
+
+      # Pin the process to one given core to improve caching and reduce variance on CRuby
+      # Other Rubies need to use multiple cores, e.g., for JIT threads
+      if ruby_description.start_with?('ruby ') && !no_pinning
+        # The last few cores of Intel CPU may be slow E-Cores, so avoid using the last one.
+        cpu = [(Etc.nprocessors / 2) - 1, 0].max
+        cmd.concat(["taskset", "-c", "#{cpu}"])
       end
 
       cmd
+    else
+      []
     end
   end
 

From fe8e2ebfda4ba36a24fe20af168707755e6d215a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 04:04:18 +0000
Subject: [PATCH 17/17] Extract directory setup to a private method

---
 lib/benchmark_suite.rb | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/lib/benchmark_suite.rb b/lib/benchmark_suite.rb
index e9b0a763..83cc8bb0 100644
--- a/lib/benchmark_suite.rb
+++ b/lib/benchmark_suite.rb
@@ -31,14 +31,7 @@ def initialize(ruby:, ruby_description:, categories:, name_filters:, out_path:,
     @no_pinning = no_pinning
     @ractor_only = (categories == RACTOR_ONLY_CATEGORY)
 
-    @bench_dir = BENCHMARKS_DIR
-    @ractor_bench_dir = RACTOR_BENCHMARKS_DIR
-
-    if @ractor_only
-      @bench_dir = @ractor_bench_dir
-      @harness = RACTOR_HARNESS
-      @categories = []
-    end
+    setup_benchmark_directories
   end
 
   # Run all the benchmarks and record execution times
@@ -69,6 +62,18 @@ def run
 
   private
 
+  def setup_benchmark_directories
+    if @ractor_only
+      @bench_dir = RACTOR_BENCHMARKS_DIR
+      @ractor_bench_dir = RACTOR_BENCHMARKS_DIR
+      @harness = RACTOR_HARNESS
+      @categories = []
+    else
+      @bench_dir = BENCHMARKS_DIR
+      @ractor_bench_dir = RACTOR_BENCHMARKS_DIR
+    end
+  end
+
   def process_benchmark_result(result_json_path, command)
     JSON.parse(File.read(result_json_path)).tap do |json|
       json["command_line"] = command