From d049075be5f35ac08782ca33be529c37b47b98a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 04:06:32 +0000
Subject: [PATCH 01/12] Only loop in `args.executables` once

---
 run_benchmarks.rb | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/run_benchmarks.rb b/run_benchmarks.rb
index f4991686..a010da5e 100755
--- a/run_benchmarks.rb
+++ b/run_benchmarks.rb
@@ -36,15 +36,14 @@ def sort_benchmarks(bench_names)
 FileUtils.mkdir_p(args.out_path)
 
 ruby_descriptions = {}
-args.executables.each do |name, executable|
-  ruby_descriptions[name] = `#{executable.shelljoin} -v`.chomp
-end
 
 # Benchmark with and without YJIT
 bench_start_time = Time.now.to_f
 bench_data = {}
 bench_failures = {}
 args.executables.each do |name, executable|
+  ruby_descriptions[name] = `#{executable.shelljoin} -v`.chomp
+
   suite = BenchmarkSuite.new(
     ruby: executable,
     ruby_description: ruby_descriptions[name],

From 74b7d89bd430c911843caf0e45093a5ee643bcb2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 04:22:54 +0000
Subject: [PATCH 02/12] Extract class to build the results table from benchmark
 data

---
 lib/results_table_builder.rb       | 104 ++++++++++++++++
 run_benchmarks.rb                  |  65 ++--------
 test/results_table_builder_test.rb | 192 +++++++++++++++++++++++++++++
 3 files changed, 305 insertions(+), 56 deletions(-)
 create mode 100644 lib/results_table_builder.rb
 create mode 100644 test/results_table_builder_test.rb

diff --git a/lib/results_table_builder.rb b/lib/results_table_builder.rb
new file mode 100644
index 00000000..ec0de6af
--- /dev/null
+++ b/lib/results_table_builder.rb
@@ -0,0 +1,104 @@
+require_relative '../misc/stats'
+
+class ResultsTableBuilder
+  def initialize(executable_names:, bench_data:, bench_names:, include_rss: false)
+    @executable_names = executable_names
+    @bench_data = bench_data
+    @bench_names = bench_names
+    @include_rss = include_rss
+    @base_name = executable_names.first
+    @other_names = executable_names[1..]
+  end
+
+  def build
+    table = [build_header]
+    format = build_format
+
+    @bench_names.each do |bench_name|
+      # Skip this bench_name if we failed to get data for any of the executables.
+      next unless @bench_data.all? { |(_k, v)| v[bench_name] }
+
+      row = build_row(bench_name)
+      table << row
+    end
+
+    [table, format]
+  end
+
+  private
+
+  def build_header
+    header = ["bench"]
+    
+    @executable_names.each do |name|
+      header += ["#{name} (ms)", "stddev (%)"]
+      header += ["RSS (MiB)"] if @include_rss
+    end
+    
+    @other_names.each do |name|
+      header += ["#{name} 1st itr"]
+    end
+    
+    @other_names.each do |name|
+      header += ["#{@base_name}/#{name}"]
+    end
+    
+    header
+  end
+
+  def build_format
+    format = ["%s"]
+    
+    @executable_names.each do |_name|
+      format += ["%.1f", "%.1f"]
+      format += ["%.1f"] if @include_rss
+    end
+    
+    @other_names.each do |_name|
+      format += ["%.3f"]
+    end
+    
+    @other_names.each do |_name|
+      format += ["%.3f"]
+    end
+    
+    format
+  end
+
+  def build_row(bench_name)
+    t0s = @executable_names.map { |name| (bench_data_for(name, bench_name)['warmup'][0] || bench_data_for(name, bench_name)['bench'][0]) * 1000.0 }
+    times_no_warmup = @executable_names.map { |name| bench_data_for(name, bench_name)['bench'].map { |v| v * 1000.0 } }
+    rsss = @executable_names.map { |name| bench_data_for(name, bench_name)['rss'] / 1024.0 / 1024.0 }
+
+    base_t0, *other_t0s = t0s
+    base_t, *other_ts = times_no_warmup
+    base_rss, *other_rsss = rsss
+
+    ratio_1sts = other_t0s.map { |other_t0| base_t0 / other_t0 }
+    ratios = other_ts.map { |other_t| mean(base_t) / mean(other_t) }
+
+    row = [bench_name, mean(base_t), 100 * stddev(base_t) / mean(base_t)]
+    row << base_rss if @include_rss
+    
+    other_ts.zip(other_rsss).each do |other_t, other_rss|
+      row += [mean(other_t), 100 * stddev(other_t) / mean(other_t)]
+      row << other_rss if @include_rss
+    end
+
+    row += ratio_1sts + ratios
+
+    row
+  end
+
+  def bench_data_for(name, bench_name)
+    @bench_data[name][bench_name]
+  end
+
+  def mean(values)
+    Stats.new(values).mean
+  end
+
+  def stddev(values)
+    Stats.new(values).stddev
+  end
+end
diff --git a/run_benchmarks.rb b/run_benchmarks.rb
index a010da5e..f72f1a40 100755
--- a/run_benchmarks.rb
+++ b/run_benchmarks.rb
@@ -8,20 +8,12 @@
 require 'rbconfig'
 require 'etc'
 require 'yaml'
-require_relative 'misc/stats'
 require_relative 'lib/cpu_config'
 require_relative 'lib/benchmark_runner'
 require_relative 'lib/benchmark_suite'
 require_relative 'lib/table_formatter'
 require_relative 'lib/argument_parser'
-
-def mean(values)
-  Stats.new(values).mean
-end
-
-def stddev(values)
-  Stats.new(values).stddev
-end
+require_relative 'lib/results_table_builder'
 
 def sort_benchmarks(bench_names)
   benchmarks_metadata = YAML.load_file('benchmarks.yml')
@@ -72,55 +64,16 @@ def sort_benchmarks(bench_names)
 
 puts
 
-# Table for the data we've gathered
+# Build results table
 all_names = args.executables.keys
 base_name, *other_names = all_names
-table  = [["bench"]]
-format = ["%s"]
-all_names.each do |name|
-  table[0] += ["#{name} (ms)", "stddev (%)"]
-  format   += ["%.1f",         "%.1f"]
-  if args.rss
-    table[0] += ["RSS (MiB)"]
-    format   += ["%.1f"]
-  end
-end
-other_names.each do |name|
-  table[0] += ["#{name} 1st itr"]
-  format   += ["%.3f"]
-end
-other_names.each do |name|
-  table[0] += ["#{base_name}/#{name}"]
-  format   += ["%.3f"]
-end
-
-# Format the results table
-bench_names.each do |bench_name|
-  # Skip this bench_name if we failed to get data for any of the executables.
-  next unless bench_data.all? { |(_k, v)| v[bench_name] }
-
-  t0s = all_names.map { |name| (bench_data[name][bench_name]['warmup'][0] || bench_data[name][bench_name]['bench'][0]) * 1000.0 }
-  times_no_warmup = all_names.map { |name| bench_data[name][bench_name]['bench'].map { |v| v * 1000.0 } }
-  rsss = all_names.map { |name| bench_data[name][bench_name]['rss'] / 1024.0 / 1024.0 }
-
-  base_t0, *other_t0s = t0s
-  base_t, *other_ts = times_no_warmup
-  base_rss, *other_rsss = rsss
-
-  ratio_1sts = other_t0s.map { |other_t0| base_t0 / other_t0 }
-  ratios = other_ts.map { |other_t| mean(base_t) / mean(other_t) }
-
-  row = [bench_name, mean(base_t), 100 * stddev(base_t) / mean(base_t)]
-  row << base_rss if args.rss
-  other_ts.zip(other_rsss).each do |other_t, other_rss|
-    row += [mean(other_t), 100 * stddev(other_t) / mean(other_t)]
-    row << other_rss if args.rss
-  end
-
-  row += ratio_1sts + ratios
-
-  table << row
-end
+builder = ResultsTableBuilder.new(
+  executable_names: all_names,
+  bench_data: bench_data,
+  bench_names: bench_names,
+  include_rss: args.rss
+)
+table, format = builder.build
 
 output_path = nil
 if args.out_override
diff --git a/test/results_table_builder_test.rb b/test/results_table_builder_test.rb
new file mode 100644
index 00000000..bf3f2828
--- /dev/null
+++ b/test/results_table_builder_test.rb
@@ -0,0 +1,192 @@
+require_relative 'test_helper'
+require_relative '../lib/results_table_builder'
+
+describe ResultsTableBuilder do
+  describe '#build' do
+    it 'builds a table with header and data rows' do
+      executable_names = ['ruby', 'ruby-yjit']
+      bench_data = {
+        'ruby' => {
+          'fib' => {
+            'warmup' => [0.1],
+            'bench' => [0.1, 0.11, 0.09],
+            'rss' => 1024 * 1024 * 10
+          }
+        },
+        'ruby-yjit' => {
+          'fib' => {
+            'warmup' => [0.05],
+            'bench' => [0.05, 0.06, 0.04],
+            'rss' => 1024 * 1024 * 12
+          }
+        }
+      }
+      bench_names = ['fib']
+
+      builder = ResultsTableBuilder.new(
+        executable_names: executable_names,
+        bench_data: bench_data,
+        bench_names: bench_names,
+        include_rss: false
+      )
+
+      table, format = builder.build
+
+      assert_equal ['bench', 'ruby (ms)', 'stddev (%)', 'ruby-yjit (ms)', 'stddev (%)', 'ruby-yjit 1st itr', 'ruby/ruby-yjit'], table[0]
+
+      assert_equal ['%s', '%.1f', '%.1f', '%.1f', '%.1f', '%.3f', '%.3f'], format
+
+      assert_equal 'fib', table[1][0]
+      assert_in_delta 100.0, table[1][1], 1.0
+      assert_in_delta 50.0, table[1][3], 1.0
+      assert_in_delta 2.0, table[1][5], 0.1
+      assert_in_delta 2.0, table[1][6], 0.1
+    end
+
+    it 'includes RSS columns when include_rss is true' do
+      executable_names = ['ruby']
+      bench_data = {
+        'ruby' => {
+          'fib' => {
+            'warmup' => [0.1],
+            'bench' => [0.1],
+            'rss' => 1024 * 1024 * 10
+          }
+        }
+      }
+      bench_names = ['fib']
+
+      builder = ResultsTableBuilder.new(
+        executable_names: executable_names,
+        bench_data: bench_data,
+        bench_names: bench_names,
+        include_rss: true
+      )
+
+      table, format = builder.build
+
+      assert_equal ['bench', 'ruby (ms)', 'stddev (%)', 'RSS (MiB)'], table[0]
+
+      assert_equal ['%s', '%.1f', '%.1f', '%.1f'], format
+
+      assert_in_delta 10.0, table[1][3], 0.1
+    end
+
+    it 'skips benchmarks with missing data' do
+      executable_names = ['ruby', 'ruby-yjit']
+      bench_data = {
+        'ruby' => {
+          'fib' => {
+            'warmup' => [0.1],
+            'bench' => [0.1],
+            'rss' => 1024 * 1024 * 10
+          },
+          'loop' => {
+            'warmup' => [0.2],
+            'bench' => [0.2],
+            'rss' => 1024 * 1024 * 10
+          }
+        },
+        'ruby-yjit' => {
+          'fib' => {
+            'warmup' => [0.05],
+            'bench' => [0.05],
+            'rss' => 1024 * 1024 * 12
+          }
+        }
+      }
+      bench_names = ['fib', 'loop']
+
+      builder = ResultsTableBuilder.new(
+        executable_names: executable_names,
+        bench_data: bench_data,
+        bench_names: bench_names,
+        include_rss: false
+      )
+
+      table, _format = builder.build
+
+      assert_equal 2, table.length
+      assert_equal 'fib', table[1][0]
+    end
+
+    it 'handles multiple executables correctly' do
+      executable_names = ['ruby', 'ruby-yjit', 'ruby-rjit']
+      bench_data = {
+        'ruby' => {
+          'fib' => {
+            'warmup' => [0.1],
+            'bench' => [0.1],
+            'rss' => 1024 * 1024 * 10
+          }
+        },
+        'ruby-yjit' => {
+          'fib' => {
+            'warmup' => [0.05],
+            'bench' => [0.05],
+            'rss' => 1024 * 1024 * 12
+          }
+        },
+        'ruby-rjit' => {
+          'fib' => {
+            'warmup' => [0.07],
+            'bench' => [0.07],
+            'rss' => 1024 * 1024 * 11
+          }
+        }
+      }
+      bench_names = ['fib']
+
+      builder = ResultsTableBuilder.new(
+        executable_names: executable_names,
+        bench_data: bench_data,
+        bench_names: bench_names,
+        include_rss: false
+      )
+
+      table, format = builder.build
+
+      expected_header = [
+        'bench',
+        'ruby (ms)', 'stddev (%)',
+        'ruby-yjit (ms)', 'stddev (%)',
+        'ruby-rjit (ms)', 'stddev (%)',
+        'ruby-yjit 1st itr',
+        'ruby-rjit 1st itr',
+        'ruby/ruby-yjit',
+        'ruby/ruby-rjit'
+      ]
+      assert_equal expected_header, table[0]
+
+      expected_format = ['%s', '%.1f', '%.1f', '%.1f', '%.1f', '%.1f', '%.1f', '%.3f', '%.3f', '%.3f', '%.3f']
+      assert_equal expected_format, format
+    end
+
+    it 'uses bench data when warmup is missing' do
+      executable_names = ['ruby']
+      bench_data = {
+        'ruby' => {
+          'fib' => {
+            'warmup' => [],
+            'bench' => [0.1, 0.11],
+            'rss' => 1024 * 1024 * 10
+          }
+        }
+      }
+      bench_names = ['fib']
+
+      builder = ResultsTableBuilder.new(
+        executable_names: executable_names,
+        bench_data: bench_data,
+        bench_names: bench_names,
+        include_rss: false
+      )
+
+      table, _format = builder.build
+
+      assert_equal 2, table.length
+      assert_equal 'fib', table[1][0]
+      assert_in_delta 100.0, table[1][1], 5.0
+    end
+  end
+end

From 145f8ecc7f8dda74b40dd719ba1fb2f9465e59da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 04:25:00 +0000
Subject: [PATCH 03/12] Extract methods to explain what the data means

---
 lib/results_table_builder.rb | 43 ++++++++++++++++++++++++++----------
 1 file changed, 31 insertions(+), 12 deletions(-)

diff --git a/lib/results_table_builder.rb b/lib/results_table_builder.rb
index ec0de6af..5fcbd3ca 100644
--- a/lib/results_table_builder.rb
+++ b/lib/results_table_builder.rb
@@ -29,46 +29,46 @@ def build
 
   def build_header
     header = ["bench"]
-    
+
     @executable_names.each do |name|
       header += ["#{name} (ms)", "stddev (%)"]
       header += ["RSS (MiB)"] if @include_rss
     end
-    
+
     @other_names.each do |name|
       header += ["#{name} 1st itr"]
     end
-    
+
     @other_names.each do |name|
       header += ["#{@base_name}/#{name}"]
     end
-    
+
     header
   end
 
   def build_format
     format = ["%s"]
-    
+
     @executable_names.each do |_name|
       format += ["%.1f", "%.1f"]
       format += ["%.1f"] if @include_rss
     end
-    
+
     @other_names.each do |_name|
       format += ["%.3f"]
     end
-    
+
     @other_names.each do |_name|
       format += ["%.3f"]
     end
-    
+
     format
   end
 
   def build_row(bench_name)
-    t0s = @executable_names.map { |name| (bench_data_for(name, bench_name)['warmup'][0] || bench_data_for(name, bench_name)['bench'][0]) * 1000.0 }
-    times_no_warmup = @executable_names.map { |name| bench_data_for(name, bench_name)['bench'].map { |v| v * 1000.0 } }
-    rsss = @executable_names.map { |name| bench_data_for(name, bench_name)['rss'] / 1024.0 / 1024.0 }
+    t0s = extract_first_iteration_times(bench_name)
+    times_no_warmup = extract_benchmark_times(bench_name)
+    rsss = extract_rss_values(bench_name)
 
     base_t0, *other_t0s = t0s
     base_t, *other_ts = times_no_warmup
@@ -79,7 +79,7 @@ def build_row(bench_name)
 
     row = [bench_name, mean(base_t), 100 * stddev(base_t) / mean(base_t)]
     row << base_rss if @include_rss
-    
+
     other_ts.zip(other_rsss).each do |other_t, other_rss|
       row += [mean(other_t), 100 * stddev(other_t) / mean(other_t)]
       row << other_rss if @include_rss
@@ -90,6 +90,25 @@ def build_row(bench_name)
     row
   end
 
+  def extract_first_iteration_times(bench_name)
+    @executable_names.map do |name|
+      data = bench_data_for(name, bench_name)
+      (data['warmup'][0] || data['bench'][0]) * 1000.0
+    end
+  end
+
+  def extract_benchmark_times(bench_name)
+    @executable_names.map do |name|
+      bench_data_for(name, bench_name)['bench'].map { |v| v * 1000.0 }
+    end
+  end
+
+  def extract_rss_values(bench_name)
+    @executable_names.map do |name|
+      bench_data_for(name, bench_name)['rss'] / 1024.0 / 1024.0
+    end
+  end
+
   def bench_data_for(name, bench_name)
     @bench_data[name][bench_name]
   end

From f59d1eb796e9d4dc9f7c9fff8e1d94a22b85ca73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 04:28:38 +0000
Subject: [PATCH 04/12] Extract methods to build columns

---
 lib/results_table_builder.rb | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/lib/results_table_builder.rb b/lib/results_table_builder.rb
index 5fcbd3ca..164c7fc2 100644
--- a/lib/results_table_builder.rb
+++ b/lib/results_table_builder.rb
@@ -77,17 +77,27 @@ def build_row(bench_name)
     ratio_1sts = other_t0s.map { |other_t0| base_t0 / other_t0 }
     ratios = other_ts.map { |other_t| mean(base_t) / mean(other_t) }
 
-    row = [bench_name, mean(base_t), 100 * stddev(base_t) / mean(base_t)]
+    row = [bench_name]
+    build_base_columns(row, base_t, base_rss)
+    build_comparison_columns(row, other_ts, other_rsss)
+    row.concat(ratio_1sts)
+    row.concat(ratios)
+
+    row
+  end
+
+  def build_base_columns(row, base_t, base_rss)
+    row << mean(base_t)
+    row << 100 * stddev(base_t) / mean(base_t)
     row << base_rss if @include_rss
+  end
 
+  def build_comparison_columns(row, other_ts, other_rsss)
     other_ts.zip(other_rsss).each do |other_t, other_rss|
-      row += [mean(other_t), 100 * stddev(other_t) / mean(other_t)]
+      row << mean(other_t)
+      row << 100 * stddev(other_t) / mean(other_t)
       row << other_rss if @include_rss
     end
-
-    row += ratio_1sts + ratios
-
-    row
   end
 
   def extract_first_iteration_times(bench_name)

From ab2f6da668b4c7a882fe454ab66b52acaf9aa491 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 04:30:07 +0000
Subject: [PATCH 05/12] Extract method to build the ratio columns

---
 lib/results_table_builder.rb | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/lib/results_table_builder.rb b/lib/results_table_builder.rb
index 164c7fc2..fa322dd1 100644
--- a/lib/results_table_builder.rb
+++ b/lib/results_table_builder.rb
@@ -74,14 +74,10 @@ def build_row(bench_name)
     base_t, *other_ts = times_no_warmup
     base_rss, *other_rsss = rsss
 
-    ratio_1sts = other_t0s.map { |other_t0| base_t0 / other_t0 }
-    ratios = other_ts.map { |other_t| mean(base_t) / mean(other_t) }
-
     row = [bench_name]
     build_base_columns(row, base_t, base_rss)
     build_comparison_columns(row, other_ts, other_rsss)
-    row.concat(ratio_1sts)
-    row.concat(ratios)
+    build_ratio_columns(row, base_t0, other_t0s, base_t, other_ts)
 
     row
   end
@@ -100,6 +96,13 @@ def build_comparison_columns(row, other_ts, other_rsss)
     end
   end
 
+  def build_ratio_columns(row, base_t0, other_t0s, base_t, other_ts)
+    ratio_1sts = other_t0s.map { |other_t0| base_t0 / other_t0 }
+    ratios = other_ts.map { |other_t| mean(base_t) / mean(other_t) }
+    row.concat(ratio_1sts)
+    row.concat(ratios)
+  end
+
   def extract_first_iteration_times(bench_name)
     @executable_names.map do |name|
       data = bench_data_for(name, bench_name)

From 2f37643756e337e3418e8a7991312ac9c2ec3b49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 04:31:05 +0000
Subject: [PATCH 06/12] Extract constants to explain the convertions

---
 lib/results_table_builder.rb | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/lib/results_table_builder.rb b/lib/results_table_builder.rb
index fa322dd1..320778bd 100644
--- a/lib/results_table_builder.rb
+++ b/lib/results_table_builder.rb
@@ -1,6 +1,9 @@
 require_relative '../misc/stats'
 
 class ResultsTableBuilder
+  SECONDS_TO_MS = 1000.0
+  BYTES_TO_MIB = 1024.0 * 1024.0
+
   def initialize(executable_names:, bench_data:, bench_names:, include_rss: false)
     @executable_names = executable_names
     @bench_data = bench_data
@@ -106,19 +109,19 @@ def build_ratio_columns(row, base_t0, other_t0s, base_t, other_ts)
   def extract_first_iteration_times(bench_name)
     @executable_names.map do |name|
       data = bench_data_for(name, bench_name)
-      (data['warmup'][0] || data['bench'][0]) * 1000.0
+      (data['warmup'][0] || data['bench'][0]) * SECONDS_TO_MS
     end
   end
 
   def extract_benchmark_times(bench_name)
     @executable_names.map do |name|
-      bench_data_for(name, bench_name)['bench'].map { |v| v * 1000.0 }
+      bench_data_for(name, bench_name)['bench'].map { |v| v * SECONDS_TO_MS }
     end
   end
 
   def extract_rss_values(bench_name)
     @executable_names.map do |name|
-      bench_data_for(name, bench_name)['rss'] / 1024.0 / 1024.0
+      bench_data_for(name, bench_name)['rss'] / BYTES_TO_MIB
     end
   end
 

From b5493e17b28e25ae5763b2a861ae96ed7c1daef2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 04:31:58 +0000
Subject: [PATCH 07/12] Extract method to calculate stddev percentage

---
 lib/results_table_builder.rb | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/results_table_builder.rb b/lib/results_table_builder.rb
index 320778bd..264efc30 100644
--- a/lib/results_table_builder.rb
+++ b/lib/results_table_builder.rb
@@ -87,14 +87,14 @@ def build_row(bench_name)
 
   def build_base_columns(row, base_t, base_rss)
     row << mean(base_t)
-    row << 100 * stddev(base_t) / mean(base_t)
+    row << stddev_percent(base_t)
     row << base_rss if @include_rss
   end
 
   def build_comparison_columns(row, other_ts, other_rsss)
     other_ts.zip(other_rsss).each do |other_t, other_rss|
       row << mean(other_t)
-      row << 100 * stddev(other_t) / mean(other_t)
+      row << stddev_percent(other_t)
       row << other_rss if @include_rss
     end
   end
@@ -136,4 +136,8 @@ def mean(values)
   def stddev(values)
     Stats.new(values).stddev
   end
+
+  def stddev_percent(values)
+    100 * stddev(values) / mean(values)
+  end
 end

From 2eeb2ffb3e50d4dd4a1b47727297a529f1862b76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 04:33:44 +0000
Subject: [PATCH 08/12] Avoid extra array allocations

---
 lib/results_table_builder.rb | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/lib/results_table_builder.rb b/lib/results_table_builder.rb
index 264efc30..41b6db5a 100644
--- a/lib/results_table_builder.rb
+++ b/lib/results_table_builder.rb
@@ -34,16 +34,16 @@ def build_header
     header = ["bench"]
 
     @executable_names.each do |name|
-      header += ["#{name} (ms)", "stddev (%)"]
-      header += ["RSS (MiB)"] if @include_rss
+      header << "#{name} (ms)" << "stddev (%)"
+      header << "RSS (MiB)" if @include_rss
     end
 
     @other_names.each do |name|
-      header += ["#{name} 1st itr"]
+      header << "#{name} 1st itr"
     end
 
     @other_names.each do |name|
-      header += ["#{@base_name}/#{name}"]
+      header << "#{@base_name}/#{name}"
     end
 
     header
@@ -53,16 +53,16 @@ def build_format
     format = ["%s"]
 
     @executable_names.each do |_name|
-      format += ["%.1f", "%.1f"]
-      format += ["%.1f"] if @include_rss
+      format << "%.1f" << "%.1f"
+      format << "%.1f" if @include_rss
     end
 
     @other_names.each do |_name|
-      format += ["%.3f"]
+      format << "%.3f"
     end
 
     @other_names.each do |_name|
-      format += ["%.3f"]
+      format << "%.3f"
     end
 
     format

From fad55f88ff6f1157088f588ae1ce457a18ade203 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 04:35:57 +0000
Subject: [PATCH 09/12] Extract method to explain the skipping logic

---
 lib/results_table_builder.rb | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/lib/results_table_builder.rb b/lib/results_table_builder.rb
index 41b6db5a..ac47fcdb 100644
--- a/lib/results_table_builder.rb
+++ b/lib/results_table_builder.rb
@@ -18,8 +18,7 @@ def build
     format = build_format
 
     @bench_names.each do |bench_name|
-      # Skip this bench_name if we failed to get data for any of the executables.
-      next unless @bench_data.all? { |(_k, v)| v[bench_name] }
+      next unless has_complete_data?(bench_name)
 
       row = build_row(bench_name)
       table << row
@@ -30,6 +29,10 @@ def build
 
   private
 
+  def has_complete_data?(bench_name)
+    @bench_data.all? { |(_k, v)| v[bench_name] }
+  end
+
   def build_header
     header = ["bench"]
 

From 03fafdf250ad67db3a0bcdb1dae2957b19964375 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Tue, 18 Nov 2025 05:12:49 +0000
Subject: [PATCH 10/12] Inline method in the only object that is using it

---
 lib/benchmark_runner.rb            |  10 --
 lib/results_table_builder.rb       |  22 +++-
 run_benchmarks.rb                  |   9 --
 test/benchmark_runner_test.rb      |  51 --------
 test/results_table_builder_test.rb | 199 +++++++++++++++++++++++++++--
 5 files changed, 209 insertions(+), 82 deletions(-)

diff --git a/lib/benchmark_runner.rb b/lib/benchmark_runner.rb
index e3ea26de..6333cbae 100644
--- a/lib/benchmark_runner.rb
+++ b/lib/benchmark_runner.rb
@@ -16,16 +16,6 @@ def free_file_no(directory)
     end
   end
 
-  # Sort benchmarks with headlines first, then others, then micro
-  def sort_benchmarks(bench_names, metadata)
-    headline_benchmarks = metadata.select { |_, meta| meta['category'] == 'headline' }.keys
-    micro_benchmarks = metadata.select { |_, meta| meta['category'] == 'micro' }.keys
-
-    headline_names, bench_names = bench_names.partition { |name| headline_benchmarks.include?(name) }
-    micro_names, other_names = bench_names.partition { |name| micro_benchmarks.include?(name) }
-    headline_names.sort + other_names.sort + micro_names.sort
-  end
-
   # Checked system - error or return info if the command fails
   def check_call(command, env: {}, raise_error: true, quiet: false)
     puts("+ #{command}") unless quiet
diff --git a/lib/results_table_builder.rb b/lib/results_table_builder.rb
index ac47fcdb..8d481791 100644
--- a/lib/results_table_builder.rb
+++ b/lib/results_table_builder.rb
@@ -1,16 +1,17 @@
 require_relative '../misc/stats'
+require 'yaml'
 
 class ResultsTableBuilder
   SECONDS_TO_MS = 1000.0
   BYTES_TO_MIB = 1024.0 * 1024.0
 
-  def initialize(executable_names:, bench_data:, bench_names:, include_rss: false)
+  def initialize(executable_names:, bench_data:, include_rss: false)
     @executable_names = executable_names
     @bench_data = bench_data
-    @bench_names = bench_names
     @include_rss = include_rss
     @base_name = executable_names.first
     @other_names = executable_names[1..]
+    @bench_names = compute_bench_names
   end
 
   def build
@@ -143,4 +144,21 @@ def stddev(values)
   def stddev_percent(values)
     100 * stddev(values) / mean(values)
   end
+
+  def compute_bench_names
+    # Get keys from all rows in case a benchmark failed for only some executables
+    all_bench_names = @bench_data.map { |k, v| v.keys }.flatten.uniq
+    benchmarks_metadata = YAML.load_file('benchmarks.yml')
+    sort_benchmarks(all_bench_names, benchmarks_metadata)
+  end
+
+  # Sort benchmarks with headlines first, then others, then micro
+  def sort_benchmarks(bench_names, metadata)
+    headline_benchmarks = metadata.select { |_, meta| meta['category'] == 'headline' }.keys
+    micro_benchmarks = metadata.select { |_, meta| meta['category'] == 'micro' }.keys
+
+    headline_names, bench_names = bench_names.partition { |name| headline_benchmarks.include?(name) }
+    micro_names, other_names = bench_names.partition { |name| micro_benchmarks.include?(name) }
+    headline_names.sort + other_names.sort + micro_names.sort
+  end
 end
diff --git a/run_benchmarks.rb b/run_benchmarks.rb
index f72f1a40..e85cc897 100755
--- a/run_benchmarks.rb
+++ b/run_benchmarks.rb
@@ -15,11 +15,6 @@
 require_relative 'lib/argument_parser'
 require_relative 'lib/results_table_builder'
 
-def sort_benchmarks(bench_names)
-  benchmarks_metadata = YAML.load_file('benchmarks.yml')
-  BenchmarkRunner.sort_benchmarks(bench_names, benchmarks_metadata)
-end
-
 args = ArgumentParser.parse(ARGV)
 
 CPUConfig.configure_for_benchmarking(turbo: args.turbo)
@@ -52,9 +47,6 @@ def sort_benchmarks(bench_names)
 end
 
 bench_end_time = Time.now.to_f
-# Get keys from all rows in case a benchmark failed for only some executables.
-bench_names = sort_benchmarks(bench_data.map { |k, v| v.keys }.flatten.uniq)
-
 bench_total_time = (bench_end_time - bench_start_time).to_i
 puts("Total time spent benchmarking: #{bench_total_time}s")
 
@@ -70,7 +62,6 @@ def sort_benchmarks(bench_names)
 builder = ResultsTableBuilder.new(
   executable_names: all_names,
   bench_data: bench_data,
-  bench_names: bench_names,
   include_rss: args.rss
 )
 table, format = builder.build
diff --git a/test/benchmark_runner_test.rb b/test/benchmark_runner_test.rb
index 20a7b615..ef450323 100644
--- a/test/benchmark_runner_test.rb
+++ b/test/benchmark_runner_test.rb
@@ -49,57 +49,6 @@
     end
   end
 
-  describe '.sort_benchmarks' do
-    before do
-      @metadata = {
-        'fib' => { 'category' => 'micro' },
-        'railsbench' => { 'category' => 'headline' },
-        'optcarrot' => { 'category' => 'headline' },
-        'some_bench' => { 'category' => 'other' },
-        'another_bench' => { 'category' => 'other' },
-        'zebra' => { 'category' => 'other' }
-      }
-    end
-
-    it 'sorts benchmarks with headlines first, then others, then micro' do
-      bench_names = ['fib', 'some_bench', 'railsbench', 'another_bench', 'optcarrot']
-      result = BenchmarkRunner.sort_benchmarks(bench_names, @metadata)
-
-      # Headlines should be first
-      headline_indices = [result.index('railsbench'), result.index('optcarrot')]
-      assert_equal true, headline_indices.all? { |i| i < 2 }
-
-      # Micro should be last
-      assert_equal 'fib', result.last
-
-      # Others in the middle
-      other_indices = [result.index('some_bench'), result.index('another_bench')]
-      assert_equal true, other_indices.all? { |i| i >= 2 && i < result.length - 1 }
-    end
-
-    it 'sorts alphabetically within categories' do
-      bench_names = ['zebra', 'another_bench', 'some_bench']
-      result = BenchmarkRunner.sort_benchmarks(bench_names, @metadata)
-      assert_equal ['another_bench', 'some_bench', 'zebra'], result
-    end
-
-    it 'handles empty list' do
-      result = BenchmarkRunner.sort_benchmarks([], @metadata)
-      assert_equal [], result
-    end
-
-    it 'handles single benchmark' do
-      result = BenchmarkRunner.sort_benchmarks(['fib'], @metadata)
-      assert_equal ['fib'], result
-    end
-
-    it 'handles only headline benchmarks' do
-      bench_names = ['railsbench', 'optcarrot']
-      result = BenchmarkRunner.sort_benchmarks(bench_names, @metadata)
-      assert_equal ['optcarrot', 'railsbench'], result
-    end
-  end
-
   describe '.check_call' do
     it 'runs a successful command and returns success status' do
       result = nil
diff --git a/test/results_table_builder_test.rb b/test/results_table_builder_test.rb
index bf3f2828..f5b811fc 100644
--- a/test/results_table_builder_test.rb
+++ b/test/results_table_builder_test.rb
@@ -1,7 +1,33 @@
 require_relative 'test_helper'
 require_relative '../lib/results_table_builder'
+require 'yaml'
+require 'tmpdir'
 
 describe ResultsTableBuilder do
+  before do
+    @original_dir = Dir.pwd
+    @temp_dir = Dir.mktmpdir
+    Dir.chdir(@temp_dir)
+
+    benchmarks_metadata = {
+      'fib' => { 'category' => 'micro' },
+      'loop' => { 'category' => 'micro' },
+      'railsbench' => { 'category' => 'headline' },
+      'optcarrot' => { 'category' => 'headline' },
+      'zebra' => { 'category' => 'other' },
+      'apple' => { 'category' => 'other' },
+      'mango' => { 'category' => 'other' },
+      'some_bench' => { 'category' => 'other' },
+      'another_bench' => { 'category' => 'other' }
+    }
+    File.write('benchmarks.yml', YAML.dump(benchmarks_metadata))
+  end
+
+  after do
+    Dir.chdir(@original_dir)
+    FileUtils.rm_rf(@temp_dir)
+  end
+
   describe '#build' do
     it 'builds a table with header and data rows' do
       executable_names = ['ruby', 'ruby-yjit']
@@ -21,12 +47,10 @@
           }
         }
       }
-      bench_names = ['fib']
 
       builder = ResultsTableBuilder.new(
         executable_names: executable_names,
         bench_data: bench_data,
-        bench_names: bench_names,
         include_rss: false
       )
 
@@ -54,12 +78,10 @@
           }
         }
       }
-      bench_names = ['fib']
 
       builder = ResultsTableBuilder.new(
         executable_names: executable_names,
         bench_data: bench_data,
-        bench_names: bench_names,
         include_rss: true
       )
 
@@ -95,12 +117,10 @@
           }
         }
       }
-      bench_names = ['fib', 'loop']
 
       builder = ResultsTableBuilder.new(
         executable_names: executable_names,
         bench_data: bench_data,
-        bench_names: bench_names,
         include_rss: false
       )
 
@@ -135,12 +155,10 @@
           }
         }
       }
-      bench_names = ['fib']
 
       builder = ResultsTableBuilder.new(
         executable_names: executable_names,
         bench_data: bench_data,
-        bench_names: bench_names,
         include_rss: false
       )
 
@@ -173,12 +191,10 @@
           }
         }
       }
-      bench_names = ['fib']
 
       builder = ResultsTableBuilder.new(
         executable_names: executable_names,
         bench_data: bench_data,
-        bench_names: bench_names,
         include_rss: false
       )
 
@@ -188,5 +204,168 @@
       assert_equal 'fib', table[1][0]
       assert_in_delta 100.0, table[1][1], 5.0
     end
+
+    it 'sorts benchmarks with headlines first, then others, then micro' do
+      executable_names = ['ruby']
+      bench_data = {
+        'ruby' => {
+          'fib' => {
+            'warmup' => [0.1],
+            'bench' => [0.1],
+            'rss' => 1024 * 1024 * 10
+          },
+          'loop' => {
+            'warmup' => [0.1],
+            'bench' => [0.1],
+            'rss' => 1024 * 1024 * 10
+          },
+          'railsbench' => {
+            'warmup' => [0.1],
+            'bench' => [0.1],
+            'rss' => 1024 * 1024 * 10
+          },
+          'optcarrot' => {
+            'warmup' => [0.1],
+            'bench' => [0.1],
+            'rss' => 1024 * 1024 * 10
+          }
+        }
+      }
+
+      builder = ResultsTableBuilder.new(
+        executable_names: executable_names,
+        bench_data: bench_data,
+        include_rss: false
+      )
+
+      table, _format = builder.build
+
+      bench_names = table[1..].map { |row| row[0] }
+
+      assert_equal 'optcarrot', bench_names[0]
+      assert_equal 'railsbench', bench_names[1]
+
+      assert_equal 'fib', bench_names[2]
+      assert_equal 'loop', bench_names[3]
+    end
+
+    it 'sorts benchmarks alphabetically within other category' do
+      executable_names = ['ruby']
+      bench_data = {
+        'ruby' => {
+          'zebra' => {
+            'warmup' => [0.1],
+            'bench' => [0.1],
+            'rss' => 1024 * 1024 * 10
+          },
+          'apple' => {
+            'warmup' => [0.1],
+            'bench' => [0.1],
+            'rss' => 1024 * 1024 * 10
+          },
+          'mango' => {
+            'warmup' => [0.1],
+            'bench' => [0.1],
+            'rss' => 1024 * 1024 * 10
+          }
+        }
+      }
+
+      builder = ResultsTableBuilder.new(
+        executable_names: executable_names,
+        bench_data: bench_data,
+        include_rss: false
+      )
+
+      table, _format = builder.build
+
+      bench_names = table[1..].map { |row| row[0] }
+
+      assert_equal ['apple', 'mango', 'zebra'], bench_names
+    end
+
+    it 'handles single benchmark' do
+      executable_names = ['ruby']
+      bench_data = {
+        'ruby' => {
+          'fib' => {
+            'warmup' => [0.1],
+            'bench' => [0.1],
+            'rss' => 1024 * 1024 * 10
+          }
+        }
+      }
+
+      builder = ResultsTableBuilder.new(
+        executable_names: executable_names,
+        bench_data: bench_data,
+        include_rss: false
+      )
+
+      table, _format = builder.build
+
+      assert_equal 2, table.length
+      assert_equal 'fib', table[1][0]
+    end
+
+    it 'handles only headline benchmarks' do
+      executable_names = ['ruby']
+      bench_data = {
+        'ruby' => {
+          'railsbench' => {
+            'warmup' => [0.1],
+            'bench' => [0.1],
+            'rss' => 1024 * 1024 * 10
+          },
+          'optcarrot' => {
+            'warmup' => [0.1],
+            'bench' => [0.1],
+            'rss' => 1024 * 1024 * 10
+          }
+        }
+      }
+
+      builder = ResultsTableBuilder.new(
+        executable_names: executable_names,
+        bench_data: bench_data,
+        include_rss: false
+      )
+
+      table, _format = builder.build
+
+      bench_names = table[1..].map { |row| row[0] }
+
+      assert_equal ['optcarrot', 'railsbench'], bench_names
+    end
+
+    it 'sorts mixed categories correctly with multiple benchmarks' do
+      executable_names = ['ruby']
+      bench_data = {
+        'ruby' => {
+          'fib' => { 'warmup' => [0.1], 'bench' => [0.1], 'rss' => 1024 * 1024 * 10 },
+          'some_bench' => { 'warmup' => [0.1], 'bench' => [0.1], 'rss' => 1024 * 1024 * 10 },
+          'railsbench' => { 'warmup' => [0.1], 'bench' => [0.1], 'rss' => 1024 * 1024 * 10 },
+          'another_bench' => { 'warmup' => [0.1], 'bench' => [0.1], 'rss' => 1024 * 1024 * 10 },
+          'optcarrot' => { 'warmup' => [0.1], 'bench' => [0.1], 'rss' => 1024 * 1024 * 10 }
+        }
+      }
+
+      builder = ResultsTableBuilder.new(
+        executable_names: executable_names,
+        bench_data: bench_data,
+        include_rss: false
+      )
+
+      table, _format = builder.build
+      bench_names = table[1..].map { |row| row[0] }
+
+      assert_equal 'optcarrot', bench_names[0]
+      assert_equal 'railsbench', bench_names[1]
+
+      assert_equal 'another_bench', bench_names[2]
+      assert_equal 'some_bench', bench_names[3]
+
+      assert_equal 'fib', bench_names[4]
+    end
   end
 end

From c357a8745748e31ce2a249f9293184474c436a69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Fri, 21 Nov 2025 18:23:14 +0000
Subject: [PATCH 11/12] Simplify benchmark sorting by using sort_by with
 category priority

---
 lib/results_table_builder.rb | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/lib/results_table_builder.rb b/lib/results_table_builder.rb
index 8d481791..f95250a0 100644
--- a/lib/results_table_builder.rb
+++ b/lib/results_table_builder.rb
@@ -154,11 +154,15 @@ def compute_bench_names
 
   # Sort benchmarks with headlines first, then others, then micro
   def sort_benchmarks(bench_names, metadata)
-    headline_benchmarks = metadata.select { |_, meta| meta['category'] == 'headline' }.keys
-    micro_benchmarks = metadata.select { |_, meta| meta['category'] == 'micro' }.keys
+    bench_names.sort_by { |name| [category_priority(name, metadata), name] }
+  end
 
-    headline_names, bench_names = bench_names.partition { |name| headline_benchmarks.include?(name) }
-    micro_names, other_names = bench_names.partition { |name| micro_benchmarks.include?(name) }
-    headline_names.sort + other_names.sort + micro_names.sort
+  def category_priority(bench_name, metadata)
+    category = metadata.dig(bench_name, 'category') || 'other'
+    case category
+    when 'headline' then 0
+    when 'micro' then 2
+    else 1
+    end
   end
 end

From 54071c684588a90e7a1669d6b28e593862c2495f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafael=20Mendon=C3=A7a=20Fran=C3=A7a?=
 <rafael.franca@shopify.com>
Date: Fri, 21 Nov 2025 18:28:30 +0000
Subject: [PATCH 12/12] Improve benchmark name extraction

Use Ruby methods to make the logic easier to understand.
---
 lib/results_table_builder.rb | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lib/results_table_builder.rb b/lib/results_table_builder.rb
index f95250a0..6bbc53f7 100644
--- a/lib/results_table_builder.rb
+++ b/lib/results_table_builder.rb
@@ -146,10 +146,12 @@ def stddev_percent(values)
   end
 
   def compute_bench_names
-    # Get keys from all rows in case a benchmark failed for only some executables
-    all_bench_names = @bench_data.map { |k, v| v.keys }.flatten.uniq
     benchmarks_metadata = YAML.load_file('benchmarks.yml')
-    sort_benchmarks(all_bench_names, benchmarks_metadata)
+    sort_benchmarks(all_benchmark_names, benchmarks_metadata)
+  end
+
+  def all_benchmark_names
+    @bench_data.values.flat_map(&:keys).uniq
   end
 
   # Sort benchmarks with headlines first, then others, then micro