Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -779,14 +779,15 @@ the two samples, which is common in benchmark scenarios.

### Enabling T-Test Mode

Enable t-test mode with `ttest: true`. This automatically sets `repeatSuite=30` to collect enough
independent samples for reliable statistical analysis (per the Central Limit Theorem):
Enable t-test mode with `ttest: true`. It requires at least 30 independent samples (`minSamples` x `repeatSuite`) for
reliable statistical analysis (per the Central Limit Theorem):

```js
const { Suite } = require('bench-node');

const suite = new Suite({
ttest: true, // Enables t-test and auto-sets repeatSuite=30
ttest: true, // Enables t-test, which requires at least 30 (preferably 40) samples for statistical significance
minSamples: 40 // sample count is >= minSamples x repeatSuite
});

suite
Expand Down
Binary file added examples/.DS_Store
Binary file not shown.
3 changes: 2 additions & 1 deletion examples/statistical-significance/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ Enable t-test mode with `ttest: true`:
const { Suite } = require('bench-node');

const suite = new Suite({
ttest: true, // Automatically sets repeatSuite=30
ttest: true,
minSamples: 30, // minSamples x repeatSuite must be >= 30
});

suite.add('baseline', { baseline: true }, () => {
Expand Down
1 change: 1 addition & 0 deletions examples/statistical-significance/node.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ const { Suite } = require('../../lib');
// Enable t-test mode - each benchmark needs minSamples x repeatSuite >= 30 samples for the analysis to run
const suite = new Suite({
ttest: true,
minSamples: 30
});

// Baseline: Simple array sum using for loop
Expand Down
10 changes: 7 additions & 3 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ const defaultBenchOptions = {
};

// Minimum number of samples (minSamples x repeatSuite) required for reliable t-test results
const MIN_REPEAT_FOR_TTEST = 30;
const MIN_SAMPLES_FOR_TTEST = 30;

function throwIfNoNativesSyntax() {
if (process.execArgv.includes("--allow-natives-syntax") === false) {
Expand Down Expand Up @@ -180,8 +180,6 @@ class Suite {
if (options.repeatSuite !== undefined) {
validateNumber(options.repeatSuite, "options.repeatSuite", 1);
repeatSuite = options.repeatSuite;
} else if (this.#ttest) {
repeatSuite = MIN_REPEAT_FOR_TTEST;
}
this.#repeatSuite = repeatSuite;

Expand Down Expand Up @@ -231,6 +229,12 @@ class Suite {
throw new Error("There is already a baseline benchmark");
}

if (this.#ttest && (options.minSamples * options.repeatSuite < MIN_SAMPLES_FOR_TTEST)) {
process.emitWarning(
`The benchmark "${name}" may not have enough samples to run t-test analysis. Please set minSamples x repeatSuite >= ${MIN_SAMPLES_FOR_TTEST}`,
);
}

const benchmark = new Benchmark(
name,
fn,
Expand Down
9 changes: 7 additions & 2 deletions lib/utils/analyze.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,9 @@ function analyze(results, sorted = true, options = {}) {
}

if (ttest) {
const resultSamples = result.opsSecPerRun;
const baselineSamplesForTest = baselineResult.opsSecPerRun;
const resultSamples = result.histogram?.sampleData ?? [];
const baselineSamplesForTest =
baselineResult.histogram?.sampleData ?? [];

if (
baselineSamplesForTest?.length >= 30 &&
Expand All @@ -62,6 +63,10 @@ function analyze(results, sorted = true, options = {}) {
confidence: ttestResult.confidence,
stars: ttestResult.stars,
};
} else {
result.significanceTest = {
significant: false
}
}
}
}
Expand Down
60 changes: 29 additions & 31 deletions test/ttest.js
Original file line number Diff line number Diff line change
Expand Up @@ -238,13 +238,13 @@ describe("T-Test Integration with analyze", () => {
assert.strictEqual(testResult.significanceTest, undefined);
});

it("should include significanceTest when ttest is true and opsSecPerRun >= 30", () => {
// Generate 30+ opsSecPerRun samples (from repeatSuite)
const baselineOpsSecPerRun = Array.from(
it("should include significanceTest when ttest is true and samples >= 30", () => {
// Generate 30+ samples
const baselineSampleData = Array.from(
{ length: 30 },
(_, i) => 100 + (i % 3) - 1,
);
const testOpsSecPerRun = Array.from(
const testSampleData = Array.from(
{ length: 30 },
(_, i) => 200 + (i % 3) - 1,
);
Expand All @@ -254,12 +254,12 @@ describe("T-Test Integration with analyze", () => {
name: "baseline",
opsSec: 100,
baseline: true,
opsSecPerRun: baselineOpsSecPerRun,
histogram: { sampleData: baselineSampleData },
},
{
name: "test",
opsSec: 200,
opsSecPerRun: testOpsSecPerRun,
histogram: { sampleData: testSampleData },
},
];

Expand All @@ -272,7 +272,7 @@ describe("T-Test Integration with analyze", () => {
assert.ok(typeof testResult.significanceTest.confidence === "string");
});

it("should not include significanceTest without opsSecPerRun", () => {
it("should mark significanceTest as failed without samples", () => {
const results = [
{
name: "baseline",
Expand All @@ -288,39 +288,37 @@ describe("T-Test Integration with analyze", () => {
const analyzed = analyze(results, true, { ttest: true });
const testResult = analyzed.find((r) => r.name === "test");

// Should not throw, and significanceTest should not be set (no opsSecPerRun)
assert.strictEqual(testResult.significanceTest, undefined);
assert.deepEqual(testResult.significanceTest, { significant: false});
});

it("should not include significanceTest when opsSecPerRun < 30", () => {
it("should not include significanceTest when samples < 30", () => {
const results = [
{
name: "baseline",
opsSec: 100,
baseline: true,
opsSecPerRun: Array.from({ length: 10 }, () => 100),
histogram: { samples: Array.from({ length: 10 }, () => 100) },
},
{
name: "test",
opsSec: 200,
opsSecPerRun: Array.from({ length: 10 }, () => 200),
histogram: { samples: Array.from({ length: 10 }, () => 200) },
},
];

const analyzed = analyze(results, true, { ttest: true });
const testResult = analyzed.find((r) => r.name === "test");

// Should not throw, and significanceTest should not be set (not enough samples)
assert.strictEqual(testResult.significanceTest, undefined);
assert.deepEqual(testResult.significanceTest, { significant: false});
});

it("should detect significant difference between clearly different benchmarks", () => {
// Generate 30+ opsSecPerRun with clearly different means
const baselineOpsSecPerRun = Array.from(
// Generate 30+ samples with clearly different means
const baselineSampleData = Array.from(
{ length: 30 },
(_, i) => 100 + (i % 5) - 2,
);
const fastOpsSecPerRun = Array.from(
const fastSampleData = Array.from(
{ length: 30 },
(_, i) => 200 + (i % 5) - 2,
);
Expand All @@ -330,12 +328,12 @@ describe("T-Test Integration with analyze", () => {
name: "baseline",
opsSec: 100,
baseline: true,
opsSecPerRun: baselineOpsSecPerRun,
histogram: { sampleData: baselineSampleData },
},
{
name: "fast",
opsSec: 200,
opsSecPerRun: fastOpsSecPerRun,
histogram: { sampleData: fastSampleData },
},
];

Expand All @@ -348,12 +346,12 @@ describe("T-Test Integration with analyze", () => {

it("should not mark as significant when differences are within noise", () => {
// Same benchmark run twice - should have similar results with high variance overlap
// Generate 30+ opsSecPerRun with overlapping distributions
const baselineOpsSecPerRun = Array.from(
// Generate 30+ samples with overlapping distributions
const baselineSampleData = Array.from(
{ length: 30 },
(_, i) => 100 + ((i % 5) - 2) * 2,
);
const similarOpsSecPerRun = Array.from(
const similarSampleData = Array.from(
{ length: 30 },
(_, i) => 101 + ((i % 5) - 2) * 2,
);
Expand All @@ -363,12 +361,12 @@ describe("T-Test Integration with analyze", () => {
name: "baseline",
opsSec: 100,
baseline: true,
opsSecPerRun: baselineOpsSecPerRun,
histogram: { sampleData: baselineSampleData },
},
{
name: "similar",
opsSec: 101, // Very close to baseline
opsSecPerRun: similarOpsSecPerRun,
histogram: { sampleData: similarSampleData },
},
];

Expand All @@ -384,18 +382,18 @@ describe("Statistical significance requires repeatSuite >= 30", () => {
const { analyze } = require("../lib/utils/analyze");

it("should only compute significance when repeatSuite provides 30+ samples", () => {
// With 30+ opsSecPerRun, significance should be computed
// With 30+ samples, significance should be computed
const results = [
{
name: "baseline",
opsSec: 100,
baseline: true,
opsSecPerRun: Array.from({ length: 30 }, () => 100),
histogram: { sampleData: Array.from({ length: 30 }, () => 100) },
},
{
name: "test",
opsSec: 200,
opsSecPerRun: Array.from({ length: 30 }, () => 200),
histogram: { sampleData: Array.from({ length: 30 }, () => 200) },
},
];

Expand All @@ -406,24 +404,24 @@ describe("Statistical significance requires repeatSuite >= 30", () => {
});

it("should not compute significance when repeatSuite < 30", () => {
// With fewer than 30 opsSecPerRun, significance should not be computed
// With fewer than 30 samples, significance should not be computed
const results = [
{
name: "baseline",
opsSec: 100,
baseline: true,
opsSecPerRun: Array.from({ length: 10 }, () => 100),
histogram: { sampleData: Array.from({ length: 10 }, () => 100) },
},
{
name: "test",
opsSec: 200,
opsSecPerRun: Array.from({ length: 10 }, () => 200),
histogram: { sampleData: Array.from({ length: 10 }, () => 200) },
},
];

const analyzed = analyze(results, true, { ttest: true });
const testResult = analyzed.find((r) => r.name === "test");

assert.strictEqual(testResult.significanceTest, undefined);
assert.deepEqual(testResult.significanceTest, { significant: false});
});
});
Loading