Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Benchmarks/Aiur.lean
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def proveBench : IO $ Array BenchReport := do
let funIdx := toplevel.getFuncIdx `main |>.get!
bgroup "nat_fib" [
bench "prove fib 10" (Aiur.AiurSystem.prove system friParameters funIdx #[10]) default,
]
] { samplingMode := .flat, sampleTime := 10, throughput := .some (Throughput.bytes 1) }

def verifyBench : IO $ Array BenchReport := do
match toplevel.checkAndSimplify with
Expand All @@ -124,5 +124,6 @@ def verifyBench : IO $ Array BenchReport := do
bench "verify fib 10" (Aiur.AiurSystem.verify system friParameters claim) proof
]

-- Test
def main (_args : List String) : IO Unit := do
let _result ← proveBench
9 changes: 6 additions & 3 deletions Benchmarks/Blake3.lean
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ import Ix.Aiur.Compile
import Ix.Aiur.Protocol
import Ix.Benchmark.Bench

abbrev dataSizes := #[64, 128, 256, 512, 1024, 2048]
abbrev numHashesPerProof := #[1, 2, 4, 8, 16, 32]
-- abbrev dataSizes := #[64, 128, 256, 512, 1024, 2048]
-- abbrev numHashesPerProof := #[1, 2, 4, 8, 16, 32]
abbrev dataSizes := #[1, 2, 3, 4, 5]
abbrev numHashesPerProof := #[1]

def commitmentParameters : Aiur.CommitmentParameters := {
logBlowup := 1
Expand Down Expand Up @@ -40,7 +42,8 @@ def blake3Bench : IO $ Array BenchReport := do
data := ioBuffer.data ++ data
map := ioBuffer.map.insert #[.ofNat idx] ioKeyInfo }
benches := benches.push <| bench s!"dataSize={dataSize} numHashes={numHashes}" (aiurSystem.prove friParameters funIdx #[Aiur.G.ofNat numHashes]) ioBuffer
bgroup "prove blake3" benches.toList { oneShot := true }
bgroup "prove blake3" benches.toList { oneShot := false, throughput := .some (Throughput.bytes 100) }
-- bgroup "prove blake3" benches.toList { oneShot := false }

def parseFunction (words : List String) (param : String): Option String :=
words.find? (·.startsWith param) |> .map (·.stripPrefix param)
Expand Down
161 changes: 95 additions & 66 deletions Ix/Benchmark/Bench.lean
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import Batteries

import Ix.Benchmark.Serde
import Ix.Benchmark.Tukey
import Ix.Benchmark.Throughput

open Batteries (RBMap)

Expand All @@ -31,7 +32,7 @@ structure BenchGroup where
name : String
config : Config

inductive BenchFn ( α β : Type) where
inductive BenchFn (α β : Type) where
| pure (fn : α → β)
| io (fn : α → IO β)

Expand Down Expand Up @@ -78,8 +79,8 @@ def printRunning (config : Config) (expectedTime : Float) (numIters : Nat) (warn
IO.eprintln s!"Warning: Unable to complete {config.numSamples} samples in {config.sampleTime.floatPretty 2}s. You may wish to increase target time to {expectedTime.floatPretty 2}s"
IO.println s!"Running {config.numSamples} samples in {expectedTime.floatPretty 2}s ({numIters.natPretty} iterations)\n"

-- Core sampling loop that runs the benchmark function and returns the array of measured timings
def runSample (sampleIters : Array Nat) (bench : Benchmarkable α β) : IO (Array Nat) := do
-- Core sampling loop that runs the benchmark function for a fixed (if flat) or linearly increasing (if linear) number of iterations per sample, and returns the final dataset as the array of measured times for each sample
def runSamples (sampleIters : Array Nat) (bench : Benchmarkable α β) : IO (Array Nat) := do
let func := bench.getFn
let mut timings : Array Nat := #[]
for iters in sampleIters do
Expand All @@ -92,9 +93,9 @@ def runSample (sampleIters : Array Nat) (bench : Benchmarkable α β) : IO (Arra

-- TODO: Recommend sample count if expectedTime >>> bg.config.sampleTime (i.e. itersPerSample == 1)
/--
Runs the sample as a sequence of constant iterations per data point, where the iteration count attempts to fit into the configurable `sampleTime` but cannot be less than 1.
Runs the benchmark samples as a sequence of constant iterations per data point, where the iteration count attempts to fit into the configurable `sampleTime` but cannot be less than 1.

Returns the iteration counts and elapsed time per data point.
Returns the iteration counts and elapsed time per sample.
-/
def BenchGroup.sampleFlat (bg : BenchGroup) (bench : Benchmarkable α β)
(warmupMean : Float) : IO (Array Nat × Array Nat) := do
Expand All @@ -106,11 +107,11 @@ def BenchGroup.sampleFlat (bg : BenchGroup) (bench : Benchmarkable α β)
printRunning bg.config expectedTime totalIters itersPerSample
--IO.println s!"Flat sample. Iters per sample: {itersPerSample}"
let sampleIters := Array.replicate bg.config.numSamples itersPerSample
let timings ← runSample sampleIters bench
let timings ← runSamples sampleIters bench
return (sampleIters, timings)

/--
Runs the sample as a sequence of linearly increasing iterations [d, 2d, 3d, ..., Nd] where `N` is the total number of samples and `d` is a factor derived from the warmup iteration time.
Runs the benchmarks samples as a sequence of linearly increasing iterations [d, 2d, 3d, ..., Nd] where `N` is the total number of samples and `d` is a factor derived from the warmup iteration time.

The sum of this series should be roughly equivalent to the total `sampleTime`.

Expand All @@ -126,22 +127,14 @@ def BenchGroup.sampleLinear (bg : BenchGroup) (bench : Benchmarkable α β) (war
let sampleIters := (Array.range bg.config.numSamples).map (fun x => (x + 1) * d)
printRunning bg.config expectedTime sampleIters.sum d
--IO.println s!"Linear sample. Iters increase by a factor of {d} per sample"
let timings ← runSample sampleIters bench
let timings ← runSamples sampleIters bench
return (sampleIters, timings)

def BenchGroup.sample (bg : BenchGroup) (bench : Benchmarkable α β) (warmupMean : Float) : IO (Array Nat × Array Nat) := do
match bg.config.samplingMode with
| .flat => bg.sampleFlat bench warmupMean
| .linear => bg.sampleLinear bench warmupMean

-- TODO
inductive Throughput where
| Bytes (n : UInt64)
| BytesDecimal (n : UInt64)
| Elements (n : UInt64)
| Bits (n : UInt64)
deriving Repr

structure MeasurementData where
data : Data
avgTimes : Distribution
Expand Down Expand Up @@ -197,45 +190,68 @@ def compareToThreshold (estimate : Estimate) (noiseThreshold : Float) : Comparis
let (lb, ub) := (ci.lowerBound, ci.upperBound)
let noiseNeg := noiseThreshold.neg

if lb < noiseNeg && ub < noiseNeg
then
if lb < noiseNeg && ub < noiseNeg then
ComparisonResult.Improved
else if lb > noiseThreshold && ub > noiseThreshold
then
else if lb > noiseThreshold && ub > noiseThreshold then
ComparisonResult.Regressed
else
ComparisonResult.NonSignificant


-- TODO: Print ~24 whitespace characters before time, change, regression
-- TODO: Does color of change percents depend on positive T-test?
def BenchGroup.printResults (bg : BenchGroup) (benchName : String) (m : MeasurementData) : IO Unit := do
let estimates := m.absoluteEstimates
let typicalEstimate := estimates.slope.getD estimates.mean
IO.println s!"{bg.name}/{benchName}"
let lb := typicalEstimate.confidenceInterval.lowerBound.formatNanos
let pointEst := typicalEstimate.pointEstimate.formatNanos
let ub := typicalEstimate.confidenceInterval.upperBound.formatNanos
IO.println s!"time: [{lb} {pointEst} {ub}]"
if let some comp := m.comparison
then
let title := green s!"{bg.name}/{benchName}"
let indentWidth := 24
let indent := String.mk <| List.replicate indentWidth ' '
IO.print title
-- Print time on separate line from title if title is >22 chars long
if title.length + 2 > indentWidth then
IO.print s!"\n{indent}"
else
IO.print <| indent.drop title.length
let lb := typicalEstimate.confidenceInterval.lowerBound
let pointEst := typicalEstimate.pointEstimate
let ub := typicalEstimate.confidenceInterval.upperBound
println! "time: [{lb.formatNanos} {bold pointEst.formatNanos} {ub.formatNanos}]"
let thrpts : Option Throughput :=
bg.config.throughput.map (fun thrpt =>
match thrpt with
| .bytes b =>
thrptsFromBps b.toFloat lb pointEst ub
| .elements _e => ⟨ 1.0, 2.0, 3.0, 4.0 ⟩)
if let some ts := thrpts then
let (lbStr, pointEstStr, ubStr) :=
(formatThrpt ts.lowerBound, formatThrpt ts.pointEstimate, formatThrpt ts.upperBound)
println! "{indent}thrpt: [{lbStr} {bold pointEstStr} {ubStr}]"

if let some comp := m.comparison then
let diffMean := comp.pValue > comp.significanceThreshold
let meanEst := comp.relativeEstimates.mean
let pointEst := (meanEst.pointEstimate * 100).floatPretty 4
let lb := (meanEst.confidenceInterval.lowerBound * 100).floatPretty 4
let ub := (meanEst.confidenceInterval.upperBound * 100).floatPretty 4
let symbol := if diffMean then ">" else "<"
IO.println s!"change: [{lb}% {pointEst}% {ub}%] (p = {comp.pValue.floatPretty 2} {symbol} {comp.significanceThreshold.floatPretty 2})"
let explanation := if diffMean
then
"No change in performance detected"
let (explanation, color) := if diffMean then
("No change in performance detected", false)
else
match compareToThreshold meanEst comp.noiseThreshold with
| ComparisonResult.Improved => "Performance has improved"
| ComparisonResult.Regressed => "Performance has regressed"
| ComparisonResult.NonSignificant => "Change within noise threshold"
IO.println explanation
IO.println ""
m.avgTimes.tukey
| ComparisonResult.Improved => (s!"Performance has {green "improved"}", true)
| ComparisonResult.Regressed => (s!"Performance has {red "regressed"}", true)
| ComparisonResult.NonSignificant => ("Change within noise threshold", false)
let (lb, pointEst, ub) := meanEst.formatPercents color
let symbol := if diffMean then ">" else "<"
let changeStr := "change:"
if let some thrptsNew := thrpts then
let changeIndent := indent.drop changeStr.length
IO.println <| changeIndent ++ changeStr
println! "{indent}time: [{lb} {pointEst} {ub}] (p = {comp.pValue.floatPretty 2} {symbol} {comp.significanceThreshold.floatPretty 2})"
let base := comp.baseEstimates
let typical := base.slope.getD base.mean
let thrptsBase := thrptsFromBps thrptsNew.size typical.confidenceInterval.lowerBound typical.pointEstimate typical.confidenceInterval.upperBound

let (lbStr, pointEstStr, ubStr) := thrptsNew.formatPercents thrptsBase color
println! "{indent}thrpt: [{lbStr}, {pointEstStr}, {ubStr}]"
else
IO.println s!"{indent}{changeStr} [{lb} {pointEst} {ub}] (p = {comp.pValue.floatPretty 2} {symbol} {comp.significanceThreshold.floatPretty 2})"
IO.println <| indent ++ explanation
m.avgTimes.runTukey

-- TODO: Write sampling mode and config in `sample.json` for comparison
def saveComparison (groupName : String) (benchName : String) (comparison : ComparisonData) (config : Config) : IO Unit := do
Expand All @@ -261,7 +277,6 @@ def saveMeasurement (groupName : String) (benchName : String) (mData : Measureme
let fileExt := toString config.serde
if let some compData := mData.comparison then saveComparison groupName benchName compData config
else
println! "No compdata"
let (newEstimatesFile, newSampleFile) := (newPath / s!"estimates.{fileExt}", newPath / s!"sample.{fileExt}")
moveBaseFile newSampleFile baseDir
moveBaseFile newEstimatesFile baseDir
Expand Down Expand Up @@ -305,17 +320,17 @@ def getColumnWidths' (maxWidths : ColumnWidths) (row: BenchReport) : ColumnWidth
let function := if fnLen > maxWidths.function then fnLen else maxWidths.function
let newBenchLen := row.newBench.getTime.formatNanos.length
let newBench := if newBenchLen > maxWidths.newBench then newBenchLen else maxWidths.newBench
let baseBench := if let some baseBench := row.baseBench then
let baseBenchLen := baseBench.getTime.formatNanos.length
if baseBenchLen > maxWidths.baseBench then baseBenchLen
else maxWidths.baseBench
else maxWidths.baseBench
let percentChange := if let some percentChange := row.percentChange then
let percentChangeStr := percentChangeToString percentChange
let percentLen := percentChangeStr.length
if percentLen > maxWidths.percentChange then percentLen
else maxWidths.percentChange
else maxWidths.percentChange
let baseBench :=
row.baseBench.elim maxWidths.baseBench (fun baseBench =>
let baseBenchLen := baseBench.getTime.formatNanos.length
if baseBenchLen > maxWidths.baseBench then baseBenchLen
else maxWidths.baseBench)
let percentChange :=
row.percentChange.elim maxWidths.percentChange (fun percentChange =>
let percentChangeStr := percentChangeToString percentChange
let percentLen := percentChangeStr.length
if percentLen > maxWidths.percentChange then percentLen
else maxWidths.percentChange)
{ function, newBench, baseBench, percentChange }

-- Gets the max lengths of each data type for pretty-printing columns
Expand All @@ -342,13 +357,11 @@ def mkReportPretty' (columnWidths : ColumnWidths) (reportPretty : String) (row :
let functionStr := padWhitespace row.function columnWidths.function
let newBenchStr := row.newBench.getTime.formatNanos
let newBenchStr := padWhitespace newBenchStr columnWidths.newBench
let baseBenchStr := if let some baseBench := row.baseBench then
baseBench.getTime.formatNanos
else "None"
let baseBenchStr :=
row.baseBench.elim "None" (·.getTime.formatNanos)
let baseBenchStr := padWhitespace baseBenchStr columnWidths.baseBench
let percentChangeStr := if let some percentChange := row.percentChange then
percentChangeToString percentChange
else "N/A"
let percentChangeStr :=
row.percentChange.elim "N/A" (percentChangeToString ·)
let percentChangeStr := padWhitespace percentChangeStr columnWidths.percentChange
let rowPretty :=
s!"| {functionStr} | {newBenchStr} | {baseBenchStr} | {percentChangeStr} |\n"
Expand Down Expand Up @@ -384,7 +397,25 @@ def oneShotBench {α β : Type} (groupName : String) (bench: Benchmarkable α β
let _res ← bench.getFn bench.arg
let finish ← IO.monoNanosNow
let benchTime := finish - start
IO.println s!"time: {benchTime.toFloat.formatNanos}"
let title := green s!"{groupName}/{bench.name}"
let indentWidth := 24
let indent := String.mk <| List.replicate indentWidth ' '
IO.print title
-- Print time on separate line from title if title is >22 chars long
if title.length + 2 > indentWidth then
IO.print s!"\n{indent}"
else
IO.print <| indent.drop title.length
IO.println s!"time: {bold benchTime.toFloat.formatNanos}"
let thrpts : Option Throughput :=
bg.config.throughput.map (fun thrpt =>
match thrpt with
| .bytes b =>
bytesPerSecond b
| .elements _e => ⟨ 1.0, 2.0, 3.0, 4.0 ⟩)
if let some ts := thrpts then
let thrpt := formatThrpt ts
println! "{indent}thrpt: {bold thrpt}"
let (basePath, newPath) ← mkDirs groupName bench.name
let fileExt := toString config.serde
let newFile := newPath / s!"one-shot.{fileExt}"
Expand Down Expand Up @@ -413,7 +444,6 @@ def bgroup {α β : Type} (name: String) (benches : List (Benchmarkable α β))
let mut reports : Array BenchReport := #[]
for b in benches do
let report : BenchReport ← if config.oneShot then do
IO.println s!"{bg.name}/{b.name}"
oneShotBench name b config
else
let warmupMean ← bg.warmup b
Expand All @@ -423,8 +453,7 @@ def bgroup {α β : Type} (name: String) (benches : List (Benchmarkable α β))
let avgTimes : Distribution := { d := data.map (fun (x,y) => Float.ofNat y / Float.ofNat x) }
let gen ← IO.stdGenRef.get
let mut (distributions, estimates) := avgTimes.estimates bg.config gen
if bg.config.samplingMode == .linear
then
if bg.config.samplingMode == .linear then
let data' : Data := { d := data }
let (distribution, slope) := data'.regression bg.config gen
estimates := { estimates with slope := .some slope }
Expand All @@ -436,7 +465,7 @@ def bgroup {α β : Type} (name: String) (benches : List (Benchmarkable α β))
absoluteEstimates := estimates,
distributions,
comparison := comparisonData
throughput := none
throughput := config.throughput
}
bg.printResults b.name measurement
IO.println ""
Expand Down
25 changes: 25 additions & 0 deletions Ix/Benchmark/Common.lean
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ instance : ToString SerdeFormat where
| .json => "json"
| .ixon => "ixon"

inductive ThroughputUnit where
| bytes (bytes : Nat)
| elements (elems : Nat)
deriving Repr

structure Config where
/-- Warmup time in seconds -/
warmupTime : Float := 3.0
Expand All @@ -32,6 +37,8 @@ structure Config where
noiseThreshold : Float := 0.01
/-- Serde format for bench report written to disk, defaults to JSON for human readability -/
serde : SerdeFormat := .json
/-- Throughput -/
throughput : Option ThroughputUnit := .none
/-- Whether to skip sampling altogether and only collect a single data point. Takes precedence over all sampling settings. Used for expensive benchmarks -/
oneShot : Bool := false
/-- Whether to generate a Markdown report of all timings including comparison to disk if possible-/
Expand Down Expand Up @@ -106,3 +113,21 @@ def Float.formatNanos (f : Float) : String :=
(f / 10 ^ 3).floatPretty 2 ++ "µs"
else
f.floatPretty 2 ++ "ns"

def color (code : String) (s : String) : String :=
s!"\x1b[{code}m{s}\x1b[0m"

def red := color "31"

def green := color "32"

def yellow := color "33"

def bold := color "1"

def formatPercent (p : Float) : String :=
let percent := (p * 100).floatPretty 4 ++ "%"
if p > 0 then
s!"+{percent}"
else
percent
Loading
Loading