diff --git a/Benchmarks/Aiur.lean b/Benchmarks/Aiur.lean index f672ed6d..c03a9bcc 100644 --- a/Benchmarks/Aiur.lean +++ b/Benchmarks/Aiur.lean @@ -110,7 +110,7 @@ def proveBench : IO $ Array BenchReport := do let funIdx := toplevel.getFuncIdx `main |>.get! bgroup "nat_fib" [ bench "prove fib 10" (Aiur.AiurSystem.prove system friParameters funIdx #[10]) default, - ] + ] { samplingMode := .flat, sampleTime := 10, throughput := .some (Throughput.bytes 1) } def verifyBench : IO $ Array BenchReport := do match toplevel.checkAndSimplify with @@ -124,5 +124,6 @@ def verifyBench : IO $ Array BenchReport := do bench "verify fib 10" (Aiur.AiurSystem.verify system friParameters claim) proof ] +-- Test def main (_args : List String) : IO Unit := do let _result ← proveBench diff --git a/Benchmarks/Blake3.lean b/Benchmarks/Blake3.lean index c947dcd0..62f0f813 100644 --- a/Benchmarks/Blake3.lean +++ b/Benchmarks/Blake3.lean @@ -4,8 +4,10 @@ import Ix.Aiur.Compile import Ix.Aiur.Protocol import Ix.Benchmark.Bench -abbrev dataSizes := #[64, 128, 256, 512, 1024, 2048] -abbrev numHashesPerProof := #[1, 2, 4, 8, 16, 32] +-- abbrev dataSizes := #[64, 128, 256, 512, 1024, 2048] +-- abbrev numHashesPerProof := #[1, 2, 4, 8, 16, 32] +abbrev dataSizes := #[1, 2, 3, 4, 5] +abbrev numHashesPerProof := #[1] def commitmentParameters : Aiur.CommitmentParameters := { logBlowup := 1 @@ -40,7 +42,8 @@ def blake3Bench : IO $ Array BenchReport := do data := ioBuffer.data ++ data map := ioBuffer.map.insert #[.ofNat idx] ioKeyInfo } benches := benches.push <| bench s!"dataSize={dataSize} numHashes={numHashes}" (aiurSystem.prove friParameters funIdx #[Aiur.G.ofNat numHashes]) ioBuffer - bgroup "prove blake3" benches.toList { oneShot := true } + bgroup "prove blake3" benches.toList { oneShot := false, throughput := .some (Throughput.bytes 100) } + -- bgroup "prove blake3" benches.toList { oneShot := false } def parseFunction (words : List String) (param : String): Option String := words.find? (·.startsWith param) |> .map (·.stripPrefix param) diff --git a/Ix/Benchmark/Bench.lean b/Ix/Benchmark/Bench.lean index 178017ba..25d00e05 100644 --- a/Ix/Benchmark/Bench.lean +++ b/Ix/Benchmark/Bench.lean @@ -7,6 +7,7 @@ import Batteries import Ix.Benchmark.Serde import Ix.Benchmark.Tukey +import Ix.Benchmark.Throughput open Batteries (RBMap) @@ -31,7 +32,7 @@ structure BenchGroup where name : String config : Config -inductive BenchFn ( α β : Type) where +inductive BenchFn (α β : Type) where | pure (fn : α → β) | io (fn : α → IO β) @@ -78,8 +79,8 @@ def printRunning (config : Config) (expectedTime : Float) (numIters : Nat) (warn IO.eprintln s!"Warning: Unable to complete {config.numSamples} samples in {config.sampleTime.floatPretty 2}s. You may wish to increase target time to {expectedTime.floatPretty 2}s" IO.println s!"Running {config.numSamples} samples in {expectedTime.floatPretty 2}s ({numIters.natPretty} iterations)\n" --- Core sampling loop that runs the benchmark function and returns the array of measured timings -def runSample (sampleIters : Array Nat) (bench : Benchmarkable α β) : IO (Array Nat) := do +-- Core sampling loop that runs the benchmark function for a fixed (if flat) or linearly increasing (if linear) number of iterations per sample, and returns the final dataset as the array of measured times for each sample +def runSamples (sampleIters : Array Nat) (bench : Benchmarkable α β) : IO (Array Nat) := do let func := bench.getFn let mut timings : Array Nat := #[] for iters in sampleIters do @@ -92,9 +93,9 @@ def runSample (sampleIters : Array Nat) (bench : Benchmarkable α β) : IO (Arra -- TODO: Recommend sample count if expectedTime >>> bg.config.sampleTime (i.e. itersPerSample == 1) /-- -Runs the sample as a sequence of constant iterations per data point, where the iteration count attempts to fit into the configurable `sampleTime` but cannot be less than 1. +Runs the benchmark samples as a sequence of constant iterations per data point, where the iteration count attempts to fit into the configurable `sampleTime` but cannot be less than 1. -Returns the iteration counts and elapsed time per data point. +Returns the iteration counts and elapsed time per sample. -/ def BenchGroup.sampleFlat (bg : BenchGroup) (bench : Benchmarkable α β) (warmupMean : Float) : IO (Array Nat × Array Nat) := do @@ -106,11 +107,11 @@ def BenchGroup.sampleFlat (bg : BenchGroup) (bench : Benchmarkable α β) printRunning bg.config expectedTime totalIters itersPerSample --IO.println s!"Flat sample. Iters per sample: {itersPerSample}" let sampleIters := Array.replicate bg.config.numSamples itersPerSample - let timings ← runSample sampleIters bench + let timings ← runSamples sampleIters bench return (sampleIters, timings) /-- -Runs the sample as a sequence of linearly increasing iterations [d, 2d, 3d, ..., Nd] where `N` is the total number of samples and `d` is a factor derived from the warmup iteration time. +Runs the benchmarks samples as a sequence of linearly increasing iterations [d, 2d, 3d, ..., Nd] where `N` is the total number of samples and `d` is a factor derived from the warmup iteration time. The sum of this series should be roughly equivalent to the total `sampleTime`. @@ -126,7 +127,7 @@ def BenchGroup.sampleLinear (bg : BenchGroup) (bench : Benchmarkable α β) (war let sampleIters := (Array.range bg.config.numSamples).map (fun x => (x + 1) * d) printRunning bg.config expectedTime sampleIters.sum d --IO.println s!"Linear sample. Iters increase by a factor of {d} per sample" - let timings ← runSample sampleIters bench + let timings ← runSamples sampleIters bench return (sampleIters, timings) def BenchGroup.sample (bg : BenchGroup) (bench : Benchmarkable α β) (warmupMean : Float) : IO (Array Nat × Array Nat) := do @@ -134,14 +135,6 @@ def BenchGroup.sample (bg : BenchGroup) (bench : Benchmarkable α β) (warmupMea | .flat => bg.sampleFlat bench warmupMean | .linear => bg.sampleLinear bench warmupMean --- TODO -inductive Throughput where -| Bytes (n : UInt64) -| BytesDecimal (n : UInt64) -| Elements (n : UInt64) -| Bits (n : UInt64) - deriving Repr - structure MeasurementData where data : Data avgTimes : Distribution @@ -197,45 +190,68 @@ def compareToThreshold (estimate : Estimate) (noiseThreshold : Float) : Comparis let (lb, ub) := (ci.lowerBound, ci.upperBound) let noiseNeg := noiseThreshold.neg - if lb < noiseNeg && ub < noiseNeg - then + if lb < noiseNeg && ub < noiseNeg then ComparisonResult.Improved - else if lb > noiseThreshold && ub > noiseThreshold - then + else if lb > noiseThreshold && ub > noiseThreshold then ComparisonResult.Regressed else ComparisonResult.NonSignificant - --- TODO: Print ~24 whitespace characters before time, change, regression +-- TODO: Does color of change percents depend on positive T-test? def BenchGroup.printResults (bg : BenchGroup) (benchName : String) (m : MeasurementData) : IO Unit := do let estimates := m.absoluteEstimates let typicalEstimate := estimates.slope.getD estimates.mean - IO.println s!"{bg.name}/{benchName}" - let lb := typicalEstimate.confidenceInterval.lowerBound.formatNanos - let pointEst := typicalEstimate.pointEstimate.formatNanos - let ub := typicalEstimate.confidenceInterval.upperBound.formatNanos - IO.println s!"time: [{lb} {pointEst} {ub}]" - if let some comp := m.comparison - then + let title := green s!"{bg.name}/{benchName}" + let indentWidth := 24 + let indent := String.mk <| List.replicate indentWidth ' ' + IO.print title + -- Print time on separate line from title if title is >22 chars long + if title.length + 2 > indentWidth then + IO.print s!"\n{indent}" + else + IO.print <| indent.drop title.length + let lb := typicalEstimate.confidenceInterval.lowerBound + let pointEst := typicalEstimate.pointEstimate + let ub := typicalEstimate.confidenceInterval.upperBound + println! "time: [{lb.formatNanos} {bold pointEst.formatNanos} {ub.formatNanos}]" + let thrpts : Option Throughput := + bg.config.throughput.map (fun thrpt => + match thrpt with + | .bytes b => + thrptsFromBps b.toFloat lb pointEst ub + | .elements _e => ⟨ 1.0, 2.0, 3.0, 4.0 ⟩) + if let some ts := thrpts then + let (lbStr, pointEstStr, ubStr) := + (formatThrpt ts.lowerBound, formatThrpt ts.pointEstimate, formatThrpt ts.upperBound) + println! "{indent}thrpt: [{lbStr} {bold pointEstStr} {ubStr}]" + + if let some comp := m.comparison then let diffMean := comp.pValue > comp.significanceThreshold let meanEst := comp.relativeEstimates.mean - let pointEst := (meanEst.pointEstimate * 100).floatPretty 4 - let lb := (meanEst.confidenceInterval.lowerBound * 100).floatPretty 4 - let ub := (meanEst.confidenceInterval.upperBound * 100).floatPretty 4 - let symbol := if diffMean then ">" else "<" - IO.println s!"change: [{lb}% {pointEst}% {ub}%] (p = {comp.pValue.floatPretty 2} {symbol} {comp.significanceThreshold.floatPretty 2})" - let explanation := if diffMean - then - "No change in performance detected" + let (explanation, color) := if diffMean then + ("No change in performance detected", false) else match compareToThreshold meanEst comp.noiseThreshold with - | ComparisonResult.Improved => "Performance has improved" - | ComparisonResult.Regressed => "Performance has regressed" - | ComparisonResult.NonSignificant => "Change within noise threshold" - IO.println explanation - IO.println "" - m.avgTimes.tukey + | ComparisonResult.Improved => (s!"Performance has {green "improved"}", true) + | ComparisonResult.Regressed => (s!"Performance has {red "regressed"}", true) + | ComparisonResult.NonSignificant => ("Change within noise threshold", false) + let (lb, pointEst, ub) := meanEst.formatPercents color + let symbol := if diffMean then ">" else "<" + let changeStr := "change:" + if let some thrptsNew := thrpts then + let changeIndent := indent.drop changeStr.length + IO.println <| changeIndent ++ changeStr + println! "{indent}time: [{lb} {pointEst} {ub}] (p = {comp.pValue.floatPretty 2} {symbol} {comp.significanceThreshold.floatPretty 2})" + let base := comp.baseEstimates + let typical := base.slope.getD base.mean + let thrptsBase := thrptsFromBps thrptsNew.size typical.confidenceInterval.lowerBound typical.pointEstimate typical.confidenceInterval.upperBound + + let (lbStr, pointEstStr, ubStr) := thrptsNew.formatPercents thrptsBase color + println! "{indent}thrpt: [{lbStr}, {pointEstStr}, {ubStr}]" + else + IO.println s!"{indent}{changeStr} [{lb} {pointEst} {ub}] (p = {comp.pValue.floatPretty 2} {symbol} {comp.significanceThreshold.floatPretty 2})" + IO.println <| indent ++ explanation + m.avgTimes.runTukey -- TODO: Write sampling mode and config in `sample.json` for comparison def saveComparison (groupName : String) (benchName : String) (comparison : ComparisonData) (config : Config) : IO Unit := do @@ -261,7 +277,6 @@ def saveMeasurement (groupName : String) (benchName : String) (mData : Measureme let fileExt := toString config.serde if let some compData := mData.comparison then saveComparison groupName benchName compData config else - println! "No compdata" let (newEstimatesFile, newSampleFile) := (newPath / s!"estimates.{fileExt}", newPath / s!"sample.{fileExt}") moveBaseFile newSampleFile baseDir moveBaseFile newEstimatesFile baseDir @@ -305,17 +320,17 @@ def getColumnWidths' (maxWidths : ColumnWidths) (row: BenchReport) : ColumnWidth let function := if fnLen > maxWidths.function then fnLen else maxWidths.function let newBenchLen := row.newBench.getTime.formatNanos.length let newBench := if newBenchLen > maxWidths.newBench then newBenchLen else maxWidths.newBench - let baseBench := if let some baseBench := row.baseBench then - let baseBenchLen := baseBench.getTime.formatNanos.length - if baseBenchLen > maxWidths.baseBench then baseBenchLen - else maxWidths.baseBench - else maxWidths.baseBench - let percentChange := if let some percentChange := row.percentChange then - let percentChangeStr := percentChangeToString percentChange - let percentLen := percentChangeStr.length - if percentLen > maxWidths.percentChange then percentLen - else maxWidths.percentChange - else maxWidths.percentChange + let baseBench := + row.baseBench.elim maxWidths.baseBench (fun baseBench => + let baseBenchLen := baseBench.getTime.formatNanos.length + if baseBenchLen > maxWidths.baseBench then baseBenchLen + else maxWidths.baseBench) + let percentChange := + row.percentChange.elim maxWidths.percentChange (fun percentChange => + let percentChangeStr := percentChangeToString percentChange + let percentLen := percentChangeStr.length + if percentLen > maxWidths.percentChange then percentLen + else maxWidths.percentChange) { function, newBench, baseBench, percentChange } -- Gets the max lengths of each data type for pretty-printing columns @@ -342,13 +357,11 @@ def mkReportPretty' (columnWidths : ColumnWidths) (reportPretty : String) (row : let functionStr := padWhitespace row.function columnWidths.function let newBenchStr := row.newBench.getTime.formatNanos let newBenchStr := padWhitespace newBenchStr columnWidths.newBench - let baseBenchStr := if let some baseBench := row.baseBench then - baseBench.getTime.formatNanos - else "None" + let baseBenchStr := + row.baseBench.elim "None" (·.getTime.formatNanos) let baseBenchStr := padWhitespace baseBenchStr columnWidths.baseBench - let percentChangeStr := if let some percentChange := row.percentChange then - percentChangeToString percentChange - else "N/A" + let percentChangeStr := + row.percentChange.elim "N/A" (percentChangeToString ·) let percentChangeStr := padWhitespace percentChangeStr columnWidths.percentChange let rowPretty := s!"| {functionStr} | {newBenchStr} | {baseBenchStr} | {percentChangeStr} |\n" @@ -384,7 +397,25 @@ def oneShotBench {α β : Type} (groupName : String) (bench: Benchmarkable α β let _res ← bench.getFn bench.arg let finish ← IO.monoNanosNow let benchTime := finish - start - IO.println s!"time: {benchTime.toFloat.formatNanos}" + let title := green s!"{groupName}/{bench.name}" + let indentWidth := 24 + let indent := String.mk <| List.replicate indentWidth ' ' + IO.print title + -- Print time on separate line from title if title is >22 chars long + if title.length + 2 > indentWidth then + IO.print s!"\n{indent}" + else + IO.print <| indent.drop title.length + IO.println s!"time: {bold benchTime.toFloat.formatNanos}" + let thrpts : Option Throughput := + bg.config.throughput.map (fun thrpt => + match thrpt with + | .bytes b => + bytesPerSecond b + | .elements _e => ⟨ 1.0, 2.0, 3.0, 4.0 ⟩) + if let some ts := thrpts then + let thrpt := formatThrpt ts + println! "{indent}thrpt: {bold thrpt}" let (basePath, newPath) ← mkDirs groupName bench.name let fileExt := toString config.serde let newFile := newPath / s!"one-shot.{fileExt}" @@ -413,7 +444,6 @@ def bgroup {α β : Type} (name: String) (benches : List (Benchmarkable α β)) let mut reports : Array BenchReport := #[] for b in benches do let report : BenchReport ← if config.oneShot then do - IO.println s!"{bg.name}/{b.name}" oneShotBench name b config else let warmupMean ← bg.warmup b @@ -423,8 +453,7 @@ def bgroup {α β : Type} (name: String) (benches : List (Benchmarkable α β)) let avgTimes : Distribution := { d := data.map (fun (x,y) => Float.ofNat y / Float.ofNat x) } let gen ← IO.stdGenRef.get let mut (distributions, estimates) := avgTimes.estimates bg.config gen - if bg.config.samplingMode == .linear - then + if bg.config.samplingMode == .linear then let data' : Data := { d := data } let (distribution, slope) := data'.regression bg.config gen estimates := { estimates with slope := .some slope } @@ -436,7 +465,7 @@ def bgroup {α β : Type} (name: String) (benches : List (Benchmarkable α β)) absoluteEstimates := estimates, distributions, comparison := comparisonData - throughput := none + throughput := config.throughput } bg.printResults b.name measurement IO.println "" diff --git a/Ix/Benchmark/Common.lean b/Ix/Benchmark/Common.lean index 809b7fda..fa657f58 100644 --- a/Ix/Benchmark/Common.lean +++ b/Ix/Benchmark/Common.lean @@ -13,6 +13,11 @@ instance : ToString SerdeFormat where | .json => "json" | .ixon => "ixon" +inductive ThroughputUnit where +| bytes (bytes : Nat) +| elements (elems : Nat) + deriving Repr + structure Config where /-- Warmup time in seconds -/ warmupTime : Float := 3.0 @@ -32,6 +37,8 @@ structure Config where noiseThreshold : Float := 0.01 /-- Serde format for bench report written to disk, defaults to JSON for human readability -/ serde : SerdeFormat := .json + /-- Throughput -/ + throughput : Option ThroughputUnit := .none /-- Whether to skip sampling altogether and only collect a single data point. Takes precedence over all sampling settings. Used for expensive benchmarks -/ oneShot : Bool := false /-- Whether to generate a Markdown report of all timings including comparison to disk if possible-/ @@ -106,3 +113,21 @@ def Float.formatNanos (f : Float) : String := (f / 10 ^ 3).floatPretty 2 ++ "µs" else f.floatPretty 2 ++ "ns" + +def color (code : String) (s : String) : String := + s!"\x1b[{code}m{s}\x1b[0m" + +def red := color "31" + +def green := color "32" + +def yellow := color "33" + +def bold := color "1" + +def formatPercent (p : Float) : String := + let percent := (p * 100).floatPretty 4 ++ "%" + if p > 0 then + s!"+{percent}" + else + percent diff --git a/Ix/Benchmark/Estimate.lean b/Ix/Benchmark/Estimate.lean index 1bb5e0e5..412a81b3 100644 --- a/Ix/Benchmark/Estimate.lean +++ b/Ix/Benchmark/Estimate.lean @@ -1,4 +1,5 @@ import Lean.Data.Json +import Ix.Benchmark.Common structure ConfidenceInterval where confidenceLevel : Float @@ -12,6 +13,19 @@ structure Estimate where stdErr : Float deriving Repr, Lean.ToJson, Lean.FromJson +def Estimate.formatPercents (est : Estimate) (color : Bool) : (String × String × String) := + let lb := formatPercent est.confidenceInterval.lowerBound + let pointEst := bold <| formatPercent est.pointEstimate + let ub := formatPercent est.confidenceInterval.upperBound + let pointEst := if color then + if est.pointEstimate < 0 then + green pointEst + else + red pointEst + else + pointEst + (lb, pointEst, ub) + structure Estimates where mean : Estimate median : Estimate @@ -20,6 +34,9 @@ structure Estimates where stdDev : Estimate deriving Repr, Lean.ToJson, Lean.FromJson +def Estimates.typical (est : Estimates) : Estimate := + est.slope.getD est.mean + structure PointEstimates where mean : Float median : Float diff --git a/Ix/Benchmark/OneShot.lean b/Ix/Benchmark/OneShot.lean index ceb189c0..bc13bfe7 100644 --- a/Ix/Benchmark/OneShot.lean +++ b/Ix/Benchmark/OneShot.lean @@ -2,5 +2,11 @@ import Lean.Data.Json.FromToJson structure OneShot where benchTime : Nat + throughput : Option Float deriving Lean.ToJson, Lean.FromJson, Repr +structure OneShots where + base : Option OneShot + new : OneShot + deriving Repr + diff --git a/Ix/Benchmark/Serde.lean b/Ix/Benchmark/Serde.lean index da9e9462..54d885d3 100644 --- a/Ix/Benchmark/Serde.lean +++ b/Ix/Benchmark/Serde.lean @@ -95,7 +95,10 @@ instance : Ixon.Serialize ChangeEstimates where get := getChangeEstimates def getOneShot: Ixon.GetM OneShot := do - return { benchTime := (← Ixon.Serialize.get) } + return { + benchTime := (← Ixon.Serialize.get) + throughput := (← Ixon.Serialize.get) + } instance : Ixon.Serialize OneShot where put os := Ixon.Serialize.put os.benchTime diff --git a/Ix/Benchmark/Tukey.lean b/Ix/Benchmark/Tukey.lean index b841ac7e..b62d9d08 100644 --- a/Ix/Benchmark/Tukey.lean +++ b/Ix/Benchmark/Tukey.lean @@ -13,7 +13,7 @@ def Outliers.getTotal (o : Outliers) : Nat := o.highSevere + o.highMild + o.lowMild + o.lowSevere -- TODO: Refactor to cut down verbosity, and return the list for plotting -def Distribution.tukey (data : Distribution) : IO Unit := do +def Distribution.runTukey (data : Distribution) : IO Unit := do let upper := (data.percentile? 75).get! let lower := (data.percentile? 25).get! let iqr := upper - lower @@ -34,7 +34,7 @@ def Distribution.tukey (data : Distribution) : IO Unit := do let outLength := out.outliers.length if outLength > 0 then let samples := data.d.size - IO.println s!"Found {outLength} outliers among {samples} measurements" + IO.println <| yellow s!"Found {outLength} outliers among {samples} measurements" if out.lowMild > 0 then let pct := Float.ofNat out.lowMild / (Float.ofNat samples) * 100 IO.println s!" {out.lowMild} ({pct.floatPretty 2}%) low mild"