@@ -127,17 +127,29 @@ def merge_values(values, merge_function):
127127 return values .groupby (level = 1 ).apply (merge_function )
128128
129129
def get_values(values, lhs_name="lhs", rhs_name="rhs"):
    """Split a per-metric data view into a pair of value series.

    Derived columns ("diff", the statistics columns and the per-side
    standard deviation columns) are dropped first so that only the raw
    value columns remain.

    values: DataFrame holding the columns of a single metric.
    lhs_name, rhs_name: Names used to build the ``std_<name>`` column
        labels that must be ignored.  They default to "lhs"/"rhs" so that
        callers that only pass ``values`` (e.g. add_diff_column) keep
        working.

    Returns a ``(values0, values1)`` tuple: the two remaining columns when
    exactly two are left, otherwise the row-wise minimum and maximum.
    """
    excluded = {
        "diff",
        "t-value",
        "p-value",
        "significant",
        f"std_{lhs_name}",
        f"std_{rhs_name}",
    }
    values = values[[c for c in values.columns if c not in excluded]]
    if len(values.columns) == 2:
        return (values.iloc[:, 0], values.iloc[:, 1])
    # Any other number of columns: compare the best against the worst value.
    return (values.min(axis=1), values.max(axis=1))
139139
140140
def get_default_metric(data, second_data=None):
    """Pick the metric to use when the user did not specify one.

    data: Primary dataframe to check.
    second_data: Optional secondary dataframe (for 'vs' mode with lhs/rhs).

    Returns a one-element list holding the first well-known metric name
    found among the columns of either dataframe, or an empty list when
    none of them is present.
    """
    frames = [data] if second_data is None else [data, second_data]
    # Candidates are tried in priority order; the first hit wins.
    for candidate in ("Exec_Time", "exec_time", "Value", "Runtime"):
        if any(candidate in frame.columns for frame in frames):
            return [candidate]
    return []
151+
152+
141153def add_diff_column (metric , values , absolute_diff = False ):
142154 values0 , values1 = get_values (values [metric ])
143155 values0 .fillna (0.0 , inplace = True )
@@ -150,14 +162,68 @@ def add_diff_column(metric, values, absolute_diff=False):
150162 return values
151163
152164
153- def add_geomean_row (metrics , data , dataout ):
def compute_statistics(lhs_d, rhs_d, metrics, alpha, lhs_name, rhs_name):
    """Compute per-program statistics comparing lhs and rhs samples.

    lhs_d, rhs_d: Unmerged dataframes whose index level 1 identifies the
        program; each program may have several rows (one per run).
    metrics: Metric (column) names to analyse.  Metrics missing from
        either dataframe are skipped.
    alpha: Significance level; a p-value below it is marked "Y".
    lhs_name, rhs_name: Names used for the ``std_<name>`` result keys.

    Returns ``{metric: {program: {stat_name: value}}}`` for every program
    found in ``lhs_d``.  When either side has fewer than two non-NaN
    samples (including a program missing from ``rhs_d``), the numeric
    statistics are NaN and 'significant' is the empty string.
    """
    std_lhs_key = f'std_{lhs_name}'
    std_rhs_key = f'std_{rhs_name}'
    nan = float('nan')
    stats_dict = {}

    for metric in metrics:
        if metric not in lhs_d.columns or metric not in rhs_d.columns:
            continue

        # Group the right-hand side once per metric.  Looking programs up
        # here (instead of slicing rhs_d with .loc for each program) also
        # avoids a KeyError for programs that only exist on the lhs.
        rhs_by_program = {
            program: group.dropna()
            for program, group in rhs_d[metric].groupby(level=1)
        }

        per_program = {}
        for program, lhs_group in lhs_d[metric].groupby(level=1):
            lhs_values = lhs_group.dropna()
            rhs_values = rhs_by_program.get(program)

            # A t-test needs at least two samples on each side.
            if (
                rhs_values is not None
                and len(lhs_values) >= 2
                and len(rhs_values) >= 2
            ):
                t_stat, p_val = stats.ttest_ind(lhs_values, rhs_values)
                per_program[program] = {
                    std_lhs_key: lhs_values.std(ddof=1),
                    std_rhs_key: rhs_values.std(ddof=1),
                    't-value': t_stat,
                    'p-value': p_val,
                    'significant': "Y" if p_val < alpha else "N",
                }
            else:
                per_program[program] = {
                    std_lhs_key: nan,
                    std_rhs_key: nan,
                    't-value': nan,
                    'p-value': nan,
                    'significant': "",
                }

        stats_dict[metric] = per_program

    return stats_dict
199+
200+
def add_precomputed_statistics(data, stats_dict, stat_col_names):
    """Attach precomputed statistics as extra columns of the unstacked frame.

    data: DataFrame indexed by program with (metric, column) MultiIndex
        columns; it is modified in place and also returned.
    stats_dict: ``{metric: {program: {stat_name: value}}}`` mapping.
    stat_col_names: Names of the statistics to add for each metric.

    Programs without an entry get NaN, or "" for the 'significant' column.
    Metrics absent from ``stats_dict`` are left untouched.
    """
    for metric in data.columns.levels[0]:
        per_program = stats_dict.get(metric)
        if per_program is None:
            continue

        for stat_name in stat_col_names:
            missing = "" if stat_name == 'significant' else float('nan')
            data[(metric, stat_name)] = [
                per_program[program][stat_name] if program in per_program else missing
                for program in data.index
            ]

    return data
217+
218+
219+ def add_geomean_row (metrics , data , dataout , lhs_name , rhs_name ):
154220 """
155221 Normalize values1 over values0, compute geomean difference and add a
156222 summary row to dataout.
157223 """
158224 gm = pd .DataFrame (index = [GEOMEAN_ROW ], columns = dataout .columns , dtype = "float64" )
159225 for metric in metrics :
160- values0 , values1 = get_values (data [metric ])
226+ values0 , values1 = get_values (data [metric ], lhs_name , rhs_name )
161227 # Avoid infinite values in the diff and instead use NaN, as otherwise
162228 # the computation of the geometric mean will fail.
163229 values0 = values0 .replace ({0 : float ("NaN" )})
@@ -249,6 +315,8 @@ def print_result(
249315 sortkey = "diff" ,
250316 sort_by_abs = True ,
251317 absolute_diff = False ,
318+ lhs_name = "lhs" ,
319+ rhs_name = "rhs"
252320):
253321 metrics = d .columns .levels [0 ]
254322 if sort_by_abs :
@@ -272,6 +340,16 @@ def print_result(
272340 if not absolute_diff :
273341 for m in metrics :
274342 formatters [(m , "diff" )] = format_relative_diff
343+ # Add formatters for statistical columns
344+ for m in metrics :
345+ if (m , "p-value" ) in dataout .columns :
346+ formatters [(m , "p-value" )] = lambda x : "%.4f" % x if not pd .isna (x ) else ""
347+ if (m , "t-value" ) in dataout .columns :
348+ formatters [(m , "t-value" )] = lambda x : "%.3f" % x if not pd .isna (x ) else ""
349+ if (m , f'std_{ lhs_name } ' ) in dataout .columns :
350+ formatters [(m , f'std_{ lhs_name } ' )] = lambda x : "%.3f" % x if not pd .isna (x ) else ""
351+ if (m , f'std_{ rhs_name } ' ) in dataout .columns :
352+ formatters [(m , f'std_{ rhs_name } ' )] = lambda x : "%.3f" % x if not pd .isna (x ) else ""
275353 # Turn index into a column so we can format it...
276354 formatted_program = dataout .index .to_series ()
277355 if shorten_names :
@@ -302,7 +380,7 @@ def strip_name_fully(name):
302380 # as it will otherwise interfere with common prefix/suffix computation.
303381 if show_diff_column and not absolute_diff :
304382 # geometric mean only makes sense for relative differences.
305- dataout = add_geomean_row (metrics , d , dataout )
383+ dataout = add_geomean_row (metrics , d , dataout , lhs_name , rhs_name )
306384
307385 def float_format (x ):
308386 if x == "" :
@@ -320,7 +398,10 @@ def float_format(x):
320398 formatters = formatters ,
321399 )
322400 print (out )
323- print (d .describe ())
401+ exclude_from_summary = ["t-value" , "p-value" , "significant" ]
402+ exclude_from_summary .extend ([f'std_{ lhs_name } ' , f'std_{ rhs_name } ' ])
403+ d_summary = d .drop (columns = exclude_from_summary , level = 1 , errors = 'ignore' )
404+ print (d_summary .describe ())
324405
325406
326407def main ():
@@ -400,6 +481,19 @@ def main():
400481 default = False ,
401482 help = "Don't use abs() when sorting results" ,
402483 )
484+ parser .add_argument (
485+ "--statistics" ,
486+ action = "store_true" ,
487+ dest = "statistics" ,
488+ default = False ,
489+ help = "Add statistical analysis columns (std, t-value, p-value, significance)" ,
490+ )
491+ parser .add_argument (
492+ "--alpha" ,
493+ type = float ,
494+ default = 0.05 ,
495+ help = "Significance level for statistical tests (default: 0.05)" ,
496+ )
403497 config = parser .parse_args ()
404498
405499 if config .show_diff is None :
@@ -425,15 +519,30 @@ def main():
425519
426520 # Read inputs
427521 files = config .files
522+ stats_dict = None
523+ stat_col_names = None
428524 if "vs" in files :
429525 split = files .index ("vs" )
430526 lhs = files [0 :split ]
431527 rhs = files [split + 1 :]
432528
433529 # Combine the multiple left and right hand sides.
434530 lhs_d = readmulti (lhs )
435- lhs_merged = merge_values (lhs_d , config .merge_function )
436531 rhs_d = readmulti (rhs )
532+
533+ # Compute statistics on raw data before merging (if requested)
534+ if config .statistics :
535+ metrics_for_stats = config .metrics if len (config .metrics ) > 0 else get_default_metric (lhs_d , rhs_d )
536+ stats_dict = compute_statistics (
537+ lhs_d , rhs_d , metrics_for_stats ,
538+ alpha = config .alpha ,
539+ lhs_name = config .lhs_name ,
540+ rhs_name = config .rhs_name
541+ )
542+ stat_col_names = [f'std_{ config .lhs_name } ' , f'std_{ config .rhs_name } ' , 't-value' , 'p-value' , 'significant' ]
543+
544+ # Merge data
545+ lhs_merged = merge_values (lhs_d , config .merge_function )
437546 rhs_merged = merge_values (rhs_d , config .merge_function )
438547
439548 # Combine to new dataframe
@@ -448,11 +557,7 @@ def main():
448557 # Decide which metric to display / what is our "main" metric
449558 metrics = config .metrics
450559 if len (metrics ) == 0 :
451- defaults = ["Exec_Time" , "exec_time" , "Value" , "Runtime" ]
452- for defkey in defaults :
453- if defkey in data .columns :
454- metrics = [defkey ]
455- break
560+ metrics = get_default_metric (data )
456561 if len (metrics ) == 0 :
457562 sys .stderr .write ("No default metric found and none specified\n " )
458563 sys .stderr .write ("Available metrics:\n " )
@@ -508,6 +613,9 @@ def main():
508613 for metric in data .columns .levels [0 ]:
509614 data = add_diff_column (metric , data , absolute_diff = config .absolute_diff )
510615
616+ if config .statistics and stats_dict is not None :
617+ data = add_precomputed_statistics (data , stats_dict , stat_col_names )
618+
511619 sortkey = "diff"
512620 # TODO: should we still be sorting by diff even if the diff is hidden?
513621 if len (config .files ) == 1 :
@@ -526,6 +634,8 @@ def main():
526634 sortkey ,
527635 config .no_abs_sort ,
528636 config .absolute_diff ,
637+ config .lhs_name ,
638+ config .rhs_name ,
529639 )
530640
531641
0 commit comments