fix: raise the ground-truth pass-rate threshold from 0.95 to 0.99

terryyz · terryyz · commit a5bff4a7b6b1 · 2024-06-28T03:32:38.000+08:00
diff --git a/bigcodebench/evaluate.py b/bigcodebench/evaluate.py
@@ -142,7 +142,7 @@ def evaluate(flags):
         
         if flags.check_gt_only:
         
-            if gt_pass_rate > 0.95:
+            if gt_pass_rate > 0.99:
                 cprint(f"Groundtruth pass rate: {gt_pass_rate:.3f}", "green")
             else:
                 cprint(f"Groundtruth pass rate: {gt_pass_rate:.3f}\nPlease be cautious!", "red")
@@ -250,7 +250,7 @@ def stucking_checker():
     if flags.no_gt:
         cprint(f"Groundtruth is not checked", "yellow")
     else:
-        if gt_pass_rate > 0.95:
+        if gt_pass_rate > 0.99:
             cprint(f"Groundtruth pass rate: {gt_pass_rate:.3f}", "green")
         else:
             cprint(f"Groundtruth pass rate: {gt_pass_rate:.3f}\nPlease be cautious!", "red")