[EASY] Allow specifying custom datasets for the openorca format (#160)

vipannalla · web-flow · commit 92fa0482951a · 2025-01-06T11:58:33.000-08:00
diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py
@@ -245,12 +245,16 @@ def load_sharegpt_dataset(
   return dataset
 
 
-def load_openorca_dataset_pkl():
+def load_openorca_dataset_pkl(
+    dataset_path: str,
+) -> list[tuple[Any, Any]]:
+  if not dataset_path:
+    dataset_path = "open_orca_gpt4_tokenized_llama.calibration_1000.pkl"
   # read pickle file
   samples = pandas.read_pickle(
       os.path.join(
           os.path.dirname(os.path.relpath(__file__)),
-          "open_orca_gpt4_tokenized_llama.calibration_1000.pkl",
+          dataset_path,
       )
   )
 
@@ -663,7 +667,7 @@ def main(args: argparse.Namespace):
   else:
     dataset = []
     if args.dataset == "openorca":
-      dataset = load_openorca_dataset_pkl()
+      dataset = load_openorca_dataset_pkl(args.dataset_path)
     elif args.dataset == "sharegpt":
       dataset = load_sharegpt_dataset(
           args.dataset_path,