Skip to content

Commit 92fa048

Browse files
authored
[EASY] Allow ability to specify custom datasets for openorca format (#160)
1 parent 8bc6d88 commit 92fa048

File tree

1 file changed

+7 -3 lines changed

1 file changed

+7 -3 lines changed

benchmarks/benchmark_serving.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -245,12 +245,16 @@ def load_sharegpt_dataset(
245245
return dataset
246246

247247

248-
def load_openorca_dataset_pkl():
248+
def load_openorca_dataset_pkl(
249+
dataset_path: str,
250+
) -> list[tuple[Any, Any]]:
251+
if not dataset_path:
252+
dataset_path = "open_orca_gpt4_tokenized_llama.calibration_1000.pkl"
249253
# read pickle file
250254
samples = pandas.read_pickle(
251255
os.path.join(
252256
os.path.dirname(os.path.relpath(__file__)),
253-
"open_orca_gpt4_tokenized_llama.calibration_1000.pkl",
257+
dataset_path,
254258
)
255259
)
256260

@@ -663,7 +667,7 @@ def main(args: argparse.Namespace):
663667
else:
664668
dataset = []
665669
if args.dataset == "openorca":
666-
dataset = load_openorca_dataset_pkl()
670+
dataset = load_openorca_dataset_pkl(args.dataset_path)
667671
elif args.dataset == "sharegpt":
668672
dataset = load_sharegpt_dataset(
669673
args.dataset_path,

0 commit comments

Comments
 (0)