@@ -24,25 +24,29 @@ def get_dataset_metadata(name: str, version: str, mini: bool, noextreme: bool =
2424 return url, cache_path
2525
2626
27- def make_cache(gzip_url, cache_path):
27+ def make_cache(gzip_url, hf_data, cache_path, gh=False):
2828 # Check if open eval file exists in CACHE_DIR
2929 if not os.path.exists(cache_path):
30- # Install BigCodeBench dataset and parse as jsonl
31- print(f"Downloading dataset from {gzip_url}")
32- with tempdir.TempDir() as tmpdir:
33- gz_path = os.path.join(tmpdir, f"data.jsonl.gz")
34- wget.download(gzip_url, gz_path)
35-
36- with gzip.open(gz_path, "rb") as f:
37- data = f.read().decode("utf-8")
38-
39- # create CACHE_DIR if not exists
40- if not os.path.exists(CACHE_DIR):
41- os.makedirs(CACHE_DIR)
42-
43- # Write the original open eval file to CACHE_DIR
44- with open(cache_path, "w") as f:
45- f.write(data)
30+
31+ if gh:
32+ # Install BigCodeBench dataset and parse as jsonl
33+ print(f"Downloading dataset from {gzip_url}")
34+ with tempdir.TempDir() as tmpdir:
35+ gz_path = os.path.join(tmpdir, f"data.jsonl.gz")
36+ wget.download(gzip_url, gz_path)
37+
38+ with gzip.open(gz_path, "rb") as f:
39+ data = f.read().decode("utf-8")
40+
41+ # create CACHE_DIR if not exists
42+ if not os.path.exists(CACHE_DIR):
43+ os.makedirs(CACHE_DIR)
44+
45+ # Write the original open eval file to CACHE_DIR
46+ with open(cache_path, "w") as f:
47+ f.write(data)
48+ else:
49+ hf_data.to_json(cache_path)
4650
4751
4852 def write_jsonl(
0 commit comments