1919https://arxiv.org/abs/2005.14165
2020"""
2121
22- from lighteval .metrics .metrics import Metrics
22+ from inspect_ai .dataset import Sample
23+ from inspect_ai .solver import generate
24+
25+ from lighteval .metrics .metrics import Metrics , math_scorer
2326from lighteval .tasks .lighteval_task import LightevalTaskConfig
2427from lighteval .tasks .requests import Doc
2528
2629
30+ # TODO: convert dataset to parquet
31+
32+
def arithmetic_prompt(line, task_name: str = None):
    """Build the lighteval ``Doc`` for one arithmetic dataset row.

    Args:
        line: Dataset row; its ``"context"`` and ``"completion"`` entries
            are read.
        task_name: Name of the task, passed through to the ``Doc`` unchanged.

    Returns:
        A ``Doc`` whose query is ``line["context"]`` and whose single choice
        is ``line["completion"]``.
    """
    return Doc(
        task_name=task_name,
        query=line["context"],
        # The completion is the only choice, so the gold answer is index 0.
        choices=[line["completion"]],
        gold_index=[0],
    )
2935
3036
def record_to_sample(record):
    """Convert one arithmetic dataset record into an inspect_ai ``Sample``.

    Args:
        record: Dataset row; its ``"context"`` and ``"completion"`` entries
            are read.

    Returns:
        A ``Sample`` with ``record["context"]`` as the input and
        ``record["completion"]`` as the target.
    """
    return Sample(input=record["context"], target=record["completion"])
39+
40+
3141arithmetic_1dc = LightevalTaskConfig (
3242 name = "arithmetic:1dc" ,
3343 prompt_function = arithmetic_prompt ,
@@ -41,6 +51,9 @@ def arithmetic_prompt(line, task_name: str = None):
4151 metrics = [Metrics .exact_match ],
4252 stop_sequence = ["\n " ],
4353 version = 0 ,
54+ sample_fields = record_to_sample ,
55+ solver = [generate (cache = True )],
56+ scorer = math_scorer (),
4457)
4558
4659arithmetic_2da = LightevalTaskConfig (
@@ -56,6 +69,9 @@ def arithmetic_prompt(line, task_name: str = None):
5669 metrics = [Metrics .exact_match ],
5770 stop_sequence = ["\n " ],
5871 version = 0 ,
72+ sample_fields = record_to_sample ,
73+ solver = [generate (cache = True )],
74+ scorer = math_scorer (),
5975)
6076
6177arithmetic_2dm = LightevalTaskConfig (
@@ -71,6 +87,9 @@ def arithmetic_prompt(line, task_name: str = None):
7187 metrics = [Metrics .exact_match ],
7288 stop_sequence = ["\n " ],
7389 version = 0 ,
90+ sample_fields = record_to_sample ,
91+ solver = [generate (cache = True )],
92+ scorer = math_scorer (),
7493)
7594
7695arithmetic_2ds = LightevalTaskConfig (
@@ -86,6 +105,9 @@ def arithmetic_prompt(line, task_name: str = None):
86105 metrics = [Metrics .exact_match ],
87106 stop_sequence = ["\n " ],
88107 version = 0 ,
108+ sample_fields = record_to_sample ,
109+ solver = [generate (cache = True )],
110+ scorer = math_scorer (),
89111)
90112
91113arithmetic_3da = LightevalTaskConfig (
@@ -101,6 +123,9 @@ def arithmetic_prompt(line, task_name: str = None):
101123 metrics = [Metrics .exact_match ],
102124 stop_sequence = ["\n " ],
103125 version = 0 ,
126+ sample_fields = record_to_sample ,
127+ solver = [generate (cache = True )],
128+ scorer = math_scorer (),
104129)
105130
106131arithmetic_3ds = LightevalTaskConfig (
@@ -116,6 +141,9 @@ def arithmetic_prompt(line, task_name: str = None):
116141 metrics = [Metrics .exact_match ],
117142 stop_sequence = ["\n " ],
118143 version = 0 ,
144+ sample_fields = record_to_sample ,
145+ solver = [generate (cache = True )],
146+ scorer = math_scorer (),
119147)
120148
121149arithmetic_4da = LightevalTaskConfig (
@@ -131,6 +159,9 @@ def arithmetic_prompt(line, task_name: str = None):
131159 metrics = [Metrics .exact_match ],
132160 stop_sequence = ["\n " ],
133161 version = 0 ,
162+ sample_fields = record_to_sample ,
163+ solver = [generate (cache = True )],
164+ scorer = math_scorer (),
134165)
135166
136167arithmetic_4ds = LightevalTaskConfig (
@@ -146,6 +177,9 @@ def arithmetic_prompt(line, task_name: str = None):
146177 metrics = [Metrics .exact_match ],
147178 stop_sequence = ["\n " ],
148179 version = 0 ,
180+ sample_fields = record_to_sample ,
181+ solver = [generate (cache = True )],
182+ scorer = math_scorer (),
149183)
150184
151185arithmetic_5da = LightevalTaskConfig (
@@ -161,6 +195,9 @@ def arithmetic_prompt(line, task_name: str = None):
161195 metrics = [Metrics .exact_match ],
162196 stop_sequence = ["\n " ],
163197 version = 0 ,
198+ sample_fields = record_to_sample ,
199+ solver = [generate (cache = True )],
200+ scorer = math_scorer (),
164201)
165202
166203arithmetic_5ds = LightevalTaskConfig (
@@ -176,6 +213,9 @@ def arithmetic_prompt(line, task_name: str = None):
176213 metrics = [Metrics .exact_match ],
177214 stop_sequence = ["\n " ],
178215 version = 0 ,
216+ sample_fields = record_to_sample ,
217+ solver = [generate (cache = True )],
218+ scorer = math_scorer (),
179219)
180220
181221TASKS_TABLE = [
0 commit comments