Skip to content

Commit 2f2e49b

Browse files
committed
feat: add passing arguments for ram limits
1 parent c9e8a79 commit 2f2e49b

File tree

4 files changed

+36 additions, -15 deletions (lines changed)

bigcodebench/eval/__init__.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -110,6 +110,9 @@ def unsafe_execute(
110110
code: str,
111111
test_code: str,
112112
timeout: float,
113+
max_as_limit: float,
114+
max_data_limit: float,
115+
max_stack_limit: float,
113116
stat, # Value
114117
details, # Array
115118
):
@@ -123,9 +126,7 @@ def unsafe_execute(
123126
rmdir = os.rmdir
124127
chdir = os.chdir
125128
# Disable functionalities that can make destructive changes to the test.
126-
# allow only 128GB memory usage
127-
maximum_memory_bytes = 128 * 1024 * 1024 * 1024
128-
reliability_guard(maximum_memory_bytes=maximum_memory_bytes)
129+
reliability_guard(max_as_limit, max_data_limit, max_stack_limit)
129130
module_name = "__test__"
130131
new_module = types.ModuleType(module_name)
131132
# Set necessary attributes for the module
@@ -170,11 +171,14 @@ def untrusted_check(
170171
code: str,
171172
test_code: str,
172173
entry_point: str,
174+
max_as_limit: float,
175+
max_data_limit: float,
176+
max_stack_limit: float,
173177
min_time_limit: float = 10,
174178
gt_time_limit: float = 60
175179
) -> Tuple[str, np.ndarray]:
176180
time_limit = max(min_time_limit, gt_time_limit)
177-
timeout = max(os.getenv("EVALPLUS_TIMEOUT_PER_TASK", 120), time_limit) + 1
181+
timeout = max(os.getenv("BIGCODEBENCH_TIMEOUT_PER_TASK", 120), time_limit) + 1
178182
# shared memory objects
179183
stat = Value("i", _UNKNOWN)
180184
manager = Manager()
@@ -187,6 +191,9 @@ def untrusted_check(
187191
code,
188192
test_code,
189193
timeout,
194+
max_as_limit,
195+
max_data_limit,
196+
max_stack_limit,
190197
stat,
191198
details,
192199
),

bigcodebench/eval/utils.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ class redirect_stdin(contextlib._RedirectStream): # type: ignore
258258
_stream = "stdin"
259259

260260

261-
def reliability_guard(maximum_memory_bytes: Optional[int] = None):
261+
def reliability_guard(max_as_limit, max_data_limit, max_stack_limit):
262262
"""
263263
This disables various destructive functions and prevents the generated code
264264
from interfering with the test (e.g. fork bomb, killing other processes,
@@ -282,18 +282,18 @@ def reliability_guard(maximum_memory_bytes: Optional[int] = None):
282282
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "3"
283283
os.environ['TF_ENABLE_ONEDNN_OPTS'] = "0"
284284

285-
if maximum_memory_bytes is not None:
285+
if maximum_memory_bytes:
286286
import resource
287287

288288
resource.setrlimit(
289-
resource.RLIMIT_AS, (maximum_memory_bytes, maximum_memory_bytes)
289+
resource.RLIMIT_AS, (max_as_limit, max_as_limit)
290290
)
291291
resource.setrlimit(
292-
resource.RLIMIT_DATA, (maximum_memory_bytes, maximum_memory_bytes)
292+
resource.RLIMIT_DATA, (max_data_limit, max_data_limit)
293293
)
294294
if not platform.uname().system == "Darwin":
295295
resource.setrlimit(
296-
resource.RLIMIT_STACK, (maximum_memory_bytes, maximum_memory_bytes)
296+
resource.RLIMIT_STACK, (max_stack_limit, max_stack_limit)
297297
)
298298

299299
faulthandler.disable()

bigcodebench/evaluate.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
Result = Tuple[str, List[bool]]
3535

3636

37-
def get_groundtruth(problems, hashcode, check_gt_only):
37+
def get_groundtruth(problems, hashcode, check_gt_only, max_as_limit, max_data_limit, max_stack_limit):
3838
cache_file = os.path.join(CACHE_DIR, f"{hashcode}.pkl")
3939
if os.path.exists(cache_file):
4040
if check_gt_only:
@@ -53,6 +53,9 @@ def get_groundtruth(problems, hashcode, check_gt_only):
5353
problem["complete_prompt"] + "\n" + problem["canonical_solution"],
5454
problem["test"],
5555
problem["task_id"],
56+
max_as_limit,
57+
max_data_limit,
58+
max_stack_limit
5659
)
5760
print(f"Expected outputs computed in {time.time() - tbegin:.2f}s")
5861

@@ -65,9 +68,12 @@ def check_correctness(
6568
completion_id: int,
6669
problem: Dict[str, Any],
6770
solution: str,
71+
max_as_limit: float,
72+
max_data_limit: float,
73+
max_stack_limit: float,
6874
identifier=None,
6975
min_time_limit: float = 0.1,
70-
gt_time_limit: float = 2.0
76+
gt_time_limit: float = 2.0,
7177
) -> Dict[str, Result]: # {...}, "base" | "plus" -> (status, details)
7278
ret = {
7379
"completion_id": completion_id,
@@ -79,8 +85,11 @@ def check_correctness(
7985
solution,
8086
problem["test"],
8187
problem["entry_point"],
88+
max_as_limit,
89+
max_data_limit,
90+
max_stack_limit,
8291
min_time_limit,
83-
gt_time_limit
92+
gt_time_limit,
8493
)
8594
return ret
8695

@@ -150,6 +159,9 @@ def evaluate(flags):
150159
completion_id[task_id],
151160
problems[task_id],
152161
solution,
162+
flags.max_as_limit,
163+
flags.max_data_limit,
164+
flags.max_stack_limit,
153165
sample["_identifier"],
154166
flags.min_time_limit,
155167
expected_time[task_id] if expected_time else 20
@@ -240,6 +252,9 @@ def main():
240252
parser.add_argument("--samples", required=True, type=str)
241253
parser.add_argument("--parallel", default=None, type=int)
242254
parser.add_argument("--min-time-limit", default=1, type=float)
255+
parser.add_argument("--max-as-limit", default=128*1024, type=float)
256+
parser.add_argument("--max-data-limit", default=4*1024, type=float)
257+
parser.add_argument("--max-stack-limit", default=5, type=float)
243258
parser.add_argument(
244259
"--check-gt-only", action="store_true", help="Check the groundtruth"
245260
)

bigcodebench/gen/util/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
)
1313

1414

15-
def trusted_exec(code, test_code, task_id):
15+
def trusted_exec(code, test_code, task_id, max_as_limit, max_data_limit, max_stack_limit):
1616
"""Execute trusted code in place."""
1717

1818
with create_tempdir():
@@ -25,8 +25,7 @@ def trusted_exec(code, test_code, task_id):
2525
chdir = os.chdir
2626
module_name = "__test__"
2727
new_module = types.ModuleType(module_name)
28-
maximum_memory_bytes = 128 * 1024 * 1024 * 1024
29-
reliability_guard(maximum_memory_bytes=maximum_memory_bytes)
28+
reliability_guard(max_as_limit, max_data_limit, max_stack_limit)
3029
# Set necessary attributes for the module
3130
new_module.__dict__.update({
3231
'__builtins__': builtins,

0 commit comments

Comments
 (0)