@@ -125,7 +125,7 @@ def test_benchmark_evaluation_full_flow(self):
             benchmark=Benchmark.GEN_QA,
             model=TEST_CONFIG["model_package_arn"],
             s3_output_path=TEST_CONFIG["s3_output_path"],
-            mlflow_resource_arn=TEST_CONFIG["mlflow_tracking_server_arn"],
+            # mlflow_resource_arn=TEST_CONFIG["mlflow_tracking_server_arn"],
             dataset=TEST_CONFIG["dataset_s3_uri"],
             model_package_group=TEST_CONFIG["model_package_group_arn"],
             base_eval_name="integ-test-gen-qa-eval",
@@ -245,7 +245,7 @@ def test_benchmark_evaluator_validation(self):
             benchmark="invalid_benchmark",
             model=TEST_CONFIG["model_package_arn"],
             s3_output_path=TEST_CONFIG["s3_output_path"],
-            mlflow_resource_arn=TEST_CONFIG["mlflow_tracking_server_arn"],
+            # mlflow_resource_arn=TEST_CONFIG["mlflow_tracking_server_arn"],
             dataset="s3://bucket/dataset.jsonl",
         )

@@ -270,7 +270,7 @@ def test_benchmark_subtasks_validation(self):
             benchmark=Benchmark.MMLU,
             model=TEST_CONFIG["model_package_arn"],
             s3_output_path=TEST_CONFIG["s3_output_path"],
-            mlflow_resource_arn=TEST_CONFIG["mlflow_tracking_server_arn"],
+            # mlflow_resource_arn=TEST_CONFIG["mlflow_tracking_server_arn"],
             dataset="s3://bucket/dataset.jsonl",
             subtasks=["abstract_algebra", "anatomy"],
             model_package_group="arn:aws:sagemaker:us-west-2:123456789012:model-package-group/test",
@@ -283,7 +283,7 @@ def test_benchmark_subtasks_validation(self):
             benchmark=Benchmark.GEN_QA,
             model=TEST_CONFIG["model_package_arn"],
             s3_output_path=TEST_CONFIG["s3_output_path"],
-            mlflow_resource_arn=TEST_CONFIG["mlflow_tracking_server_arn"],
+            # mlflow_resource_arn=TEST_CONFIG["mlflow_tracking_server_arn"],
             dataset="s3://bucket/dataset.jsonl",
             subtasks=["invalid"],
             model_package_group="arn:aws:sagemaker:us-west-2:123456789012:model-package-group/test",
@@ -312,7 +312,7 @@ def test_benchmark_evaluation_base_model_only(self):
             benchmark=Benchmark.GEN_QA,
             model=BASE_MODEL_ONLY_CONFIG["base_model_id"],
             s3_output_path=BASE_MODEL_ONLY_CONFIG["s3_output_path"],
-            mlflow_resource_arn=BASE_MODEL_ONLY_CONFIG["mlflow_tracking_server_arn"],
+            # mlflow_resource_arn=BASE_MODEL_ONLY_CONFIG["mlflow_tracking_server_arn"],
             dataset=BASE_MODEL_ONLY_CONFIG["dataset_s3_uri"],
             base_eval_name="integ-test-base-model-only",
             # Note: model_package_group not needed for JumpStart models