From 77c134884b1b12016ee8e7d5318fb298ed5b260a Mon Sep 17 00:00:00 2001
From: Alex Wolf
Date: Fri, 26 Sep 2025 20:54:35 +0200
Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Track=20compute=5Fspec?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Notes (ignored by `git am`, not part of the commit message):
This patch was re-flowed and hand-edited after `git format-patch`.
The blob id on the compute_spec.py `index` line predates the edit.
Leading whitespace of the context lines in
run_loading_benchmark_on_collection.py was reconstructed; if a hunk is
rejected, retry with `git am --ignore-whitespace`.

 .../run_loading_benchmark_on_collection.py    |  3 +-
 src/arrayloader_benchmarks/compute_spec.py    | 50 +++++++++++++++++++
 2 files changed, 52 insertions(+), 1 deletion(-)
 create mode 100644 src/arrayloader_benchmarks/compute_spec.py

diff --git a/scripts/run_loading_benchmark_on_collection.py b/scripts/run_loading_benchmark_on_collection.py
index ad03600..571d385 100644
--- a/scripts/run_loading_benchmark_on_collection.py
+++ b/scripts/run_loading_benchmark_on_collection.py
@@ -11,7 +11,7 @@
 import pandas as pd
 from torch.utils.data import DataLoader
 
-from arrayloader_benchmarks import benchmark_loader
+from arrayloader_benchmarks import benchmark_loader, compute_spec
 
 if TYPE_CHECKING:
     from pathlib import Path
@@ -254,6 +254,7 @@ def run(
         "num_workers": num_workers,
         "batch_size": batch_size,
         "chunk_size": chunk_size,
+        "compute_spec": compute_spec.get_aws_sagemaker_instance_type(),
         "run_uid": ln.context.run.uid,
         "timestamp": datetime.datetime.now(datetime.UTC),
         "user": ln.setup.settings.user.handle,
diff --git a/src/arrayloader_benchmarks/compute_spec.py b/src/arrayloader_benchmarks/compute_spec.py
new file mode 100644
index 0000000..ec1a662
--- /dev/null
+++ b/src/arrayloader_benchmarks/compute_spec.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+import json
+import os
+
+import boto3
+
+_METADATA_PATH = "/opt/ml/metadata/resource-metadata.json"
+_FALLBACK_REGION = "us-west-2"
+_UNKNOWN = "unknown"
+
+
+def get_aws_sagemaker_instance_type() -> str:
+    """Get the instance type of the current SageMaker Studio instance.
+
+    Reads ``DomainId`` and ``SpaceName`` from the resource metadata file,
+    describes that space via the SageMaker API, and returns the instance
+    type from its JupyterLab default resource spec.
+
+    The API region comes from ``AWS_REGION`` or ``AWS_DEFAULT_REGION`` when
+    set, otherwise ``us-west-2``.
+
+    Returns:
+        The instance type, or ``"unknown"`` if any step fails or the
+        setting is absent.
+    """
+    region = (
+        os.environ.get("AWS_REGION")
+        or os.environ.get("AWS_DEFAULT_REGION")
+        or _FALLBACK_REGION
+    )
+    try:
+        with open(_METADATA_PATH, encoding="utf-8") as f:  # noqa
+            metadata = json.load(f)
+
+        sagemaker = boto3.client("sagemaker", region_name=region)
+        space_response = sagemaker.describe_space(
+            DomainId=metadata["DomainId"], SpaceName=metadata["SpaceName"]
+        )
+
+        # Each level is read with an empty-dict default, so a missing level
+        # falls through to "unknown" instead of raising.
+        space_settings = space_response.get("SpaceSettings", {})
+        jupyter_settings = space_settings.get("JupyterLabAppSettings", {})
+        default_resource_spec = jupyter_settings.get("DefaultResourceSpec", {})
+        return default_resource_spec.get("InstanceType", _UNKNOWN)
+    except Exception:  # noqa: BLE001
+        # Best effort: any failure (missing metadata file, credentials,
+        # API error) degrades to "unknown" rather than aborting the caller.
+        return _UNKNOWN