aws · aviruthen · Dec 7, 2025 · Dec 7, 2025 · Dec 8, 2025 · Dec 8, 2025
diff --git a/sagemaker-core/pyproject.toml b/sagemaker-core/pyproject.toml
@@ -9,7 +9,7 @@ description = "An python package for sagemaker core functionalities"
 authors = [
   {name = "AWS", email = "sagemaker-interests@amazon.com"}
 ]
-readme = "README.rst"
+readme = "README.rst" 
 dependencies = [
   # Add your dependencies here (Include lower and upper bounds as applicable)
     "boto3>=1.42.2,<2.0.0",
@@ -34,6 +34,9 @@ dependencies = [
     "omegaconf>=2.1.0",
     "torch>=1.9.0",
     "scipy>=1.5.0",
+    # Remote function dependencies
+    "cloudpickle>=2.0.0",
+    "paramiko>=2.11.0",
 ]
 requires-python = ">=3.9"
 classifiers = [

diff --git a/sagemaker-core/src/sagemaker/core/training/configs.py b/sagemaker-core/src/sagemaker/core/training/configs.py
@@ -257,15 +257,16 @@ class InputData(BaseConfig):
     Parameters:
         channel_name (StrPipeVar):
             The name of the input data source channel.
-        data_source (Union[str, S3DataSource, FileSystemDataSource, DatasetSource]):
+        data_source (Union[StrPipeVar, S3DataSource, FileSystemDataSource, DatasetSource]):
             The data source for the channel. Can be an S3 URI string, local file path string,
-            S3DataSource object, or FileSystemDataSource object.
+            S3DataSource object, FileSystemDataSource object, DatasetSource object, or a
+            pipeline variable (Properties) from a previous step.
         content_type (StrPipeVar):
             The MIME type of the data.
     """
 
     channel_name: StrPipeVar = None
-    data_source: Union[str, FileSystemDataSource, S3DataSource, DatasetSource] = None
+    data_source: Union[StrPipeVar, FileSystemDataSource, S3DataSource, DatasetSource] = None
     content_type: StrPipeVar = None
 
 

diff --git a/sagemaker-core/tests/integ/jumpstart/test_search_integ.py b/sagemaker-core/tests/integ/jumpstart/test_search_integ.py
@@ -19,6 +19,7 @@
 from sagemaker.core.resources import HubContent
 
 
+@pytest.mark.serial
 @pytest.mark.integ
 def test_search_public_hub_models_default_args():
     # Only query, uses default hub name and session
@@ -30,6 +31,7 @@ def test_search_public_hub_models_default_args():
     assert len(results) > 0, "Expected at least one matching model from the public hub"
 
 
+@pytest.mark.serial
 @pytest.mark.integ
 def test_search_public_hub_models_custom_session():
     # Provide a custom SageMaker session
@@ -41,6 +43,7 @@ def test_search_public_hub_models_custom_session():
     assert all(isinstance(m, HubContent) for m in results)
 
 
+@pytest.mark.serial
 @pytest.mark.integ
 def test_search_public_hub_models_custom_hub_name():
     # Using the default public hub but provided explicitly
@@ -51,6 +54,7 @@ def test_search_public_hub_models_custom_hub_name():
     assert all(isinstance(m, HubContent) for m in results)
 
 
+@pytest.mark.serial
 @pytest.mark.integ
 def test_search_public_hub_models_all_args():
     # Provide both hub_name and session explicitly

diff --git a/sagemaker-core/tests/unit/telemetry/test_telemetry_logging.py b/sagemaker-core/tests/unit/telemetry/test_telemetry_logging.py
@@ -30,7 +30,18 @@
     PYTHON_VERSION,
 )
 from sagemaker.core.user_agent import SDK_VERSION, process_studio_metadata_file
-from sagemaker.serve.utils.exceptions import ModelBuilderException, LocalModelOutOfMemoryException
+
+# Import sagemaker-serve exceptions when installed; dependent tests are skipped otherwise
+try:
+    from sagemaker.serve.utils.exceptions import ModelBuilderException, LocalModelOutOfMemoryException
+    SAGEMAKER_SERVE_AVAILABLE = True
+except ImportError:
+    SAGEMAKER_SERVE_AVAILABLE = False
+    # Fallback stand-ins so module-level references (e.g. MOCK_EXCEPTION) still resolve
+    class ModelBuilderException(Exception):
+        pass
+    class LocalModelOutOfMemoryException(Exception):
+        pass
 
 MOCK_SESSION = Mock()
 MOCK_EXCEPTION = LocalModelOutOfMemoryException("mock raise ex")
@@ -147,6 +158,10 @@ def test_telemetry_emitter_decorator_success(
             1, [1, 2], MOCK_SESSION, None, None, expected_extra_str
         )
 
+    @pytest.mark.skipif(
+        not SAGEMAKER_SERVE_AVAILABLE,
+        reason="Requires sagemaker-serve package"
+    )
     @patch("sagemaker.core.telemetry.telemetry_logging._send_telemetry_request")
     @patch("sagemaker.core.telemetry.telemetry_logging.resolve_value_from_config")
     def test_telemetry_emitter_decorator_handle_exception_success(

diff --git a/sagemaker-core/tox.ini b/sagemaker-core/tox.ini
@@ -63,6 +63,7 @@ markers =
     release
     image_uris_unit_test
     timeout: mark a test as a timeout.
+    serial: marks tests that must run serially (not in parallel)
 
 [testenv]
 setenv =

diff --git a/sagemaker-mlops/pyproject.toml b/sagemaker-mlops/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 name = "sagemaker-mlops"
 dynamic = ["version"]
 description = "SageMaker MLOps package for workflow orchestration and model building"
-readme = "README.md"
+readme = "README.md" 
 requires-python = ">=3.9"
 authors = [
     {name = "Amazon Web Services"},

diff --git a/sagemaker-mlops/tests/integ/test_pipeline_train_registry.py b/sagemaker-mlops/tests/integ/test_pipeline_train_registry.py
@@ -215,7 +215,19 @@ def test_pipeline_with_train_and_registry(sagemaker_session, pipeline_session, r
                 assert execution_status == "Succeeded"
                 break
             elif execution_status in ["Failed", "Stopped"]:
-                pytest.fail(f"Pipeline execution {execution_status}")
+                # Get detailed failure information
+                steps = sagemaker_session.sagemaker_client.list_pipeline_execution_steps(
+                    PipelineExecutionArn=execution_desc["PipelineExecutionArn"]
+                )["PipelineExecutionSteps"]
+
+                failed_steps = []
+                for step in steps:
+                    if step.get("StepStatus") == "Failed":
+                        failure_reason = step.get("FailureReason", "Unknown reason")
+                        failed_steps.append(f"{step['StepName']}: {failure_reason}")
+
+                failure_details = "\n".join(failed_steps) if failed_steps else "No detailed failure information available"
+                pytest.fail(f"Pipeline execution {execution_status}. Failed steps:\n{failure_details}")
 
             time.sleep(60)
         else:

diff --git a/sagemaker-mlops/tox.ini b/sagemaker-mlops/tox.ini
@@ -87,8 +87,7 @@ allowlist_externals =
 commands =
     python -c "import os; os.system('install-custom-pkgs --install-boto-wheels')"
     pip install 'apache-airflow==2.10.4' --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.10.4/constraints-3.9.txt"
-    pip install 'torch==2.3.1+cpu' -f 'https://download.pytorch.org/whl/torch_stable.html'
-    pip install 'torchvision==0.18.1+cpu' -f 'https://download.pytorch.org/whl/torch_stable.html'
+    pip install 'torch==2.8.0' 'torchvision==0.23.0'
     pip install 'dill>=0.3.9'
 
     pytest {posargs}

diff --git a/sagemaker-serve/src/sagemaker/serve/model_server/in_process_model_server/app.py b/sagemaker-serve/src/sagemaker/serve/model_server/in_process_model_server/app.py
@@ -39,7 +39,6 @@ def __init__(
     ):
         self._thread = None
         self._loop = None
-        self._stop_event = asyncio.Event()
         self._shutdown_event = threading.Event()
         self._router = APIRouter()
         self._task = task

diff --git a/sagemaker-serve/tests/integ/test_tei_integration.py b/sagemaker-serve/tests/integ/test_tei_integration.py
@@ -31,9 +31,6 @@
 MODEL_NAME_PREFIX = "tei-test-model"
 ENDPOINT_NAME_PREFIX = "tei-test-endpoint"
 
-# Configuration from backup file
-AWS_REGION = "us-east-2"
-
 
 @pytest.mark.slow_test
 def test_tei_build_deploy_invoke_cleanup():
@@ -81,8 +78,6 @@ def build_and_deploy():
     hf_model_id = MODEL_ID
 
     schema_builder = create_schema_builder()
-    boto_session = boto3.Session(region_name=AWS_REGION)
-    sagemaker_session = Session(boto_session=boto_session)
     unique_id = str(uuid.uuid4())[:8]
 
     compute = Compute(
@@ -94,7 +89,6 @@ def build_and_deploy():
         model=hf_model_id,  # Use HuggingFace model string
         model_server=ModelServer.TEI,
         schema_builder=schema_builder,
-        sagemaker_session=sagemaker_session,
         compute=compute,
     )
 
@@ -104,7 +98,7 @@ def build_and_deploy():
 
     core_endpoint = model_builder.deploy(
         endpoint_name=f"{ENDPOINT_NAME_PREFIX}-{unique_id}",
-        initial_instance_count=1
+        initial_instance_count=1,
     )
     logger.info(f"Endpoint Successfully Created: {core_endpoint.endpoint_name}")
 

diff --git a/sagemaker-serve/tests/integ/test_tgi_integration.py b/sagemaker-serve/tests/integ/test_tgi_integration.py
@@ -31,9 +31,6 @@
 MODEL_NAME_PREFIX = "tgi-test-model"
 ENDPOINT_NAME_PREFIX = "tgi-test-endpoint"
 
-# Configuration from backup file
-AWS_REGION = "us-east-2"
-
 
 @pytest.mark.slow_test
 def test_tgi_build_deploy_invoke_cleanup():
@@ -81,8 +78,6 @@ def build_and_deploy():
     hf_model_id = MODEL_ID
 
     schema_builder = create_schema_builder()
-    boto_session = boto3.Session(region_name=AWS_REGION)
-    sagemaker_session = Session(boto_session=boto_session)
     unique_id = str(uuid.uuid4())[:8]
 
     compute = Compute(
@@ -101,7 +96,6 @@ def build_and_deploy():
         model=hf_model_id,  # Use HuggingFace model string
         model_server=ModelServer.TGI,
         schema_builder=schema_builder,
-        sagemaker_session=sagemaker_session,
         compute=compute,
         env_vars=env_vars
     )
@@ -112,7 +106,7 @@ def build_and_deploy():
 
     core_endpoint = model_builder.deploy(
         endpoint_name=f"{ENDPOINT_NAME_PREFIX}-{unique_id}",
-        initial_instance_count=1
+        initial_instance_count=1,
     )
     logger.info(f"Endpoint Successfully Created: {core_endpoint.endpoint_name}")
 

diff --git a/sagemaker-serve/tests/unit/test_model_builder_servers.py b/sagemaker-serve/tests/unit/test_model_builder_servers.py
@@ -414,20 +414,22 @@ def test_all_supported_model_servers_have_routes(self):
         """Test that all supported model servers have corresponding build methods."""
         from sagemaker.serve.model_builder_servers import _ModelBuilderServers
 
-        # Map of model servers to their expected build methods
-        server_method_map = {
-            ModelServer.TORCHSERVE: '_build_for_torchserve',
-            ModelServer.TRITON: '_build_for_triton',
-            ModelServer.TENSORFLOW_SERVING: '_build_for_tensorflow_serving',
-            ModelServer.DJL_SERVING: '_build_for_djl',
-            ModelServer.TEI: '_build_for_tei',
-            ModelServer.TGI: '_build_for_tgi',
-            ModelServer.MMS: '_build_for_transformers',
-            ModelServer.SMD: '_build_for_smd',
-        }
-
-        for model_server, method_name in server_method_map.items():
-            with self.subTest(model_server=model_server):
+        # (model server, expected build method) pairs, kept as a list of tuples instead of
+        # an enum-keyed dict to avoid enum serialization issues with pytest-xdist
+        server_method_map = [
+            (ModelServer.TORCHSERVE, '_build_for_torchserve'),
+            (ModelServer.TRITON, '_build_for_triton'),
+            (ModelServer.TENSORFLOW_SERVING, '_build_for_tensorflow_serving'),
+            (ModelServer.DJL_SERVING, '_build_for_djl'),
+            (ModelServer.TEI, '_build_for_tei'),
+            (ModelServer.TGI, '_build_for_tgi'),
+            (ModelServer.MMS, '_build_for_transformers'),
+            (ModelServer.SMD, '_build_for_smd'),
+        ]
+
+        for model_server, method_name in server_method_map:
+            # Label the subTest with enum.name (a string) so the enum itself is not serialized
+            with self.subTest(model_server=model_server.name):
                 self.mock_builder.model_server = model_server
 
                 # Mock the specific build method

diff --git a/sagemaker-serve/tests/unit/test_model_builder_utils_triton.py b/sagemaker-serve/tests/unit/test_model_builder_utils_triton.py
@@ -136,6 +136,14 @@ class TestExportPytorchToOnnx(unittest.TestCase):
     @patch('torch.onnx.export')
     def test_export_pytorch_to_onnx_success(self, mock_export):
         """Test successful PyTorch to ONNX export."""
+        try:
+            import ml_dtypes
+            # Skip test if ml_dtypes doesn't have required attribute
+            if not hasattr(ml_dtypes, 'float4_e2m1fn'):
+                self.skipTest("ml_dtypes version incompatible with current numpy/onnx")
+        except ImportError:
+            pass
+
         utils = _ModelBuilderUtils()
         mock_model = Mock()
         mock_schema = Mock()

diff --git a/sagemaker-serve/tox.ini b/sagemaker-serve/tox.ini
@@ -87,9 +87,10 @@ allowlist_externals =
 commands =
     python -c "import os; os.system('install-custom-pkgs --install-boto-wheels')"
     pip install 'apache-airflow==2.10.4' --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.10.4/constraints-3.9.txt"
-    pip install 'torch==2.3.1+cpu' -f 'https://download.pytorch.org/whl/torch_stable.html'
-    pip install 'torchvision==0.18.1+cpu' -f 'https://download.pytorch.org/whl/torch_stable.html'
+    pip install 'torch==2.8.0' 'torchvision==0.23.0'
+    pip install 'onnx>=1.16.0,<1.17.0' 'onnxruntime>=1.19.0,<1.20.0'
     pip install 'dill>=0.3.9'
+    pip install 'tensorflow==2.16.2'
 
     pytest {posargs}
 deps =

diff --git a/sagemaker-train/pyproject.toml b/sagemaker-train/pyproject.toml
@@ -57,7 +57,8 @@ test = [
     "pandas",
     "scipy",
     "omegaconf",
-    "graphene"
+    "graphene",
+    "IPython"
 ]
 
 [tool.setuptools.packages.find]
@@ -71,6 +72,9 @@ version = { file = "VERSION"}
 [tool.pytest.ini_options]
 addopts = ["-vv"]
 testpaths = ["tests"]
+markers = [
+    "serial: marks tests that must run serially (not in parallel)",
+]
 
 [tool.black]
 line-length = 100

diff --git a/sagemaker-train/src/sagemaker/ai_registry/dataset.py b/sagemaker-train/src/sagemaker/ai_registry/dataset.py
@@ -179,7 +179,6 @@ def _validate_dataset_file(cls, file_path: str) -> None:
                 max_size_mb = DATASET_MAX_FILE_SIZE_BYTES / (1024 * 1024)
                 raise ValueError(f"File size {file_size_mb:.2f} MB exceeds maximum allowed size of {max_size_mb:.0f} MB")
 
-    @classmethod
     @classmethod
     @_telemetry_emitter(feature=Feature.MODEL_CUSTOMIZATION, func_name="DataSet.get")
     def get(cls, name: str, sagemaker_session=None) -> "DataSet":

diff --git a/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py b/sagemaker-train/src/sagemaker/train/evaluate/benchmark_evaluator.py
@@ -300,12 +300,18 @@ class BenchMarkEvaluator(BaseEvaluator):
     """
 
     benchmark: _Benchmark
+    dataset: Union[str, Any]  # Required field, must come before optional fields
     subtasks: Optional[Union[str, List[str]]] = None
+    evaluate_base_model: bool = True
     _hyperparameters: Optional[Any] = None
 
-    # Template-required fields
-    evaluate_base_model: bool = False
-
+    @validator('dataset', pre=True)
+    def _resolve_dataset(cls, v):
+        """Resolve dataset to string (S3 URI or ARN) and validate format.
+
+        Uses BaseEvaluator's common validation logic to avoid code duplication.
+        """
+        return BaseEvaluator._validate_and_resolve_dataset(v)
 
     @validator('benchmark')
     def _validate_benchmark_model_compatibility(cls, v, values):

diff --git a/sagemaker-train/tests/integ/ai_registry/test_dataset.py b/sagemaker-train/tests/integ/ai_registry/test_dataset.py
@@ -21,6 +21,7 @@
 from sagemaker.ai_registry.air_constants import HubContentStatus
 
 
+@pytest.mark.serial
 class TestDataSetIntegration:
     """Integration tests for DataSet operations."""
 

diff --git a/sagemaker-train/tests/integ/ai_registry/test_evaluator.py b/sagemaker-train/tests/integ/ai_registry/test_evaluator.py
@@ -19,6 +19,7 @@
 from sagemaker.ai_registry.air_constants import HubContentStatus, REWARD_FUNCTION, REWARD_PROMPT
 
 
+@pytest.mark.serial
 class TestEvaluatorIntegration:
     """Integration tests for Evaluator operations."""
 

diff --git a/sagemaker-train/tests/unit/train/local/test_data.py b/sagemaker-train/tests/unit/train/local/test_data.py
@@ -296,7 +296,7 @@ def test_pad_groups_records_within_size(self):
     def test_pad_splits_when_exceeding_size(self):
         """Test pad splits records when exceeding size."""
         splitter = MagicMock()
-        splitter.split.return_value = ["a" * 1000, "b" * 1000, "c" * 1000]
+        splitter.split.return_value = ["a" * 500, "b" * 500, "c" * 500]
 
         strategy = MultiRecordStrategy(splitter)
         result = list(strategy.pad("file.txt", size=0.001))  # Very small size

diff --git a/sagemaker-train/tox.ini b/sagemaker-train/tox.ini
@@ -63,6 +63,7 @@ markers =
     release
     image_uris_unit_test
     timeout: mark a test as a timeout.
+    serial: marks tests that must run serially (not in parallel)
 
 [testenv]
 setenv =