Skip to content
Open
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
fa63931
feat(sagemaker/sessions): add config to enable stateful sessions usin…
zhaozuy Nov 10, 2025
0a99c84
feat(sagemaker/sessions): add utility functions for getting session_m…
zhaozuy Nov 10, 2025
2e57cd5
chore(sagemaker/sessions): update tests
zhaozuy Nov 10, 2025
85e1687
feat: update env config to use pydantic model SageMakerConfig and use…
zhaozuy Nov 10, 2025
9087e47
Merge branch 'main' of github.com:aws/model-hosting-container-standar…
zhaozuy Nov 10, 2025
e040f2d
feat(sagemaker/sessions): add validation layer so if session_manager …
zhaozuy Nov 11, 2025
9c9791a
Update way of setting sessions_path.
zhaozuy Nov 11, 2025
1d9f7fd
Merge branch 'main' of github.com:aws/model-hosting-container-standar…
zhaozuy Nov 18, 2025
413dfd2
feat(initial - sagemaker/sessions): support engines with their own cr…
zhaozuy Nov 17, 2025
aaf7774
feat(initial - sagemaker/sessions): refactor create/close api transfo…
zhaozuy Nov 18, 2025
24838c2
Merge branch 'main' of github.com:aws/model-hosting-container-standar…
zhaozuy Dec 1, 2025
1a31cf2
import logger to sessions/transform.py
zhaozuy Dec 1, 2025
f2aecfb
Merge branch 'toggle-sticky-routing' of github.com:aws/model-hosting-…
zhaozuy Dec 1, 2025
ce4ab40
Remove manual logger setups.
zhaozuy Dec 1, 2025
4f62b3f
Update README.md
zhaozuy Dec 2, 2025
2975b10
Fix linting.
zhaozuy Dec 2, 2025
dfc6b97
Merge branch 'main' of github.com:aws/model-hosting-container-standar…
zhaozuy Dec 3, 2025
39a1af2
wip - update stateful sessions manager to move sm id header to target
zhaozuy Dec 3, 2025
c77013e
fix(sessions): Fix session ID injection and update tests
zhaozuy Dec 4, 2025
454ad34
Add unit tests
zhaozuy Dec 4, 2025
259ea2f
Update tests, improve how check for use default is done.
zhaozuy Dec 4, 2025
5c01442
Remove unnecessary bootstrap in integ tests.
zhaozuy Dec 5, 2025
c94f0e3
chore(sessions): clarify parameter naming and improve documentation
zhaozuy Dec 6, 2025
ecb6184
Update docs.
zhaozuy Dec 6, 2025
89e2f1d
Merge branch 'main' of github.com:aws/model-hosting-container-standar…
zhaozuy Dec 8, 2025
8de6d32
Merge branch 'main' of github.com:aws/model-hosting-container-standar…
zhaozuy Dec 9, 2025
ec0ea84
refactor(sagemaker/sessions): simplify custom session handler registr…
zhaozuy Dec 9, 2025
b7fb991
refactor(sagemaker/sessions): improve parameter naming clarity for cu…
zhaozuy Dec 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion python/model_hosting_container_standards/common/fastapi/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import json
from typing import Any, Dict, Optional, Union

from fastapi import Request
from fastapi import Request, Response
from fastapi.responses import JSONResponse
from pydantic import BaseModel


Expand Down Expand Up @@ -33,3 +35,29 @@ def serialize_request(
"query_params": raw_request.query_params,
"path_params": raw_request.path_params,
}


def serialize_response(response: Union[Response, JSONResponse]) -> Dict[str, Any]:
    """Create a structured data dictionary for JMESPath transformations.

    Extracts and organizes response data into a standardized format that can be used
    with JMESPath expressions to transform and extract specific data elements.

    :param Union[Response, JSONResponse] response: Response to serialize - can be:
        - FastAPI Response object
        - JSONResponse object
    :return Dict[str, Any]: Structured data with body, headers, status_code, and
        media_type. ``body`` is the parsed JSON value when the payload is valid
        JSON, otherwise the decoded string.
    """
    # Decode the rendered body using the response's own charset.
    body = response.body.decode(response.charset)
    try:
        body = json.loads(body)
    except json.JSONDecodeError:
        # Deliberate best-effort: a non-JSON (or empty) payload stays a string.
        pass

    return {
        "body": body,
        "headers": response.headers,
        "status_code": response.status_code,
        "media_type": response.media_type,
    }
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pydantic import BaseModel, Field

from ...logging_config import logger
from ..fastapi.utils import serialize_request
from ..fastapi.utils import serialize_request, serialize_response
from .utils import _compile_jmespath_expressions


Expand Down Expand Up @@ -103,6 +103,19 @@ async def transform_request(self, raw_request: Request):
"""
raise NotImplementedError()

def _transform_response(self, response: Response):
    """Apply the configured response shape to a response.

    Serializes the response with ``serialize_response`` and hands the resulting
    data, together with ``self._response_shape``, to ``self._transform``.

    :param Response response: The response to transform
    :return: Whatever ``self._transform`` produces for the serialized response
    """
    serialized = serialize_response(response)
    return self._transform(serialized, self._response_shape)

def _transform_request(
self, request: Optional[BaseModel], raw_request: Request
) -> Dict[str, Any]:
Expand Down
40 changes: 37 additions & 3 deletions python/model_hosting_container_standards/sagemaker/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@
from .lora.models import AppendOperation
from .sagemaker_loader import SageMakerFunctionLoader
from .sagemaker_router import create_sagemaker_router
from .sessions import create_session_transform_decorator
from .sessions import (
create_session_transform_decorator,
register_engine_session_handler,
)
from .sessions.models import SageMakerSessionHeader

# SageMaker decorator instances - created using utility functions

Expand Down Expand Up @@ -118,17 +122,47 @@ def inject_adapter_id(
)


def stateful_session_manager(request_session_id_path: Optional[str] = None):
    """Create a decorator for session-based sticky routing.

    Without ``request_session_id_path`` both shapes are empty dicts, which
    enables the transform infrastructure (for intercept functionality) without
    any JMESPath expressions. With it, the request shape gains a single entry
    mapping that path to the session-ID header expression.

    Args:
        request_session_id_path: JMESPath target path where session ID should be
            injected INTO the request body from the session header. Falsy
            values (``None`` or an empty string) leave the request shape empty.

    Returns:
        A decorator that can be applied to route handlers to enable session management
    """
    request_shape = (
        {request_session_id_path: f'headers."{SageMakerSessionHeader.SESSION_ID}"'}
        if request_session_id_path
        else {}
    )
    make_decorator = create_session_transform_decorator()
    return make_decorator(request_shape=request_shape, response_shape={})


def register_create_session_handler(
    request_shape, response_session_id_path: str, content_path: Optional[str] = None
):
    """Register an engine-specific handler for the ``create_session`` operation.

    Thin wrapper around ``register_engine_session_handler`` that fixes the
    handler type to ``"create_session"``.

    Args:
        request_shape: Forwarded unchanged as ``request_shape``.
        response_session_id_path: Forwarded unchanged as
            ``response_session_id_path``.
        content_path: Forwarded as ``content_path``. When falsy (``None`` or an
            empty string) the JMESPath literal
            "`successfully created session.`" is used instead.

    Returns:
        Whatever ``register_engine_session_handler`` returns.
    """
    if not content_path:
        content_path = "`successfully created session.`"
    return register_engine_session_handler(
        "create_session",
        request_shape=request_shape,
        response_session_id_path=response_session_id_path,
        content_path=content_path,
    )


def register_close_session_handler(request_shape, content_path: Optional[str] = None):
    """Register an engine-specific handler for the ``close_session`` operation.

    Thin wrapper around ``register_engine_session_handler`` that fixes the
    handler type to ``"close_session"``.

    Args:
        request_shape: Forwarded unchanged as ``request_shape``.
        content_path: Forwarded as ``content_path``. When falsy (``None`` or an
            empty string) the JMESPath literal
            "`successfully closed session.`" is used instead.

    Returns:
        Whatever ``register_engine_session_handler`` returns.
    """
    if not content_path:
        content_path = "`successfully closed session.`"
    return register_engine_session_handler(
        "close_session",
        request_shape=request_shape,
        content_path=content_path,
    )


def bootstrap(app: FastAPI) -> FastAPI:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ def get_sagemaker_route_config(handler_type: str) -> Optional[RouteConfig]:
summary="Model inference endpoint",
)

if handler_type in ["create_session", "close_session"]:
# It's a request transformer, not a standalone API endpoint
# It modifies requests in-flight but doesn't expose its own route
return None

# Delegate to LoRA route resolver for LoRA-specific handlers
return get_lora_route_config(handler_type)

Expand Down
Loading