From 5e48ca2ada439ac3e2152bb4b6a67e9ba5afdca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florentin=20D=C3=B6rre?= Date: Thu, 4 Dec 2025 16:19:37 +0100 Subject: [PATCH 1/4] Reapply "Fix estimation endpoint take 3" This reverts commit 6ea56b526f60f668f54e21eff4d57707d79a2720. --- src/graphdatascience/session/aura_api.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/graphdatascience/session/aura_api.py b/src/graphdatascience/session/aura_api.py index 4ec651377..f63ae2b21 100644 --- a/src/graphdatascience/session/aura_api.py +++ b/src/graphdatascience/session/aura_api.py @@ -314,7 +314,9 @@ def estimate_size( "instance_type": "dsenterprise", } - response = self._request_session.post(f"{self._base_uri}/{AuraApi.API_VERSION}/instances/sizing", json=data) + response = self._request_session.post( + f"{self._base_uri}/{AuraApi.API_VERSION}/graph-analytics/sessions/sizing", json=data + ) self._check_resp(response) return EstimationDetails.from_json(response.json()["data"]) From dd374fbe8f8736362190d55bea18917c5b1f1d50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florentin=20D=C3=B6rre?= Date: Thu, 4 Dec 2025 16:51:45 +0100 Subject: [PATCH 2/4] Reapply "Use new estimate endpoint on Aura API" This reverts commit 5f1fbf163416c22c021c140232154e82d71bfd6b. 
--- changelog.md | 1 + .../pages/graph-analytics-serverless.adoc | 3 +++ src/graphdatascience/session/aura_api.py | 12 +++++++-- .../session/aura_api_responses.py | 23 +++++++++++++++-- .../session/dedicated_sessions.py | 16 +++++++++--- src/graphdatascience/session/gds_sessions.py | 20 ++++++++++++--- tests/unit/session/test_dedicated_sessions.py | 14 ++++++++--- tests/unit/test_aura_api.py | 25 ++++++++++++++++--- 8 files changed, 96 insertions(+), 18 deletions(-) diff --git a/changelog.md b/changelog.md index dae9c4de6..fbf918cd7 100644 --- a/changelog.md +++ b/changelog.md @@ -9,6 +9,7 @@ ## Improvements - `GdsSessions.get_or_create` now allows to specify the `aura_instance_id` instead of `uri` as part of the `db_connection`. This is required if the instance id could not be derived from the provided database connection URI such as for Multi-Database instances. +- `GdsSessions.estimate` now recommends smaller sizes such as `2GB`. It also allows specifying node label, node property, and relationship property counts for better estimates.
## Other changes diff --git a/doc/modules/ROOT/pages/graph-analytics-serverless.adoc b/doc/modules/ROOT/pages/graph-analytics-serverless.adoc index f5b86be8c..072c7e344 100644 --- a/doc/modules/ROOT/pages/graph-analytics-serverless.adoc +++ b/doc/modules/ROOT/pages/graph-analytics-serverless.adoc @@ -219,6 +219,9 @@ memory = sessions.estimate( node_count=20, relationship_count=50, algorithm_categories=[AlgorithmCategory.CENTRALITY, AlgorithmCategory.NODE_EMBEDDING], + node_label_count=1, + node_property_count=1, + relationship_property_count=1 ) ---- diff --git a/src/graphdatascience/session/aura_api.py b/src/graphdatascience/session/aura_api.py index f63ae2b21..ccdd1442f 100644 --- a/src/graphdatascience/session/aura_api.py +++ b/src/graphdatascience/session/aura_api.py @@ -305,13 +305,21 @@ def wait_for_instance_running( return WaitResult.from_error(f"Instance is not running after waiting for {waited_time} seconds") def estimate_size( - self, node_count: int, relationship_count: int, algorithm_categories: list[AlgorithmCategory] + self, + node_count: int, + node_label_count: int, + node_property_count: int, + relationship_count: int, + relationship_property_count: int, + algorithm_categories: list[AlgorithmCategory], ) -> EstimationDetails: data = { "node_count": node_count, + "node_label_count": node_label_count, + "node_property_count": node_property_count, "relationship_count": relationship_count, + "relationship_property_count": relationship_property_count, "algorithm_categories": [i.value for i in algorithm_categories], - "instance_type": "dsenterprise", } response = self._request_session.post( diff --git a/src/graphdatascience/session/aura_api_responses.py b/src/graphdatascience/session/aura_api_responses.py index ef03f2bcd..aae2bd4a7 100644 --- a/src/graphdatascience/session/aura_api_responses.py +++ b/src/graphdatascience/session/aura_api_responses.py @@ -169,9 +169,8 @@ def from_json(cls, json: dict[str, Any]) -> InstanceCreateDetails: 
@dataclass(repr=True, frozen=True) class EstimationDetails: - min_required_memory: str + estimated_memory: str recommended_size: str - did_exceed_maximum: bool @classmethod def from_json(cls, json: dict[str, Any]) -> EstimationDetails: @@ -181,6 +180,26 @@ def from_json(cls, json: dict[str, Any]) -> EstimationDetails: return cls(**{f.name: json[f.name] for f in fields}) + def exceeds_recommended(self) -> bool: + return EstimationDetails._parse_size(self.estimated_memory) > EstimationDetails._parse_size( + self.recommended_size + ) + + @staticmethod + def _parse_size(size: str) -> float: + size_str = size.upper().strip() + # treat GB, Gi and G the same as its only used for comparing it internally + size_str = size_str.removesuffix("B").removesuffix("I") + + if size_str.endswith("G"): + return float(size_str[:-1]) * 1024**3 # 1GB = 1024^3 bytes + elif size_str.endswith("M"): + return float(size_str[:-1]) * 1024**2 # 1MB = 1024^2 bytes + elif size_str.endswith("K"): + return float(size_str[:-1]) * 1024 # 1KB = 1024 bytes + else: + return float(size_str) # assume bytes + class WaitResult(NamedTuple): connection_url: str diff --git a/src/graphdatascience/session/dedicated_sessions.py b/src/graphdatascience/session/dedicated_sessions.py index ce8f8a75e..faa764b09 100644 --- a/src/graphdatascience/session/dedicated_sessions.py +++ b/src/graphdatascience/session/dedicated_sessions.py @@ -31,14 +31,24 @@ def estimate( node_count: int, relationship_count: int, algorithm_categories: list[AlgorithmCategory] | None = None, + node_label_count: int = 0, + node_property_count: int = 0, + relationship_property_count: int = 0, ) -> SessionMemory: if algorithm_categories is None: algorithm_categories = [] - estimation = self._aura_api.estimate_size(node_count, relationship_count, algorithm_categories) + estimation = self._aura_api.estimate_size( + node_count=node_count, + node_label_count=node_label_count, + node_property_count=node_property_count, + 
relationship_count=relationship_count, + relationship_property_count=relationship_property_count, + algorithm_categories=algorithm_categories, + ) - if estimation.did_exceed_maximum: + if estimation.exceeds_recommended(): warnings.warn( - f"The estimated memory `{estimation.min_required_memory}` exceeds the maximum size" + f"The estimated memory `{estimation.estimated_memory}` exceeds the maximum size" f" supported by your Aura project (`{estimation.recommended_size}`).", ResourceWarning, ) diff --git a/src/graphdatascience/session/gds_sessions.py b/src/graphdatascience/session/gds_sessions.py index 536bbe1e9..720ef1903 100644 --- a/src/graphdatascience/session/gds_sessions.py +++ b/src/graphdatascience/session/gds_sessions.py @@ -71,7 +71,10 @@ def estimate( self, node_count: int, relationship_count: int, - algorithm_categories: list[AlgorithmCategory] | None = None, + algorithm_categories: Optional[list[AlgorithmCategory]] = None, + node_label_count: int = 0, + node_property_count: int = 0, + relationship_property_count: int = 0, ) -> SessionMemory: """ Estimates the memory required for a session with the given node and relationship counts. @@ -79,14 +82,23 @@ def estimate( Args: node_count (int): The number of nodes. relationship_count (int): The number of relationships. - algorithm_categories (list[AlgorithmCategory] | None): The algorithm categories to consider. - + algorithm_categories (Optional[list[AlgorithmCategory]]): The algorithm categories to consider. + node_label_count (int): The number of node labels. + node_property_count (int): The number of node properties. + relationship_property_count (int): The number of relationship properties. Returns: SessionMemory: The estimated memory required for the session. 
""" if algorithm_categories is None: algorithm_categories = [] - return self._impl.estimate(node_count, relationship_count, algorithm_categories) + return self._impl.estimate( + node_count=node_count, + relationship_count=relationship_count, + algorithm_categories=algorithm_categories, + node_label_count=node_label_count, + node_property_count=node_property_count, + relationship_property_count=relationship_property_count, + ) def available_cloud_locations(self) -> list[CloudLocation]: """ diff --git a/tests/unit/session/test_dedicated_sessions.py b/tests/unit/session/test_dedicated_sessions.py index 9b01fd7ff..b049f3814 100644 --- a/tests/unit/session/test_dedicated_sessions.py +++ b/tests/unit/session/test_dedicated_sessions.py @@ -49,7 +49,7 @@ def __init__( self.id_counter = 0 self.time = 0 self._status_after_creating = status_after_creating - self._size_estimation = size_estimation or EstimationDetails("1GB", "8GB", False) + self._size_estimation = size_estimation or EstimationDetails("1GB", "8GB") self._console_user = console_user self._admin_user = admin_user @@ -225,7 +225,13 @@ def project_details(self) -> ProjectDetails: return ProjectDetails(id=self._project_id, cloud_locations={CloudLocation("aws", "leipzig-1")}) def estimate_size( - self, node_count: int, relationship_count: int, algorithm_categories: list[AlgorithmCategory] + self, + node_count: int, + node_label_count: int, + node_property_count: int, + relationship_count: int, + relationship_property_count: int, + algorithm_categories: list[AlgorithmCategory], ) -> EstimationDetails: return self._size_estimation @@ -893,14 +899,14 @@ def test_create_waiting_forever( def test_estimate_size() -> None: - aura_api = FakeAuraApi(size_estimation=EstimationDetails("1GB", "8GB", False)) + aura_api = FakeAuraApi(size_estimation=EstimationDetails("1GB", "8GB")) sessions = DedicatedSessions(aura_api) assert sessions.estimate(1, 1, [AlgorithmCategory.CENTRALITY]) == SessionMemory.m_8GB def 
test_estimate_size_exceeds() -> None: - aura_api = FakeAuraApi(size_estimation=EstimationDetails("16GB", "8GB", True)) + aura_api = FakeAuraApi(size_estimation=EstimationDetails("16GB", "8GB")) sessions = DedicatedSessions(aura_api) with pytest.warns( diff --git a/tests/unit/test_aura_api.py b/tests/unit/test_aura_api.py index 01276f1c1..1d3ef423d 100644 --- a/tests/unit/test_aura_api.py +++ b/tests/unit/test_aura_api.py @@ -1099,12 +1099,14 @@ def test_wait_for_instance_deleting(requests_mock: Mocker) -> None: def test_estimate_size(requests_mock: Mocker) -> None: mock_auth_token(requests_mock) requests_mock.post( - "https://api.neo4j.io/v1/instances/sizing", - json={"data": {"did_exceed_maximum": True, "min_required_memory": "307GB", "recommended_size": "96GB"}}, + "https://api.neo4j.io/v1/graph-analytics/sessions/sizing", + json={"data": {"estimated_memory": "3070GB", "recommended_size": "512GB"}}, ) api = AuraApi("", "", project_id="some-tenant") - assert api.estimate_size(100, 10, [AlgorithmCategory.NODE_EMBEDDING]) == EstimationDetails("307GB", "96GB", True) + assert api.estimate_size(100, 1, 1, 10, 1, [AlgorithmCategory.NODE_EMBEDDING]) == EstimationDetails( + estimated_memory="3070GB", recommended_size="512GB" + ) def test_extract_id() -> None: @@ -1215,3 +1217,20 @@ def test_parse_session_info_without_optionals() -> None: project_id="tenant-1", user_id="user-1", ) + + +def test_estimate_size_parsing() -> None: + assert EstimationDetails._parse_size("8GB") == 8589934592 + assert EstimationDetails._parse_size("8G") == 8589934592 + assert EstimationDetails._parse_size("512MB") == 536870912 + assert EstimationDetails._parse_size("256KB") == 262144 + assert EstimationDetails._parse_size("1024B") == 1024 + assert EstimationDetails._parse_size("12345") == 12345 + + +def test_estimate_exceeds_maximum() -> None: + estimation = EstimationDetails(estimated_memory="16Gi", recommended_size="8Gi") + assert estimation.exceeds_recommended() is True + + estimation = 
EstimationDetails(estimated_memory="8Gi", recommended_size="16Gi") + assert estimation.exceeds_recommended() is False From 2e47e5ab564aa756b9ee5bc9ba124b40a4462a52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florentin=20D=C3=B6rre?= Date: Thu, 4 Dec 2025 17:09:53 +0100 Subject: [PATCH 3/4] Allow list of str for algo categories as well --- .../session/dedicated_sessions.py | 6 ++- src/graphdatascience/session/gds_sessions.py | 37 +++++++++++++------ tests/unit/session/test_dedicated_sessions.py | 7 ++++ 3 files changed, 37 insertions(+), 13 deletions(-) diff --git a/src/graphdatascience/session/dedicated_sessions.py b/src/graphdatascience/session/dedicated_sessions.py index faa764b09..1dc705425 100644 --- a/src/graphdatascience/session/dedicated_sessions.py +++ b/src/graphdatascience/session/dedicated_sessions.py @@ -30,13 +30,17 @@ def estimate( self, node_count: int, relationship_count: int, - algorithm_categories: list[AlgorithmCategory] | None = None, + algorithm_categories: list[AlgorithmCategory] | list[str] | None = None, node_label_count: int = 0, node_property_count: int = 0, relationship_property_count: int = 0, ) -> SessionMemory: if algorithm_categories is None: algorithm_categories = [] + else: + algorithm_categories = [ + AlgorithmCategory(cat) if isinstance(cat, str) else cat for cat in algorithm_categories + ] estimation = self._aura_api.estimate_size( node_count=node_count, node_label_count=node_label_count, diff --git a/src/graphdatascience/session/gds_sessions.py b/src/graphdatascience/session/gds_sessions.py index 720ef1903..6e466cb9c 100644 --- a/src/graphdatascience/session/gds_sessions.py +++ b/src/graphdatascience/session/gds_sessions.py @@ -55,8 +55,10 @@ def __init__(self, api_credentials: AuraAPICredentials) -> None: """ Initializes a new instance of the GdsSessions class. - Args: - api_credentials (AuraAPICredentials): The Aura API credentials used for establishing a connection. 
+ Parameters + ---------- + api_credentials + The Aura API credentials used for establishing a connection. """ aura_env = os.environ.get("AURA_ENV") aura_api = AuraApi( @@ -71,7 +73,7 @@ def estimate( self, node_count: int, relationship_count: int, - algorithm_categories: Optional[list[AlgorithmCategory]] = None, + algorithm_categories: list[AlgorithmCategory] | list[str] | None = None, node_label_count: int = 0, node_property_count: int = 0, relationship_property_count: int = 0, @@ -79,15 +81,26 @@ def estimate( """ Estimates the memory required for a session with the given node and relationship counts. - Args: - node_count (int): The number of nodes. - relationship_count (int): The number of relationships. - algorithm_categories (Optional[list[AlgorithmCategory]]): The algorithm categories to consider. - node_label_count (int): The number of node labels. - node_property_count (int): The number of node properties. - relationship_property_count (int): The number of relationship properties. - Returns: - SessionMemory: The estimated memory required for the session. + Parameters + ---------- + node_count + Number of nodes. + relationship_count + Number of relationships. + algorithm_categories + The algorithm categories to consider. + node_label_count + Number of node labels. + node_property_count + Number of node properties. + relationship_property_count + Number of relationship properties. + + + Returns + ------- + SessionMemory + The estimated memory required for the session. 
""" if algorithm_categories is None: algorithm_categories = [] diff --git a/tests/unit/session/test_dedicated_sessions.py b/tests/unit/session/test_dedicated_sessions.py index b049f3814..577ae2053 100644 --- a/tests/unit/session/test_dedicated_sessions.py +++ b/tests/unit/session/test_dedicated_sessions.py @@ -905,6 +905,13 @@ def test_estimate_size() -> None: assert sessions.estimate(1, 1, [AlgorithmCategory.CENTRALITY]) == SessionMemory.m_8GB +def test_estimate_str_categories_size() -> None: + aura_api = FakeAuraApi(size_estimation=EstimationDetails("1GB", "8GB")) + sessions = DedicatedSessions(aura_api) + + assert sessions.estimate(1, 1, ["centrality"]) == SessionMemory.m_8GB + + def test_estimate_size_exceeds() -> None: aura_api = FakeAuraApi(size_estimation=EstimationDetails("16GB", "8GB")) sessions = DedicatedSessions(aura_api) From f55075b33fd6b1e6a497fcfbc85c64cbde2e4485 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florentin=20D=C3=B6rre?= Date: Mon, 15 Dec 2025 15:03:43 +0100 Subject: [PATCH 4/4] Improve size parsing testing + reduce time for unit test --- justfile | 4 +-- .../session/aura_api_responses.py | 4 +-- tests/unit/test_aura_api.py | 26 ++++++++++--------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/justfile b/justfile index 6b2b0d83d..cd0b81ef2 100644 --- a/justfile +++ b/justfile @@ -4,8 +4,8 @@ style skip_notebooks="false": convert-notebooks: ./scripts/nb2doc/convert.sh -unit-tests: - pytest tests/unit +unit-tests extra_options="": + pytest tests/unit {{extra_options}} # just it test true "--durations=20" it filter="" enterprise="true" extra_options="": diff --git a/src/graphdatascience/session/aura_api_responses.py b/src/graphdatascience/session/aura_api_responses.py index aae2bd4a7..eac8eab35 100644 --- a/src/graphdatascience/session/aura_api_responses.py +++ b/src/graphdatascience/session/aura_api_responses.py @@ -181,12 +181,12 @@ def from_json(cls, json: dict[str, Any]) -> EstimationDetails: return cls(**{f.name: 
json[f.name] for f in fields}) def exceeds_recommended(self) -> bool: - return EstimationDetails._parse_size(self.estimated_memory) > EstimationDetails._parse_size( + return EstimationDetails._memory_in_bytes(self.estimated_memory) > EstimationDetails._memory_in_bytes( self.recommended_size ) @staticmethod - def _parse_size(size: str) -> float: + def _memory_in_bytes(size: str) -> float: size_str = size.upper().strip() # treat GB, Gi and G the same as its only used for comparing it internally size_str = size_str.removesuffix("B").removesuffix("I") diff --git a/tests/unit/test_aura_api.py b/tests/unit/test_aura_api.py index 1d3ef423d..7f4f632ec 100644 --- a/tests/unit/test_aura_api.py +++ b/tests/unit/test_aura_api.py @@ -586,11 +586,11 @@ def test_dont_wait_forever_for_session(requests_mock: Mocker, caplog: LogCapture with caplog.at_level(logging.DEBUG): assert ( - "Session `id0` is not running after 0.2 seconds" - in api.wait_for_session_running("id0", sleep_time=0.05, max_wait_time=0.2).error + "Session `id0` is not running after 0.01 seconds" + in api.wait_for_session_running("id0", sleep_time=0.001, max_wait_time=0.01).error ) - assert "Session `id0` is not yet running. Current status: Creating Host: foo.bar. Retrying in 0.1" in caplog.text + assert "Session `id0` is not yet running. Current status: Creating Host: foo.bar. Retrying in 0.001" in caplog.text def test_wait_for_session_running(requests_mock: Mocker) -> None: @@ -1024,11 +1024,11 @@ def test_dont_wait_forever(requests_mock: Mocker, caplog: LogCaptureFixture) -> with caplog.at_level(logging.DEBUG): assert ( - "Instance is not running after waiting for 0.7" - in api.wait_for_instance_running("id0", max_wait_time=0.7).error + "Instance is not running after waiting for 0.01" + in api.wait_for_instance_running("id0", max_wait_time=0.01, sleep_time=0.001).error ) - assert "Instance `id0` is not yet running. Current status: creating. Retrying in 0.2 seconds..." 
in caplog.text + assert "Instance `id0` is not yet running. Current status: creating. Retrying in 0.001 seconds..." in caplog.text def test_wait_for_instance_running(requests_mock: Mocker) -> None: @@ -1220,12 +1220,14 @@ def test_parse_session_info_without_optionals() -> None: def test_estimate_size_parsing() -> None: - assert EstimationDetails._parse_size("8GB") == 8589934592 - assert EstimationDetails._parse_size("8G") == 8589934592 - assert EstimationDetails._parse_size("512MB") == 536870912 - assert EstimationDetails._parse_size("256KB") == 262144 - assert EstimationDetails._parse_size("1024B") == 1024 - assert EstimationDetails._parse_size("12345") == 12345 + assert EstimationDetails._memory_in_bytes("8GB") == 8589934592 + assert EstimationDetails._memory_in_bytes("8G") == 8589934592 + assert EstimationDetails._memory_in_bytes("512MB") == 536870912 + assert EstimationDetails._memory_in_bytes("256KB") == 262144 + assert EstimationDetails._memory_in_bytes("1024B") == 1024 + assert EstimationDetails._memory_in_bytes("12345") == 12345 + assert EstimationDetails._memory_in_bytes("8Gi") == 8589934592 + assert EstimationDetails._memory_in_bytes("8gb") == 8589934592 def test_estimate_exceeds_maximum() -> None: