diff --git a/CHANGELOG.md b/CHANGELOG.md index 9996c358..fcb13e37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ # Changelog +## 0.64.0 - 2025-09-30 + +#### Enhancements +- Upgraded `databento-dbn` to 0.42.0 + - Added `ts_index` and `pretty_ts_index` properties for records in Python which provide the timestamp that is most appropriate for indexing + - Fixed type stub for `channel_id` to allow None + +#### Bug fixes +- Fixed type hint for `start` parameter in `Live.subscribe()` + ## 0.63.0 - 2025-09-02 #### Enhancements @@ -18,7 +28,7 @@ This release delivers a number of breaking changes to the Python interface for D - Removed `hd` property from records in Python. Header fields are accessible directly from the record - Removed ability to directly instantiate most enums from an `int` in Python and coercion - from `int` in `__eq__`. They can still be instantitated with the `from_int` class method. + from `int` in `__eq__`. They can still be instantiated with the `from_int` class method. Write `Side.from_int(66)` instead of `Side(66)` and `Side.BID == Side.from_int(66)` instead of `Side.BID == 66`. 
Affected enums: - `Side` @@ -215,7 +225,7 @@ was preventing `ts_out` from being correctly decoded in the Python DBNDecoder ## 0.52.0 - 2025-04-15 #### Enhancements -- Added new optional `id` field to `SubcriptionRequest` class which will be used for improved error messages +- Added new optional `id` field to `SubscriptionRequest` class which will be used for improved error messages - Upgraded `databento-dbn` to 0.32.0 - Fixed `RType` variant names in Python to match `Schema` - Added missing Python type declarations for `RType` variants @@ -892,7 +902,7 @@ This release includes updates to the fields in text encodings (CSV and JSON), yo - Removed `record_size` property from `DBNStore` - Removed `bad` condition variant from `batch.get_dataset_condition` - Removed unused `LiveGateway` enum -- Removed `STATSTICS` from `Schema` enum +- Removed `STATISTICS` from `Schema` enum - Removed `STATUS` from `Schema` enum - Removed `GATEWAY_ERROR` from `Schema` enum - Removed `SYMBOL_MAPPING` from `Schema` enum diff --git a/README.md b/README.md index 97c7c12e..0c926583 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ The library is fully compatible with the latest distribution of Anaconda 3.9 and The minimum dependencies as found in the `pyproject.toml` are also listed below: - python = "^3.9" - aiohttp = "^3.8.3" -- databento-dbn = "~0.36.1" +- databento-dbn = "~0.42.0" - numpy = ">=1.23.5" - pandas = ">=1.5.3" - pip-system-certs = ">=4.0" (Windows only) diff --git a/databento/__init__.py b/databento/__init__.py index 0b267c37..07f9455d 100644 --- a/databento/__init__.py +++ b/databento/__init__.py @@ -145,7 +145,6 @@ "SystemCode", "SystemMsg", "TBBOMsg", - "TBBOMsg", "TCBBOMsg", "TradeMsg", "TradingEvent", diff --git a/databento/common/parsing.py b/databento/common/parsing.py index 0c3caf87..7a3f4459 100644 --- a/databento/common/parsing.py +++ b/databento/common/parsing.py @@ -344,14 +344,14 @@ def optional_datetime_to_string( def datetime_to_unix_nanoseconds( - value: 
pd.Timestamp | date | str | int, + value: pd.Timestamp | datetime | date | str | int, ) -> int: """ Return a valid UNIX nanosecond timestamp from the given value. Parameters ---------- - value : pd.Timestamp, date, str, or int + value : pd.Timestamp, datetime, date, str, or int The value to parse. Returns @@ -378,7 +378,7 @@ def datetime_to_unix_nanoseconds( def optional_datetime_to_unix_nanoseconds( - value: pd.Timestamp | date | str | int | None, + value: pd.Timestamp | datetime | date | str | int | None, ) -> int | None: """ Return a valid UNIX nanosecond timestamp from the given value (if not @@ -386,7 +386,7 @@ def optional_datetime_to_unix_nanoseconds( Parameters ---------- - value : pd.Timestamp, date, str, or int + value : pd.Timestamp, datetime, date, str, or int The value to parse. Returns diff --git a/databento/live/client.py b/databento/live/client.py index 92ea39c3..768ef89c 100644 --- a/databento/live/client.py +++ b/databento/live/client.py @@ -8,10 +8,13 @@ import threading from collections.abc import Iterable from concurrent import futures +from datetime import date +from datetime import datetime from os import PathLike from typing import IO import databento_dbn +import pandas as pd from databento_dbn import Schema from databento_dbn import SType @@ -368,20 +371,22 @@ def start( self, ) -> None: """ - Start the live client session. + Start the session. - It is not necessary to call `Live.start` before iterating a `Live` client and doing so will result in an error. + It is not necessary to call this method before iterating a `Live` client and doing so + will result in an error. Raises ------ ValueError - If `Live.start` is called before a subscription has been made. - If `Live.start` is called after streaming has already started. - If `Live.start` is called after the live session has closed. + If called before a subscription has been made. + If called after the session has already started. + If called after the session has closed. 
See Also -------- Live.stop + Live.terminate """ logger.info("starting live client") @@ -396,17 +401,25 @@ def start( def stop(self) -> None: """ - Stop the live client session as soon as possible. Once stopped, a - client cannot be restarted. + Stop the session and finish processing received records. + + A client can only be stopped after a successful connection is made with `Live.start`. + + This method does not block waiting for the connection to close. + + The connection will eventually close after calling this method. Once the connection + is closed, the client can be reused, but the session state is not preserved. Raises ------ ValueError - If `Live.stop` is called before a connection has been made. + If called before a connection has started. See Also -------- - Live.start + Live.terminate + Live.block_for_close + Live.wait_for_close """ logger.info("stopping live client") @@ -424,17 +437,18 @@ def subscribe( schema: Schema | str, symbols: Iterable[str | int] | str | int = ALL_SYMBOLS, stype_in: SType | str = SType.RAW_SYMBOL, - start: str | int | None = None, + start: pd.Timestamp | datetime | date | str | int | None = None, snapshot: bool = False, ) -> None: """ - Subscribe to a data stream. Multiple subscription requests can be made - for a streaming session. Once one subscription has been made, future - subscriptions must all belong to the same dataset. + Add a new subscription to the session. + + All subscriptions must be for the same `dataset`. + + Multiple subscriptions for different schemas can be made. - When creating the first subscription this method will also create - the TCP connection to the remote gateway. All subscriptions must - have the same dataset. + When creating the first subscription, this method will also create + the TCP connection to the remote gateway. Parameters ---------- @@ -446,12 +460,14 @@ def subscribe( The symbols to subscribe to. stype_in : SType or str, default 'raw_symbol' The input symbology type to resolve from. 
- start : str or int, optional - UNIX nanosecond epoch timestamp to start streaming from (inclusive), based on `ts_event`. Must be within 24 hours except when requesting the mbo or definition schemas. + start : pd.Timestamp, datetime, date, str or int, optional + The inclusive start of subscription replay. + Pass `0` to request all available data. + Cannot be specified after the session is started. + See `Intraday Replay` https://databento.com/docs/api-reference-live/basics/intraday-replay. snapshot: bool, default to 'False' Request subscription with snapshot. The `start` parameter must be `None`. - - + Only supported with `mbo` schema. Raises ------ @@ -497,17 +513,23 @@ def subscribe( def terminate(self) -> None: """ - Terminate the live client session and stop processing records as soon - as possible. + Terminate the session and stop processing records immediately. + + A client can only be terminated after a connection is started with `Live.start`. + + Once terminated, the client can be reused, but the session state + is not preserved. Raises ------ ValueError - If the client is not connected. + If called before a connection has started. See Also -------- Live.stop + Live.block_for_close + Live.wait_for_close """ logger.info("terminating live client") @@ -521,11 +543,14 @@ def block_for_close( ) -> None: """ Block until the session closes or a timeout is reached. A session will - close after `Live.stop` is called or the remote gateway disconnects. + close after the remote gateway disconnects, or after `Live.stop` or + `Live.terminate` are called. - If a `timeout` is specified, `Live.stop` will be called when the + If a `timeout` is specified, `Live.terminate` will be called when the timeout is reached. + When this method unblocks, the session is guaranteed to be closed. 
+ Parameters ---------- timeout : float, optional @@ -541,7 +566,7 @@ def block_for_close( See Also -------- - wait_for_close + Live.wait_for_close """ try: @@ -565,12 +590,14 @@ async def wait_for_close( ) -> None: """ Coroutine to wait until the session closes or a timeout is reached. A - session will close after `Live.stop` is called or the remote gateway - disconnects. + session will close when the remote gateway disconnects, or after + `Live.stop` or `Live.terminate` are called. - If a `timeout` is specified, `Live.stop` will be called when the + If a `timeout` is specified, `Live.terminate` will be called when the timeout is reached. + When this method unblocks, the session is guaranteed to be closed. + Parameters ---------- timeout : float, optional @@ -586,7 +613,7 @@ async def wait_for_close( See Also -------- - block_for_close + Live.block_for_close """ waiter = asyncio.wrap_future( diff --git a/databento/reference/api/adjustment.py b/databento/reference/api/adjustment.py index d43de6f5..5107f30b 100644 --- a/databento/reference/api/adjustment.py +++ b/databento/reference/api/adjustment.py @@ -50,14 +50,14 @@ def get_range( Parameters ---------- start : pd.Timestamp, datetime, date, str, or int - The start datetime of the request time range (inclusive) based on `ex_date`. + The inclusive start of the request time range based on `ex_date`. Assumes UTC as timezone unless passed a tz-aware object. If an integer is passed, then this represents nanoseconds since the UNIX epoch. end : pd.Timestamp, datetime, date, str, or int, optional - The end datetime of the request time range (exclusive) based on `ex_date`. + The exclusive end of the request time range based on `ex_date`. Assumes UTC as timezone unless passed a tz-aware object. If an integer is passed, then this represents nanoseconds since the UNIX epoch. - Defaults to the forward filled value of `start` based on the resolution provided. + If `None`, then will return **all** data available after `start`. 
symbols : Iterable[str] or str, optional The symbols to filter for. Takes up to 2,000 symbols per request. If more than 1 symbol is specified, the data is merged and sorted by time. diff --git a/databento/reference/api/corporate.py b/databento/reference/api/corporate.py index 57d1c9ac..1d725cf5 100644 --- a/databento/reference/api/corporate.py +++ b/databento/reference/api/corporate.py @@ -55,14 +55,14 @@ def get_range( Parameters ---------- start : pd.Timestamp, datetime, date, str, or int - The start datetime of the request time range (inclusive) based on `index`. + The inclusive start of the request range based on `index`. Assumes UTC as timezone unless passed a tz-aware object. If an integer is passed, then this represents nanoseconds since the UNIX epoch. end : pd.Timestamp, datetime, date, str, or int, optional - The end datetime of the request time range (exclusive) based on `index`. + The exclusive end of the request range based on `index`. Assumes UTC as timezone unless passed a tz-aware object. If an integer is passed, then this represents nanoseconds since the UNIX epoch. - Defaults to the forward filled value of `start` based on the resolution provided. + If `None`, then will return **all** data available after `start`. index : str, default 'event_date' The index column used for filtering the `start` and `end` time range and for record ordering. diff --git a/databento/reference/api/security.py b/databento/reference/api/security.py index 4ed44f51..f43ad582 100644 --- a/databento/reference/api/security.py +++ b/databento/reference/api/security.py @@ -51,14 +51,14 @@ def get_range( Parameters ---------- start : pd.Timestamp, datetime, date, str, or int - The start datetime of the request time range (inclusive) based on `index`. + The inclusive start of the request range based on `index`. Assumes UTC as timezone unless passed a tz-aware object. If an integer is passed, then this represents nanoseconds since the UNIX epoch. 
end : pd.Timestamp, datetime, date, str, or int, optional - The end datetime of the request time range (exclusive) based on `index`. + The exclusive end of the request range based on `index`. Assumes UTC as timezone unless passed a tz-aware object. If an integer is passed, then this represents nanoseconds since the UNIX epoch. - Defaults to the forward filled value of `start` based on the resolution provided. + If `None`, then will return **all** data available after `start`. index : str, default 'ts_effective' The index column used for filtering the `start` and `end` time range and for record ordering. diff --git a/databento/version.py b/databento/version.py index c60bb1b9..d720aedf 100644 --- a/databento/version.py +++ b/databento/version.py @@ -1 +1 @@ -__version__ = "0.63.0" +__version__ = "0.64.0" diff --git a/examples/historical_timeseries_from_file.py b/examples/historical_timeseries_from_file.py index e6451385..60dd765b 100644 --- a/examples/historical_timeseries_from_file.py +++ b/examples/historical_timeseries_from_file.py @@ -4,10 +4,10 @@ if __name__ == "__main__": - ts_start = datetime.datetime.utcnow() + ts_start = datetime.datetime.now(tz=datetime.timezone.utc) # Can load from file path (if exists) data = DBNStore.from_file(path="my_data.dbn") print(data.to_df()) - print(datetime.datetime.utcnow() - ts_start) + print(datetime.datetime.now(tz=datetime.timezone.utc) - ts_start) diff --git a/notebooks/quickstart.ipynb b/notebooks/quickstart.ipynb index ab38264c..28b1e4c4 100644 --- a/notebooks/quickstart.ipynb +++ b/notebooks/quickstart.ipynb @@ -618,7 +618,7 @@ "## Working with the DBNStore\n", "\n", "All time series data requests include a metadata header with the following specifications:\n", - "- The original query paramaters (these can be used to re-request the data)\n", + "- The original query parameters (these can be used to re-request the data)\n", "- Symbology mappings" ] }, @@ -2028,7 +2028,7 @@ "\n", "It's possible to query for a list of your 
batch jobs, with optional filter parameters for `state` (the state of the batch job) and `since` (when the job was received). \n", "\n", - "This could help to programatically build and manage larger data pipelines. Once we see the batch job has completed processing (with a state of `done`), then we can download the files.\n", + "This could help to programmatically build and manage larger data pipelines. Once we see the batch job has completed processing (with a state of `done`), then we can download the files.\n", "\n", "Note the value of the batch job's `id` which we'll need to provide for download. This is saved to `new_job_id`." ] @@ -2150,7 +2150,7 @@ "metadata": {}, "source": [ "## Programmatic downloads\n", - "Now that the batch job has compeleted (with a state of `done`), we can download the files by providing an output directory path, and the `job_id` (found above):" + "Now that the batch job has completed (with a state of `done`), we can download the files by providing an output directory path, and the `job_id` (found above):" ] }, { diff --git a/pyproject.toml b/pyproject.toml index c1e3b30a..d70b6fb4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "databento" -version = "0.63.0" +version = "0.64.0" description = "Official Python client library for Databento" authors = [ "Databento ", @@ -32,7 +32,7 @@ aiohttp = [ {version = "^3.8.3", python = "<3.12"}, {version = "^3.9.0", python = "^3.12"} ] -databento-dbn = "~=0.41.0" +databento-dbn = "~=0.42.0" numpy = [ {version = ">=1.23.5", python = "<3.12"}, {version = ">=1.26.0", python = "^3.12"} diff --git a/tests/test_common_parsing.py b/tests/test_common_parsing.py index f942e952..aefd0761 100644 --- a/tests/test_common_parsing.py +++ b/tests/test_common_parsing.py @@ -291,6 +291,11 @@ def test_maybe_datetime_to_string_give_valid_values_returns_expected_results( pytest.param(1680736543000000000, 1680736543000000000, id="int"), pytest.param("1680736543000000000", 
1680736543000000000, id="str-int"), pytest.param(dt.date(2023, 4, 5), 1680652800000000000, id="date"), + pytest.param( + dt.datetime(2023, 4, 5, 23, 15, 43, tzinfo=dt.timezone.utc), + 1680736543000000000, + id="datetime", + ), pytest.param( pd.to_datetime("2023-04-05T00:00:00"), 1680652800000000000, @@ -304,7 +309,7 @@ def test_maybe_datetime_to_string_give_valid_values_returns_expected_results( ], ) def test_datetime_to_unix_nanoseconds( - value: pd.Timestamp | str | int, + value: pd.Timestamp | dt.datetime | dt.date | str | int, expected: int, ) -> None: """ diff --git a/tests/test_historical_data.py b/tests/test_historical_data.py index 8e45df5a..c015e51e 100644 --- a/tests/test_historical_data.py +++ b/tests/test_historical_data.py @@ -13,6 +13,7 @@ def test_mbo_fields() -> None: fields = set(f for f in dir(struct) if not f.startswith(("_", "pretty_"))) fields.remove("record_size") fields.remove("size_hint") + fields.remove("ts_index") # Act difference = fields.symmetric_difference(struct._ordered_fields) @@ -42,6 +43,7 @@ def test_mbp_fields( fields = set(f for f in dir(struct) if not f.startswith(("_", "pretty_"))) fields.remove("record_size") fields.remove("size_hint") + fields.remove("ts_index") # Act difference = fields.symmetric_difference(struct._ordered_fields) @@ -74,6 +76,7 @@ def test_ohlcv_fields( fields = set(f for f in dir(struct) if not f.startswith(("_", "pretty_"))) fields.remove("record_size") fields.remove("size_hint") + fields.remove("ts_index") # Act difference = fields.symmetric_difference(struct._ordered_fields) @@ -92,6 +95,7 @@ def test_trades_struct() -> None: fields = set(f for f in dir(struct) if not f.startswith(("_", "pretty_"))) fields.remove("record_size") fields.remove("size_hint") + fields.remove("ts_index") # Act difference = fields.symmetric_difference(struct._ordered_fields) @@ -110,6 +114,7 @@ def test_definition_struct() -> None: fields = set(f for f in dir(struct) if not f.startswith(("_", "pretty_"))) 
fields.remove("record_size") fields.remove("size_hint") + fields.remove("ts_index") # Act difference = fields.symmetric_difference(struct._ordered_fields) @@ -128,6 +133,7 @@ def test_imbalance_struct() -> None: fields = set(f for f in dir(struct) if not f.startswith(("_", "pretty_"))) fields.remove("record_size") fields.remove("size_hint") + fields.remove("ts_index") # Act difference = fields.symmetric_difference(struct._ordered_fields) @@ -146,6 +152,7 @@ def test_statistics_struct() -> None: fields = set(f for f in dir(struct) if not f.startswith(("_", "pretty_"))) fields.remove("record_size") fields.remove("size_hint") + fields.remove("ts_index") # Act difference = fields.symmetric_difference(struct._ordered_fields) diff --git a/tests/test_live_client.py b/tests/test_live_client.py index a6df5c20..f43510cb 100644 --- a/tests/test_live_client.py +++ b/tests/test_live_client.py @@ -380,7 +380,7 @@ async def test_live_start( mock_live_server: MockLiveServerInterface, ) -> None: """ - Test the live sends a SesssionStart message upon calling start(). + Test the live sends a SessionStart message upon calling start(). """ # Arrange live_client.subscribe(