Skip to content

Commit 212e942

Browse files
committed
Optimize ObjectId
From profiles of a mongoengine-using project, I’ve noticed that `ObjectId` creation happens a lot, around ~0.5% of total request time. This PR optimizes the class in two ways: 1. It inlines the `__generate()` and `__validate()` methods, removing their call overhead. The methods are small and not called anywhere else, and Python imposes a constant overhead for method calls, so inlining them saves time. 2. Checking for `str` values before `ObjectId` ones. I believe that typical usage patterns involve creating `ObjectId`s from strings more often than from existing `ObjectId` instances, so checking for `str` first should reduce the average number of checks needed. I validated these changes with the below benchmark script, run with [richbench](https://github.com/tonybaloney/rich-bench). <details> <summary><code>bench_objectid.py</code></summary> ```python from __future__ import annotations import os import struct import threading import time from random import SystemRandom from typing import Any, NoReturn, Optional, Union _MAX_COUNTER_VALUE = 0xFFFFFF _PACK_INT = struct.Struct(">I").pack _PACK_INT_RANDOM = struct.Struct(">I5s").pack def _raise_invalid_id(oid: str) -> NoReturn: raise ValueError(f"{oid!r} is not a valid ObjectId") def _random_bytes() -> bytes: return os.urandom(5) class ObjectIdBefore: _pid = os.getpid() _inc = SystemRandom().randint(0, _MAX_COUNTER_VALUE) _inc_lock = threading.Lock() __random = _random_bytes() __slots__ = ("__id",) def __init__(self, oid: Optional[Union[str, ObjectIdBefore, bytes]] = None) -> None: if oid is None: self.__generate() elif isinstance(oid, bytes) and len(oid) == 12: self.__id = oid else: self.__validate(oid) @classmethod def _random(cls) -> bytes: pid = os.getpid() if pid != cls._pid: cls._pid = pid cls.__random = _random_bytes() return cls.__random def __generate(self) -> None: with ObjectIdBefore._inc_lock: inc = ObjectIdBefore._inc ObjectIdBefore._inc = (inc + 1) % (_MAX_COUNTER_VALUE + 1) self.__id = ( _PACK_INT_RANDOM(int(time.time()), ObjectIdBefore._random()) + _PACK_INT(inc)[1:4] ) def __validate(self, oid: Any) -> None: if isinstance(oid, ObjectIdBefore): self.__id = oid.binary elif isinstance(oid, str): if len(oid) == 24: try: self.__id = bytes.fromhex(oid) except (TypeError, ValueError): _raise_invalid_id(oid) else: _raise_invalid_id(oid) else: raise TypeError(f"id must be an instance of (bytes, str, ObjectId), not {type(oid)}") @Property def binary(self) -> bytes: return self.__id class ObjectIdAfter: _pid = os.getpid() _inc = SystemRandom().randint(0, _MAX_COUNTER_VALUE) _inc_lock = threading.Lock() __random = _random_bytes() __slots__ = ("__id",) def __init__(self, oid: Optional[Union[str, ObjectIdAfter, bytes]] = None) -> None: if oid is None: with ObjectIdAfter._inc_lock: inc = ObjectIdAfter._inc ObjectIdAfter._inc = (inc + 1) % (_MAX_COUNTER_VALUE + 1) self.__id = ( _PACK_INT_RANDOM(int(time.time()), ObjectIdAfter._random()) + _PACK_INT(inc)[1:4] ) elif isinstance(oid, bytes) and len(oid) == 12: self.__id = oid elif isinstance(oid, str): if len(oid) == 24: try: self.__id = bytes.fromhex(oid) except (TypeError, ValueError): _raise_invalid_id(oid) else: _raise_invalid_id(oid) elif isinstance(oid, ObjectIdAfter): self.__id = oid.binary else: raise TypeError(f"id must be an instance of (bytes, str, ObjectId), not {type(oid)}") @classmethod def _random(cls) -> bytes: pid = os.getpid() if pid != cls._pid: cls._pid = pid cls.__random = _random_bytes() return cls.__random @Property def binary(self) -> bytes: return self.__id test_oid_str = "507f1f77bcf86cd799439011" test_oid_bytes = bytes.fromhex(test_oid_str) test_oid_obj_before = ObjectIdBefore(test_oid_bytes) test_oid_obj_after = ObjectIdAfter(test_oid_bytes) invalid_str_short = "507f1f77" invalid_str_long = "507f1f77bcf86cd799439011aabbccdd" invalid_str_non_hex = "507f1f77bcf86cd799439ZZZ" invalid_bytes_short = b"short" invalid_bytes_long = b"toolongbytes1234" invalid_type = 12345 def bench_none_before(): for _ in range(1_000_000): ObjectIdBefore() def bench_none_after(): for _ in range(1_000_000): ObjectIdAfter() def bench_str_before(): for _ in range(1_000_000): ObjectIdBefore(test_oid_str) def bench_str_after(): for _ in range(1_000_000): ObjectIdAfter(test_oid_str) def bench_objectid_before(): for _ in range(1_000_000): ObjectIdBefore(test_oid_obj_before) def bench_objectid_after(): for _ in range(1_000_000): ObjectIdAfter(test_oid_obj_after) def bench_invalid_str_short_before(): for _ in range(1_000_000): try: ObjectIdBefore(invalid_str_short) except Exception: pass def bench_invalid_str_short_after(): for _ in range(10_000): try: ObjectIdAfter(invalid_str_short) except Exception: pass def bench_invalid_type_before(): for _ in range(10_000): try: ObjectIdBefore(invalid_type) except Exception: pass def bench_invalid_type_after(): for _ in range(10_000): try: ObjectIdAfter(invalid_type) except Exception: pass __benchmarks__ = [ (bench_none_before, bench_none_after, "ObjectId() - generate new"), (bench_str_before, bench_str_after, "ObjectId(str) - 24-char hex"), (bench_objectid_before, bench_objectid_after, "ObjectId(ObjectId) - copy"), ( bench_invalid_str_short_before, bench_invalid_str_short_after, "ObjectId(str) - invalid short", ), (bench_invalid_type_before, bench_invalid_type_after, "ObjectId(int) - invalid type"), ] ``` </details> Results: ``` $ uvx -p 3.13 richbench . Benchmarks, repeat=5, number=5 ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Benchmark ┃ Min ┃ Max ┃ Mean ┃ Min (+) ┃ Max (+) ┃ Mean (+) ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ ObjectId() - generate new │ 1.920 │ 2.021 │ 1.990 │ 1.865 (1.0x) │ 1.868 (1.1x) │ 1.866 (1.1x) │ │ ObjectId(str) - 24-char hex │ 0.938 │ 0.952 │ 0.946 │ 0.716 (1.3x) │ 0.725 (1.3x) │ 0.720 (1.3x) │ │ ObjectId(ObjectId) - copy │ 0.845 │ 0.857 │ 0.850 │ 0.818 (1.0x) │ 0.828 (1.0x) │ 0.822 (1.0x) │ │ ObjectId(str) - invalid short │ 1.839 │ 1.857 │ 1.845 │ 0.014 (127.9x) │ 0.015 (125.7x) │ 0.015 (126.4x) │ │ ObjectId(int) - invalid type │ 0.020 │ 0.021 │ 0.021 │ 0.016 (1.2x) │ 0.017 (1.3x) │ 0.017 (1.2x) │ └───────────────────────────────┴─────────┴─────────┴─────────┴─────────────────┴─────────────────┴─────────────────┘ ``` The speedups for normal usage are like 1.1x to 1.3x, which is pretty decent for a small change. The invalid string case is much faster, but that’s less relevant since it’s an error path.
1 parent 1e78bd4 commit 212e942

File tree

1 file changed

+18
-33
lines changed

1 file changed

+18
-33
lines changed

bson/objectid.py

Lines changed: 18 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,27 @@ def __init__(self, oid: Optional[Union[str, ObjectId, bytes]] = None) -> None:
9898
objectid.rst>`_.
9999
"""
100100
if oid is None:
101-
self.__generate()
101+
# Generate a new value for this ObjectId.
102+
with ObjectId._inc_lock:
103+
inc = ObjectId._inc
104+
ObjectId._inc = (inc + 1) % (_MAX_COUNTER_VALUE + 1)
105+
106+
# 4 bytes current time, 5 bytes random, 3 bytes inc.
107+
self.__id = _PACK_INT_RANDOM(int(time.time()), ObjectId._random()) + _PACK_INT(inc)[1:4]
102108
elif isinstance(oid, bytes) and len(oid) == 12:
103109
self.__id = oid
110+
elif isinstance(oid, str):
111+
if len(oid) == 24:
112+
try:
113+
self.__id = bytes.fromhex(oid)
114+
except (TypeError, ValueError):
115+
_raise_invalid_id(oid)
116+
else:
117+
_raise_invalid_id(oid)
118+
elif isinstance(oid, ObjectId):
119+
self.__id = oid.binary
104120
else:
105-
self.__validate(oid)
121+
raise TypeError(f"id must be an instance of (bytes, str, ObjectId), not {type(oid)}")
106122

107123
@classmethod
108124
def from_datetime(cls: Type[ObjectId], generation_time: datetime.datetime) -> ObjectId:
@@ -163,37 +179,6 @@ def _random(cls) -> bytes:
163179
cls.__random = _random_bytes()
164180
return cls.__random
165181

166-
def __generate(self) -> None:
167-
"""Generate a new value for this ObjectId."""
168-
with ObjectId._inc_lock:
169-
inc = ObjectId._inc
170-
ObjectId._inc = (inc + 1) % (_MAX_COUNTER_VALUE + 1)
171-
172-
# 4 bytes current time, 5 bytes random, 3 bytes inc.
173-
self.__id = _PACK_INT_RANDOM(int(time.time()), ObjectId._random()) + _PACK_INT(inc)[1:4]
174-
175-
def __validate(self, oid: Any) -> None:
176-
"""Validate and use the given id for this ObjectId.
177-
178-
Raises TypeError if id is not an instance of :class:`str`,
179-
:class:`bytes`, or ObjectId. Raises InvalidId if it is not a
180-
valid ObjectId.
181-
182-
:param oid: a valid ObjectId
183-
"""
184-
if isinstance(oid, ObjectId):
185-
self.__id = oid.binary
186-
elif isinstance(oid, str):
187-
if len(oid) == 24:
188-
try:
189-
self.__id = bytes.fromhex(oid)
190-
except (TypeError, ValueError):
191-
_raise_invalid_id(oid)
192-
else:
193-
_raise_invalid_id(oid)
194-
else:
195-
raise TypeError(f"id must be an instance of (bytes, str, ObjectId), not {type(oid)}")
196-
197182
@property
198183
def binary(self) -> bytes:
199184
"""12-byte binary representation of this ObjectId."""

0 commit comments

Comments
 (0)