Commit 212e942
committed
Optimize
From profiles of a mongoengine-using project, I’ve noticed that `ObjectId` creation happens a lot, around ~0.5% of total request time. This PR optimizes the class in two ways:
1. It inlines the `__generate()` and `__validate()` methods, removing their call overhead.
The methods are small and not called anywhere else, and Python imposes a constant overhead for method calls, so inlining them saves time.
2. Checking for `str` values before `ObjectId` ones.
I believe that typical usage patterns involve creating `ObjectId`s from strings more often than from existing `ObjectId` instances, so checking for `str` first should reduce the average number of checks needed.
I validated these changes with the below benchmark script, run with [richbench](https://github.com/tonybaloney/rich-bench).
<details>
<summary><code>bench_objectid.py</code></summary>
```python
from __future__ import annotations
import os
import struct
import threading
import time
from random import SystemRandom
from typing import Any, NoReturn, Optional, Union
_MAX_COUNTER_VALUE = 0xFFFFFF
_PACK_INT = struct.Struct(">I").pack
_PACK_INT_RANDOM = struct.Struct(">I5s").pack
def _raise_invalid_id(oid: str) -> NoReturn:
raise ValueError(f"{oid!r} is not a valid ObjectId")
def _random_bytes() -> bytes:
return os.urandom(5)
class ObjectIdBefore:
_pid = os.getpid()
_inc = SystemRandom().randint(0, _MAX_COUNTER_VALUE)
_inc_lock = threading.Lock()
__random = _random_bytes()
__slots__ = ("__id",)
def __init__(self, oid: Optional[Union[str, ObjectIdBefore, bytes]] = None) -> None:
if oid is None:
self.__generate()
elif isinstance(oid, bytes) and len(oid) == 12:
self.__id = oid
else:
self.__validate(oid)
@classmethod
def _random(cls) -> bytes:
pid = os.getpid()
if pid != cls._pid:
cls._pid = pid
cls.__random = _random_bytes()
return cls.__random
def __generate(self) -> None:
with ObjectIdBefore._inc_lock:
inc = ObjectIdBefore._inc
ObjectIdBefore._inc = (inc + 1) % (_MAX_COUNTER_VALUE + 1)
self.__id = (
_PACK_INT_RANDOM(int(time.time()), ObjectIdBefore._random()) + _PACK_INT(inc)[1:4]
)
def __validate(self, oid: Any) -> None:
if isinstance(oid, ObjectIdBefore):
self.__id = oid.binary
elif isinstance(oid, str):
if len(oid) == 24:
try:
self.__id = bytes.fromhex(oid)
except (TypeError, ValueError):
_raise_invalid_id(oid)
else:
_raise_invalid_id(oid)
else:
raise TypeError(f"id must be an instance of (bytes, str, ObjectId), not {type(oid)}")
@Property
def binary(self) -> bytes:
return self.__id
class ObjectIdAfter:
_pid = os.getpid()
_inc = SystemRandom().randint(0, _MAX_COUNTER_VALUE)
_inc_lock = threading.Lock()
__random = _random_bytes()
__slots__ = ("__id",)
def __init__(self, oid: Optional[Union[str, ObjectIdAfter, bytes]] = None) -> None:
if oid is None:
with ObjectIdAfter._inc_lock:
inc = ObjectIdAfter._inc
ObjectIdAfter._inc = (inc + 1) % (_MAX_COUNTER_VALUE + 1)
self.__id = (
_PACK_INT_RANDOM(int(time.time()), ObjectIdAfter._random()) + _PACK_INT(inc)[1:4]
)
elif isinstance(oid, bytes) and len(oid) == 12:
self.__id = oid
elif isinstance(oid, str):
if len(oid) == 24:
try:
self.__id = bytes.fromhex(oid)
except (TypeError, ValueError):
_raise_invalid_id(oid)
else:
_raise_invalid_id(oid)
elif isinstance(oid, ObjectIdAfter):
self.__id = oid.binary
else:
raise TypeError(f"id must be an instance of (bytes, str, ObjectId), not {type(oid)}")
@classmethod
def _random(cls) -> bytes:
pid = os.getpid()
if pid != cls._pid:
cls._pid = pid
cls.__random = _random_bytes()
return cls.__random
@Property
def binary(self) -> bytes:
return self.__id
test_oid_str = "507f1f77bcf86cd799439011"
test_oid_bytes = bytes.fromhex(test_oid_str)
test_oid_obj_before = ObjectIdBefore(test_oid_bytes)
test_oid_obj_after = ObjectIdAfter(test_oid_bytes)
invalid_str_short = "507f1f77"
invalid_str_long = "507f1f77bcf86cd799439011aabbccdd"
invalid_str_non_hex = "507f1f77bcf86cd799439ZZZ"
invalid_bytes_short = b"short"
invalid_bytes_long = b"toolongbytes1234"
invalid_type = 12345
def bench_none_before():
for _ in range(1_000_000):
ObjectIdBefore()
def bench_none_after():
for _ in range(1_000_000):
ObjectIdAfter()
def bench_str_before():
for _ in range(1_000_000):
ObjectIdBefore(test_oid_str)
def bench_str_after():
for _ in range(1_000_000):
ObjectIdAfter(test_oid_str)
def bench_objectid_before():
for _ in range(1_000_000):
ObjectIdBefore(test_oid_obj_before)
def bench_objectid_after():
for _ in range(1_000_000):
ObjectIdAfter(test_oid_obj_after)
def bench_invalid_str_short_before():
for _ in range(1_000_000):
try:
ObjectIdBefore(invalid_str_short)
except Exception:
pass
def bench_invalid_str_short_after():
for _ in range(10_000):
try:
ObjectIdAfter(invalid_str_short)
except Exception:
pass
def bench_invalid_type_before():
for _ in range(10_000):
try:
ObjectIdBefore(invalid_type)
except Exception:
pass
def bench_invalid_type_after():
for _ in range(10_000):
try:
ObjectIdAfter(invalid_type)
except Exception:
pass
__benchmarks__ = [
(bench_none_before, bench_none_after, "ObjectId() - generate new"),
(bench_str_before, bench_str_after, "ObjectId(str) - 24-char hex"),
(bench_objectid_before, bench_objectid_after, "ObjectId(ObjectId) - copy"),
(
bench_invalid_str_short_before,
bench_invalid_str_short_after,
"ObjectId(str) - invalid short",
),
(bench_invalid_type_before, bench_invalid_type_after, "ObjectId(int) - invalid type"),
]
```
</details>
Results:
```
$ uvx -p 3.13 richbench .
Benchmarks, repeat=5, number=5
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Benchmark ┃ Min ┃ Max ┃ Mean ┃ Min (+) ┃ Max (+) ┃ Mean (+) ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ ObjectId() - generate new │ 1.920 │ 2.021 │ 1.990 │ 1.865 (1.0x) │ 1.868 (1.1x) │ 1.866 (1.1x) │
│ ObjectId(str) - 24-char hex │ 0.938 │ 0.952 │ 0.946 │ 0.716 (1.3x) │ 0.725 (1.3x) │ 0.720 (1.3x) │
│ ObjectId(ObjectId) - copy │ 0.845 │ 0.857 │ 0.850 │ 0.818 (1.0x) │ 0.828 (1.0x) │ 0.822 (1.0x) │
│ ObjectId(str) - invalid short │ 1.839 │ 1.857 │ 1.845 │ 0.014 (127.9x) │ 0.015 (125.7x) │ 0.015 (126.4x) │
│ ObjectId(int) - invalid type │ 0.020 │ 0.021 │ 0.021 │ 0.016 (1.2x) │ 0.017 (1.3x) │ 0.017 (1.2x) │
└───────────────────────────────┴─────────┴─────────┴─────────┴─────────────────┴─────────────────┴─────────────────┘
```
The speedups for normal usage are like 1.1x to 1.3x, which is pretty decent for a small change. The invalid string case is much faster, but that’s less relevant since it’s an error path.ObjectId
1 parent 1e78bd4 commit 212e942
1 file changed
+18
-33
lines changed| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
98 | 98 | | |
99 | 99 | | |
100 | 100 | | |
101 | | - | |
| 101 | + | |
| 102 | + | |
| 103 | + | |
| 104 | + | |
| 105 | + | |
| 106 | + | |
| 107 | + | |
102 | 108 | | |
103 | 109 | | |
| 110 | + | |
| 111 | + | |
| 112 | + | |
| 113 | + | |
| 114 | + | |
| 115 | + | |
| 116 | + | |
| 117 | + | |
| 118 | + | |
| 119 | + | |
104 | 120 | | |
105 | | - | |
| 121 | + | |
106 | 122 | | |
107 | 123 | | |
108 | 124 | | |
| |||
163 | 179 | | |
164 | 180 | | |
165 | 181 | | |
166 | | - | |
167 | | - | |
168 | | - | |
169 | | - | |
170 | | - | |
171 | | - | |
172 | | - | |
173 | | - | |
174 | | - | |
175 | | - | |
176 | | - | |
177 | | - | |
178 | | - | |
179 | | - | |
180 | | - | |
181 | | - | |
182 | | - | |
183 | | - | |
184 | | - | |
185 | | - | |
186 | | - | |
187 | | - | |
188 | | - | |
189 | | - | |
190 | | - | |
191 | | - | |
192 | | - | |
193 | | - | |
194 | | - | |
195 | | - | |
196 | | - | |
197 | 182 | | |
198 | 183 | | |
199 | 184 | | |
| |||
0 commit comments