Skip to content

Commit 7349b35

Browse files
authored
Merge pull request #55 from maxmind/greg/faster-decoder
Decoder improvements
2 parents 423217b + e850475 commit 7349b35

File tree

2 files changed

+61
-49
lines changed

2 files changed

+61
-49
lines changed

maxminddb/decoder.py

Lines changed: 60 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99

1010
import struct
1111

12-
from maxminddb.compat import byte_from_int, int_from_bytes
12+
from maxminddb.compat import byte_from_int, int_from_byte, int_from_bytes
1313
from maxminddb.errors import InvalidDatabaseError
1414

1515

@@ -41,22 +41,30 @@ def _decode_bytes(self, size, offset):
4141
new_offset = offset + size
4242
return self._buffer[offset:new_offset], new_offset
4343

44-
# pylint: disable=no-self-argument
45-
# |-> I am open to better ways of doing this as long as it doesn't involve
46-
# lots of code duplication.
47-
def _decode_packed_type(type_code, type_size, pad=False):
48-
# pylint: disable=protected-access, missing-docstring
49-
def unpack_type(self, size, offset):
50-
if not pad:
51-
self._verify_size(size, type_size)
52-
new_offset = offset + size
53-
packed_bytes = self._buffer[offset:new_offset]
54-
if pad:
55-
packed_bytes = packed_bytes.rjust(type_size, b'\x00')
56-
(value, ) = struct.unpack(type_code, packed_bytes)
57-
return value, new_offset
58-
59-
return unpack_type
44+
def _decode_double(self, size, offset):
45+
self._verify_size(size, 8)
46+
new_offset = offset + size
47+
packed_bytes = self._buffer[offset:new_offset]
48+
(value, ) = struct.unpack(b'!d', packed_bytes)
49+
return value, new_offset
50+
51+
def _decode_float(self, size, offset):
52+
self._verify_size(size, 4)
53+
new_offset = offset + size
54+
packed_bytes = self._buffer[offset:new_offset]
55+
(value, ) = struct.unpack(b'!f', packed_bytes)
56+
return value, new_offset
57+
58+
def _decode_int32(self, size, offset):
59+
if size == 0:
60+
return 0, offset
61+
new_offset = offset + size
62+
packed_bytes = self._buffer[offset:new_offset]
63+
64+
if size != 4:
65+
packed_bytes = packed_bytes.rjust(4, b'\x00')
66+
(value, ) = struct.unpack(b'!i', packed_bytes)
67+
return value, new_offset
6068

6169
def _decode_map(self, size, offset):
6270
container = {}
@@ -66,22 +74,25 @@ def _decode_map(self, size, offset):
6674
container[key] = value
6775
return container, offset
6876

69-
_pointer_value_offset = {
70-
1: 0,
71-
2: 2048,
72-
3: 526336,
73-
4: 0,
74-
}
75-
7677
def _decode_pointer(self, size, offset):
77-
pointer_size = ((size >> 3) & 0x3) + 1
78+
pointer_size = (size >> 3) + 1
79+
80+
buf = self._buffer[offset:offset + pointer_size]
7881
new_offset = offset + pointer_size
79-
pointer_bytes = self._buffer[offset:new_offset]
80-
packed = pointer_bytes if pointer_size == 4 else struct.pack(
81-
b'!c', byte_from_int(size & 0x7)) + pointer_bytes
82-
unpacked = int_from_bytes(packed)
83-
pointer = unpacked + self._pointer_base + \
84-
self._pointer_value_offset[pointer_size]
82+
83+
if pointer_size == 1:
84+
buf = byte_from_int(size & 0x7) + buf
85+
pointer = struct.unpack(b'!H', buf)[0] + self._pointer_base
86+
elif pointer_size == 2:
87+
buf = b'\x00' + byte_from_int(size & 0x7) + buf
88+
pointer = struct.unpack(b'!I', buf)[0] + 2048 + self._pointer_base
89+
elif pointer_size == 3:
90+
buf = byte_from_int(size & 0x7) + buf
91+
pointer = struct.unpack(b'!I',
92+
buf)[0] + 526336 + self._pointer_base
93+
else:
94+
pointer = struct.unpack(b'!I', buf)[0] + self._pointer_base
95+
8596
if self._pointer_test:
8697
return pointer, new_offset
8798
(value, _) = self.decode(pointer)
@@ -99,17 +110,17 @@ def _decode_utf8_string(self, size, offset):
99110
_type_decoder = {
100111
1: _decode_pointer,
101112
2: _decode_utf8_string,
102-
3: _decode_packed_type(b'!d', 8), # double,
113+
3: _decode_double,
103114
4: _decode_bytes,
104115
5: _decode_uint, # uint16
105116
6: _decode_uint, # uint32
106117
7: _decode_map,
107-
8: _decode_packed_type(b'!i', 4, pad=True), # int32
118+
8: _decode_int32,
108119
9: _decode_uint, # uint64
109120
10: _decode_uint, # uint128
110121
11: _decode_array,
111122
14: _decode_boolean,
112-
15: _decode_packed_type(b'!f', 4), # float,
123+
15: _decode_float,
113124
}
114125

115126
def decode(self, offset):
@@ -119,7 +130,7 @@ def decode(self, offset):
119130
offset -- the location of the data structure to decode
120131
"""
121132
new_offset = offset + 1
122-
(ctrl_byte, ) = struct.unpack(b'!B', self._buffer[offset:new_offset])
133+
ctrl_byte = int_from_byte(self._buffer[offset])
123134
type_num = ctrl_byte >> 5
124135
# Extended type
125136
if not type_num:
@@ -136,7 +147,7 @@ def decode(self, offset):
136147
return decoder(self, size, new_offset)
137148

138149
def _read_extended(self, offset):
139-
(next_byte, ) = struct.unpack(b'!B', self._buffer[offset:offset + 1])
150+
next_byte = int_from_byte(self._buffer[offset])
140151
type_num = next_byte + 7
141152
if type_num < 7:
142153
raise InvalidDatabaseError(
@@ -153,21 +164,22 @@ def _verify_size(self, expected, actual):
153164

154165
def _size_from_ctrl_byte(self, ctrl_byte, offset, type_num):
155166
size = ctrl_byte & 0x1f
156-
if type_num == 1:
167+
if type_num == 1 or size < 29:
157168
return size, offset
158-
bytes_to_read = 0 if size < 29 else size - 28
159-
160-
new_offset = offset + bytes_to_read
161-
size_bytes = self._buffer[offset:new_offset]
162169

163-
# Using unpack rather than int_from_bytes as it is about 200 lookups
164-
# per second faster here.
165170
if size == 29:
166-
size = 29 + struct.unpack(b'!B', size_bytes)[0]
167-
elif size == 30:
171+
size = 29 + int_from_byte(self._buffer[offset])
172+
return size, offset + 1
173+
174+
# Using unpack rather than int_from_bytes as it is faster
175+
# here and below.
176+
if size == 30:
177+
new_offset = offset + 2
178+
size_bytes = self._buffer[offset:new_offset]
168179
size = 285 + struct.unpack(b'!H', size_bytes)[0]
169-
elif size > 30:
170-
size = struct.unpack(b'!I', size_bytes.rjust(4,
171-
b'\x00'))[0] + 65821
180+
return size, new_offset
172181

182+
new_offset = offset + 3
183+
size_bytes = self._buffer[offset:new_offset]
184+
size = struct.unpack(b'!I', b'\x00' + size_bytes)[0] + 65821
173185
return size, new_offset

maxminddb/file.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def __getitem__(self, key):
2121
if isinstance(key, slice):
2222
return self._read(key.stop - key.start, key.start)
2323
if isinstance(key, int):
24-
return self._read(1, key)
24+
return self._read(1, key)[0]
2525
raise TypeError("Invalid argument type.")
2626

2727
def rfind(self, needle, start):

0 commit comments

Comments
 (0)