Skip to content

Commit e8507a8

Browse files
committed
Merge pull request #4 from maxmind/greg/pure-python-reader
Added pure Python implementation
2 parents 1ece7f0 + 9eddc72 commit e8507a8

File tree

12 files changed

+896
-90
lines changed

12 files changed

+896
-90
lines changed

.pylintrc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
[MESSAGES CONTROL]
2+
disable=R0201,W0105
3+
4+
[BASIC]
5+
6+
no-docstring-rgx=_.*

.travis.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ python:
44
- 2.6
55
- 2.7
66
- 3.3
7+
- pypy
78

89
before_install:
910
- git submodule update --init --recursive
@@ -15,10 +16,13 @@ before_install:
1516
- sudo make install
1617
- sudo ldconfig
1718
- cd ..
19+
- pip install pylint
1820
- if [[ $TRAVIS_PYTHON_VERSION == '2.6' ]]; then pip install unittest2; fi
1921

2022
script:
2123
- CFLAGS="-Werror -Wall -Wextra" python setup.py test
24+
- MAXMINDDB_PURE_PYTHON=1 python setup.py test
25+
- if [[ $TRAVIS_PYTHON_VERSION == '2.7' ]]; then pylint --rcfile .pylintrc maxminddb/*.py; fi
2226

2327
notifications:
2428
email:

README.rst

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
===========================
2-
MaxMind DB Python Extension
3-
===========================
1+
========================
2+
MaxMind DB Python Module
3+
========================
44

55
Beta Note
66
---------
@@ -10,17 +10,21 @@ release.
1010
Description
1111
-----------
1212

13-
This is a Python extension for reading MaxMind DB files. MaxMind DB is a
14-
binary file format that stores data indexed by IP address subnets (IPv4 or
15-
IPv6).
13+
This is a Python module for reading MaxMind DB files. The module includes both
14+
a pure Python reader and an optional C extension.
15+
16+
MaxMind DB is a binary file format that stores data indexed by IP address
17+
subnets (IPv4 or IPv6).
1618

1719
Installation
1820
------------
1921

20-
You must have the `libmaxminddb <https://github.com/maxmind/libmaxminddb>`_ C
21-
library installed before installing this extension.
22+
If you want to use the C extension, you must first install the `libmaxminddb
23+
<https://github.com/maxmind/libmaxminddb>`_ C library before
24+
installing this extension. If the library is not available, the module will
25+
fall back to a pure Python implementation.
2226

23-
To install the MaxMind DB extension, type:
27+
To install maxminddb, type:
2428

2529
.. code-block:: bash
2630
@@ -74,13 +78,16 @@ invalid IP address or an IPv6 address in an IPv4 database.
7478
Requirements
7579
------------
7680

77-
This code requires CPython 2.6+ or 3.3+. Older versions are not supported.
81+
This code requires Python 2.6+ or 3.3+. The C extension requires CPython. The
82+
pure Python implementation has been tested with PyPy.
83+
84+
On Python 2, the `ipaddr module <https://code.google.com/p/ipaddr-py/>`_ is
85+
required.
7886

7987
Versioning
8088
----------
8189

82-
The MaxMind DB Python extension uses
83-
`Semantic Versioning <http://semver.org/>`_.
90+
The MaxMind DB Python module uses `Semantic Versioning <http://semver.org/>`_.
8491

8592
Support
8693
-------

maxminddb/__init__.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# pylint:disable=C0111
2+
import os
3+
4+
try:
5+
if os.environ.get('MAXMINDDB_PURE_PYTHON'):
6+
raise ImportError()
7+
from maxminddb.extension import Reader, InvalidDatabaseError
8+
except ImportError:
9+
from maxminddb.decoder import InvalidDatabaseError
10+
from maxminddb.reader import Reader
11+
12+
13+
__title__ = 'maxminddb'
14+
__version__ = '0.3.0'
15+
__author__ = 'Gregory Oschwald'
16+
__license__ = 'LGPLv2.1+'
17+
__copyright__ = 'Copyright 2014 Maxmind, Inc.'

maxminddb/compat.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import sys
2+
3+
# pylint: skip-file
4+
5+
# True when running on any Python 2.x interpreter.
is_py2 = sys.version_info[0] == 2

# Compare (major, minor) as a tuple. Testing the two fields independently
# would report a future X.0 release (e.g. 4.0) as older than 3.3.
is_py3_3_or_better = sys.version_info[:2] >= (3, 3)

if is_py2:
    # Python 2 has no stdlib ``ipaddress``; use the third-party ``ipaddr``
    # module and alias its constructor to the Python 3 name.
    import ipaddr as ipaddress  # pylint:disable=F0401
    ipaddress.ip_address = ipaddress.IPAddress
else:
    import ipaddress  # pylint:disable=F0401


if is_py2:
    # Indexing a Python 2 byte string yields a 1-character str.
    int_from_byte = ord

    FileNotFoundError = IOError

    def int_from_bytes(b):
        """Return the big-endian unsigned integer stored in ``b`` (0 if empty)."""
        if b:
            return int(b.encode("hex"), 16)
        return 0

    byte_from_int = chr

else:
    def int_from_byte(x):
        """Return ``x`` unchanged; indexing Python 3 bytes already gives an int."""
        return x

    # Re-bind the builtin so it can be imported from this module on Python 3.
    FileNotFoundError = FileNotFoundError

    def int_from_bytes(x):
        """Return the big-endian unsigned integer stored in ``x`` (0 if empty)."""
        return int.from_bytes(x, 'big')

    def byte_from_int(x):
        """Return a single-byte ``bytes`` object holding the value ``x``."""
        return bytes([x])

maxminddb/decoder.py

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
"""
2+
maxminddb.decoder
3+
~~~~~~~~~~~~~~~~~
4+
5+
This module contains code for decoding the MaxMind DB data section.
6+
7+
"""
8+
from __future__ import unicode_literals
9+
10+
import struct
11+
12+
from maxminddb.compat import byte_from_int, int_from_bytes
13+
from maxminddb.errors import InvalidDatabaseError
14+
15+
16+
class Decoder(object):  # pylint: disable=too-few-public-methods

    """Decoder for the data section of the MaxMind DB"""

    def __init__(self, database_buffer, pointer_base=0, pointer_test=False):
        """Create a Decoder for a MaxMind DB

        Arguments:
        database_buffer -- an mmap'd MaxMind DB file.
        pointer_base -- the base number to use when decoding a pointer
        pointer_test -- used for internal unit testing of pointer code
        """
        self._pointer_test = pointer_test
        self._buffer = database_buffer
        self._pointer_base = pointer_base

    # Every ``_decode_*`` method takes the size taken from the control byte
    # and the offset of the first payload byte, and returns a tuple of
    # (decoded value, offset just past the payload).

    def _decode_array(self, size, offset):
        array = []
        for _ in range(size):
            (value, offset) = self.decode(offset)
            array.append(value)
        return array, offset

    def _decode_boolean(self, size, offset):
        # A boolean has no payload; the size field itself is the value.
        return size != 0, offset

    def _decode_bytes(self, size, offset):
        new_offset = offset + size
        return self._buffer[offset:new_offset], new_offset

    # pylint: disable=no-self-argument
    # |-> I am open to better ways of doing this as long as it doesn't involve
    #     lots of code duplication.
    def _decode_packed_type(type_code, type_size, pad=False):
        """Build a decoder method for a fixed-width ``struct`` type.

        Arguments:
        type_code -- the ``struct`` format used to unpack the value
        type_size -- the number of bytes ``type_code`` consumes
        pad -- if true, payloads shorter than ``type_size`` are accepted and
               left-padded with zero bytes before unpacking
        """
        # pylint: disable=protected-access, missing-docstring
        def unpack_type(self, size, offset):
            # Unpadded types must be exactly type_size bytes long; padded
            # types may be shorter but never longer.
            if not pad or size > type_size:
                self._verify_size(size, type_size)
            # Consume only the ``size`` bytes actually stored. Reading
            # ``type_size`` bytes here would swallow the bytes that follow a
            # short value and return the wrong value and offset.
            new_offset = offset + size
            packed_bytes = self._buffer[offset:new_offset]
            if pad:
                packed_bytes = packed_bytes.rjust(type_size, b'\x00')
            (value,) = struct.unpack(type_code, packed_bytes)
            return value, new_offset
        return unpack_type

    def _decode_map(self, size, offset):
        container = {}
        for _ in range(size):
            (key, offset) = self.decode(offset)
            (value, offset) = self.decode(offset)
            container[key] = value
        return container, offset

    # Constant added to the decoded pointer, keyed by pointer size in bytes.
    _pointer_value_offset = {
        1: 0,
        2: 2048,
        3: 526336,
        4: 0,
    }

    def _decode_pointer(self, size, offset):
        # Bits 3-4 of the size field give the number of pointer bytes minus 1.
        pointer_size = ((size >> 3) & 0x3) + 1
        new_offset = offset + pointer_size
        pointer_bytes = self._buffer[offset:new_offset]
        # For pointers shorter than 4 bytes, the low three bits of the size
        # field are the most significant bits of the pointer value.
        packed = pointer_bytes if pointer_size == 4 else struct.pack(
            b'!c', byte_from_int(size & 0x7)) + pointer_bytes
        unpacked = int_from_bytes(packed)
        pointer = unpacked + self._pointer_base + \
            self._pointer_value_offset[pointer_size]
        if self._pointer_test:
            return pointer, new_offset
        # Follow the pointer, but resume after the pointer itself rather than
        # after the data it points to.
        (value, _) = self.decode(pointer)
        return value, new_offset

    def _decode_uint(self, size, offset):
        new_offset = offset + size
        uint_bytes = self._buffer[offset:new_offset]
        return int_from_bytes(uint_bytes), new_offset

    def _decode_utf8_string(self, size, offset):
        new_offset = offset + size
        return self._buffer[offset:new_offset].decode('utf-8'), new_offset

    # Dispatch table from type number to the (unbound) decoder function.
    _type_decoder = {
        1: _decode_pointer,
        2: _decode_utf8_string,
        3: _decode_packed_type(b'!d', 8),  # double,
        4: _decode_bytes,
        5: _decode_uint,  # uint16
        6: _decode_uint,  # uint32
        7: _decode_map,
        8: _decode_packed_type(b'!i', 4, pad=True),  # int32
        9: _decode_uint,  # uint64
        10: _decode_uint,  # uint128
        11: _decode_array,
        14: _decode_boolean,
        15: _decode_packed_type(b'!f', 4),  # float,
    }

    def decode(self, offset):
        """Decode a section of the data section starting at offset

        Arguments:
        offset -- the location of the data structure to decode

        Returns a tuple of the decoded value and the offset immediately
        following the decoded structure. Raises InvalidDatabaseError if the
        type number found at offset is not a known type.
        """
        new_offset = offset + 1
        (ctrl_byte,) = struct.unpack(b'!B', self._buffer[offset:new_offset])
        type_num = ctrl_byte >> 5
        # Extended type
        if not type_num:
            (type_num, new_offset) = self._read_extended(new_offset)

        # Report an unknown type as a database error instead of leaking a
        # bare KeyError from the dispatch table.
        try:
            decoder = self._type_decoder[type_num]
        except KeyError:
            raise InvalidDatabaseError(
                'Unexpected type number ({type}) '
                'encountered'.format(type=type_num))

        (size, new_offset) = self._size_from_ctrl_byte(
            ctrl_byte, new_offset, type_num)
        return decoder(self, size, new_offset)

    def _read_extended(self, offset):
        """Read the extended type byte at offset.

        Returns a tuple of the resolved type number (the byte's value plus 7)
        and the offset following the byte. Raises InvalidDatabaseError if the
        resolved type number is below 8.
        """
        (next_byte,) = struct.unpack(b'!B', self._buffer[offset:offset + 1])
        type_num = next_byte + 7
        # An unsigned byte plus 7 is never below 7, so testing ``< 7`` could
        # never fire; 8 is the bound the error message describes.
        if type_num < 8:
            raise InvalidDatabaseError(
                'Something went horribly wrong in the decoder. An '
                'extended type resolved to a type number < 8 '
                '({type})'.format(type=type_num))
        return type_num, offset + 1

    def _verify_size(self, expected, actual):
        """Raise InvalidDatabaseError unless the two sizes are equal."""
        if expected != actual:
            raise InvalidDatabaseError(
                'The MaxMind DB file\'s data section contains bad data '
                '(unknown data type or corrupt data)'
            )

    def _size_from_ctrl_byte(self, ctrl_byte, offset, type_num):
        """Return the payload size and the offset following any size bytes."""
        size = ctrl_byte & 0x1f
        # For pointers the raw five bits are interpreted by _decode_pointer.
        if type_num == 1:
            return size, offset
        # Values 29, 30 and 31 mean the size continues in the next 1, 2 or 3
        # bytes respectively.
        bytes_to_read = 0 if size < 29 else size - 28
        size_bytes = self._buffer[offset:offset + bytes_to_read]

        # Using unpack rather than int_from_bytes as it is about 200 lookups
        # per second faster here.
        if size == 29:
            size = 29 + struct.unpack(b'!B', size_bytes)[0]
        elif size == 30:
            size = 285 + struct.unpack(b'!H', size_bytes)[0]
        elif size > 30:
            size = struct.unpack(
                b'!I', size_bytes.rjust(4, b'\x00'))[0] + 65821

        return size, offset + bytes_to_read

maxminddb/errors.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
"""
2+
maxminddb.errors
3+
~~~~~~~~~~~~~~~~
4+
5+
This module contains custom errors for the MaxMind DB reader
6+
"""
7+
8+
9+
class InvalidDatabaseError(RuntimeError):
10+
11+
"""This error is raised when unexpected data is found in the database."""

0 commit comments

Comments
 (0)