Skip to content

Commit 201c3d2

Browse files
committed
PYTHON-1916 Don't decode RawBSONDocument during insert operations
This change improves the performance of inserting RawBSONDocuments with insert_one, insert_many, and bulk_write.
1 parent 6d8c1ce commit 201c3d2

File tree

2 files changed

+70
-36
lines changed

2 files changed

+70
-36
lines changed

bson/_cbsonmodule.c

Lines changed: 53 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,11 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
123123
unsigned char in_custom_call,
124124
unsigned char in_fallback_call);
125125

126+
/* Write a RawBSONDocument to the buffer.
127+
* Returns the number of bytes written or 0 on failure.
128+
*/
129+
static int write_raw_doc(buffer_t buffer, PyObject* raw);
130+
126131
/* Date stuff */
127132
static PyObject* datetime_from_millis(long long millis) {
128133
/* To encode a datetime instance like datetime(9999, 12, 31, 23, 59, 59, 999999)
@@ -1031,39 +1036,10 @@ static int _write_element_to_buffer(PyObject* self, buffer_t buffer,
10311036
case 101:
10321037
{
10331038
/* RawBSONDocument */
1034-
char* raw_bson_document_bytes;
1035-
Py_ssize_t raw_bson_document_bytes_len;
1036-
int raw_bson_document_bytes_len_int;
1037-
PyObject* raw_bson_document_bytes_obj = PyObject_GetAttrString(value, "raw");
1038-
if (!raw_bson_document_bytes_obj) {
1039-
return 0;
1040-
}
1041-
1042-
#if PY_MAJOR_VERSION >= 3
1043-
if (-1 == PyBytes_AsStringAndSize(raw_bson_document_bytes_obj,
1044-
&raw_bson_document_bytes,
1045-
&raw_bson_document_bytes_len)) {
1046-
#else
1047-
if (-1 == PyString_AsStringAndSize(raw_bson_document_bytes_obj,
1048-
&raw_bson_document_bytes,
1049-
&raw_bson_document_bytes_len)) {
1050-
#endif
1051-
Py_DECREF(raw_bson_document_bytes_obj);
1052-
return 0;
1053-
}
1054-
raw_bson_document_bytes_len_int = _downcast_and_check(
1055-
raw_bson_document_bytes_len, 0);
1056-
if (-1 == raw_bson_document_bytes_len_int) {
1057-
Py_DECREF(raw_bson_document_bytes_obj);
1058-
return 0;
1059-
}
1060-
if(!buffer_write_bytes(buffer, raw_bson_document_bytes,
1061-
raw_bson_document_bytes_len_int)) {
1062-
Py_DECREF(raw_bson_document_bytes_obj);
1039+
if (!write_raw_doc(buffer, value)) {
10631040
return 0;
10641041
}
10651042
*(buffer_get_buffer(buffer) + type_byte) = 0x03;
1066-
Py_DECREF(raw_bson_document_bytes_obj);
10671043
return 1;
10681044
}
10691045
case 255:
@@ -1619,6 +1595,38 @@ int decode_and_write_pair(PyObject* self, buffer_t buffer,
16191595
return 1;
16201596
}
16211597

1598+
1599+
/* Write a RawBSONDocument to the buffer.
1600+
* Returns the number of bytes written or 0 on failure.
1601+
*/
1602+
static int write_raw_doc(buffer_t buffer, PyObject* raw) {
1603+
char* bytes;
1604+
Py_ssize_t len;
1605+
int len_int;
1606+
int bytes_written = 0;
1607+
PyObject* bytes_obj = NULL;
1608+
1609+
bytes_obj = PyObject_GetAttrString(raw, "raw");
1610+
if (!bytes_obj) {
1611+
goto fail;
1612+
}
1613+
1614+
if (-1 == PyBytes_AsStringAndSize(bytes_obj, &bytes, &len)) {
1615+
goto fail;
1616+
}
1617+
len_int = _downcast_and_check(len, 0);
1618+
if (-1 == len_int) {
1619+
goto fail;
1620+
}
1621+
if (!buffer_write_bytes(buffer, bytes, len_int)) {
1622+
goto fail;
1623+
}
1624+
bytes_written = len_int;
1625+
fail:
1626+
Py_XDECREF(bytes_obj);
1627+
return bytes_written;
1628+
}
1629+
16221630
/* returns the number of bytes written or 0 on failure */
16231631
int write_dict(PyObject* self, buffer_t buffer,
16241632
PyObject* dict, unsigned char check_keys,
@@ -1629,12 +1637,23 @@ int write_dict(PyObject* self, buffer_t buffer,
16291637
int length;
16301638
int length_location;
16311639
struct module_state *state = GETSTATE(self);
1640+
PyObject* mapping_type;
1641+
long type_marker;
1642+
1643+
/* check for RawBSONDocument */
1644+
type_marker = _type_marker(dict);
1645+
if (type_marker < 0) {
1646+
return 0;
1647+
}
1648+
1649+
if (101 == type_marker) {
1650+
return write_raw_doc(buffer, dict);
1651+
}
1652+
16321653
#if PY_MAJOR_VERSION >= 3
1633-
PyObject* mapping_type = _get_object(state->Mapping,
1634-
"collections.abc", "Mapping");
1654+
mapping_type = _get_object(state->Mapping, "collections.abc", "Mapping");
16351655
#else
1636-
PyObject* mapping_type = _get_object(state->Mapping,
1637-
"collections", "Mapping");
1656+
mapping_type = _get_object(state->Mapping, "collections", "Mapping");
16381657
#endif
16391658

16401659
if (mapping_type) {

test/test_raw_bson.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@
1616
import uuid
1717

1818
from bson import BSON
19-
from bson.binary import JAVA_LEGACY
19+
from bson.binary import Binary, JAVA_LEGACY
2020
from bson.codec_options import CodecOptions
2121
from bson.errors import InvalidBSON
22-
from bson.raw_bson import RawBSONDocument
22+
from bson.raw_bson import RawBSONDocument, DEFAULT_RAW_BSON_OPTIONS
2323
from bson.son import SON
2424
from test import client_context, unittest
2525

@@ -78,6 +78,21 @@ def test_round_trip(self):
7878
self.assertIsInstance(result, RawBSONDocument)
7979
self.assertEqual(dict(self.document.items()), dict(result.items()))
8080

81+
@client_context.require_connection
def test_round_trip_raw_uuid(self):
    # Insert a RawBSONDocument holding the same UUID as binary subtype 4
    # and subtype 3, then read it back both decoded and raw.
    collection = self.client.get_database('pymongo_test').test_raw
    identifier = uuid.uuid4()
    encoded = BSON.encode({
        '_id': 1,
        'bin4': Binary(identifier.bytes, 4),
        'bin3': Binary(identifier.bytes, 3),
    })
    raw_doc = RawBSONDocument(encoded)
    collection.insert_one(raw_doc)

    expected = {'_id': 1, 'bin4': identifier, 'bin3': identifier}
    self.assertEqual(collection.find_one(), expected)

    # Reading with the raw codec options must give back a document equal
    # to the one inserted, i.e. the raw bytes haven't changed.
    raw_collection = collection.with_options(
        codec_options=DEFAULT_RAW_BSON_OPTIONS)
    self.assertEqual(raw_collection.find_one(), raw_doc)
95+
8196
def test_with_codec_options(self):
8297
# {u'date': datetime.datetime(2015, 6, 3, 18, 40, 50, 826000),
8398
# u'_id': UUID('026fab8f-975f-4965-9fbf-85ad874c60ff')}

0 commit comments

Comments
 (0)