Skip to content

Commit 0b80f48

Browse files
jseyster authored and evergreen committed
SERVER-42836 Fast path for sort key generation of WorkingSetMembers
Includes a small change to external_sort_find.js, which was failing when this change was initially submitted. This reverts commit 72d8bff.
1 parent b612950 commit 0b80f48

File tree

7 files changed

+149
-136
lines changed

7 files changed

+149
-136
lines changed

jstests/aggregation/group_conversion_to_distinct_scan.js

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ assert.commandWorked(coll.insert([
4444
{_id: 3, a: 1, b: 3, c: 2},
4545
{_id: 4, a: 2, b: 2, c: 2},
4646
{_id: 5, b: 1, c: 1},
47-
{_id: 6, a: null, b: 1, c: 1},
47+
{_id: 6, a: null, b: 1, c: 1.5},
4848

4949
{_id: 7, aa: 1, mkB: 2, bb: 2},
5050
{_id: 8, aa: 1, mkB: [1, 3], bb: 1},
@@ -224,9 +224,9 @@ assert.eq(null, getAggPlanStage(explain, "SORT"), explain);
224224
// Verify that a $sort-$group pipeline can use DISTINCT_SCAN when a $first accumulator needs the
225225
// entire document.
226226
//
227-
pipeline = [{$sort: {a: -1, b: -1}}, {$group: {_id: "$a", accum: {$first: "$$ROOT"}}}];
227+
pipeline = [{$sort: {a: -1, b: -1, c: -1}}, {$group: {_id: "$a", accum: {$first: "$$ROOT"}}}];
228228
assertResultsMatchWithAndWithoutHintandIndexes(pipeline, [
229-
{_id: null, accum: {_id: 6, a: null, b: 1, c: 1}},
229+
{_id: null, accum: {_id: 6, a: null, b: 1, c: 1.5}},
230230
{_id: 1, accum: {_id: 3, a: 1, b: 3, c: 2}},
231231
{_id: 2, accum: {_id: 4, a: 2, b: 2, c: 2}}
232232
]);

jstests/noPassthrough/external_sort_find.js

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,11 @@ load("jstests/libs/analyze_plan.js");
1414
// Only allow blocking sort execution to use 100 kB of memory.
1515
const kMaxMemoryUsageBytes = 100 * 1024;
1616

17-
const kNumDocsWithinMemLimit = 70;
18-
const kNumDocsExceedingMemLimit = 100;
17+
// TODO (SERVER-43993): Documents can occupy twice as much memory as needed to just store the BSON
18+
// contents of the documents, which is why we can exceed the 100kB memory limit with just 50
19+
// documents, instead of the roughly 100 documents we might expect.
20+
const kNumDocsWithinMemLimit = 35;
21+
const kNumDocsExceedingMemLimit = 50;
1922

2023
const kMemoryLimitExceededErrCode = 16819;
2124

jstests/sharding/fts_score_sort_sharded.js

Lines changed: 36 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -34,11 +34,11 @@ assert.commandWorked(coll.ensureIndex({a: "text"}));
3434
var results = coll.find({$text: {$search: "pizza"}}, {s: {$meta: "textScore"}})
3535
.sort({s: {$meta: "textScore"}})
3636
.toArray();
37-
assert.eq(results.length, 4);
38-
assert.eq(results[0]._id, -2);
39-
assert.eq(results[1]._id, 2);
40-
assert.eq(results[2]._id, -1);
41-
assert.eq(results[3]._id, 1);
37+
assert.eq(results.length, 4, results);
38+
assert.eq(results[0]._id, -2, results);
39+
assert.eq(results[1]._id, 2, results);
40+
assert.eq(results[2]._id, -1, results);
41+
assert.eq(results[3]._id, 1, results);
4242

4343
//
4444
// Verify that mongos requires the text metadata sort to be specified in the projection.
@@ -68,6 +68,36 @@ assert.throws(function() {
6868
cursor.next();
6969
});
7070

71-
// TODO Test sort on compound key.
71+
//
72+
// Execute query with a compound sort that includes the text score along with a multikey field.
73+
//
74+
75+
coll.drop();
76+
assert.commandWorked(coll.insert({_id: 0, a: "pizza", b: [1, 4]}));
77+
assert.commandWorked(coll.insert({_id: 1, a: "pizza pizza", b: [6, 7]}));
78+
assert.commandWorked(coll.insert({_id: 2, a: "pizza", b: [2, 3]}));
79+
assert.commandWorked(coll.insert({_id: 3, a: "pizza pizza", b: [5, 8]}));
80+
assert.commandWorked(coll.ensureIndex({a: "text"}));
81+
82+
results = coll.find({$text: {$search: "pizza"}}, {s: {$meta: "textScore"}})
83+
.sort({s: {$meta: "textScore"}, b: 1})
84+
.toArray();
85+
assert.eq(results.length, 4, results);
86+
assert.eq(results[0]._id, 3, results);
87+
assert.eq(results[1]._id, 1, results);
88+
assert.eq(results[2]._id, 0, results);
89+
assert.eq(results[3]._id, 2, results);
90+
91+
//
92+
// Repeat the query with an aggregation pipeline and verify that the result is the same.
93+
//
94+
95+
var aggResults = coll.aggregate([
96+
{$match: {$text: {$search: "pizza"}}},
97+
{$addFields: {s: {$meta: "textScore"}}},
98+
{$sort: {s: {$meta: "textScore"}, b: 1}}
99+
])
100+
.toArray();
101+
assert.eq(results, aggResults);
72102

73103
st.stop();

src/mongo/db/exec/sort_key_generator.cpp

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -68,16 +68,16 @@ PlanStage::StageState SortKeyGeneratorStage::doWork(WorkingSetID* out) {
6868
if (stageState == PlanStage::ADVANCED) {
6969
WorkingSetMember* member = _ws->get(*out);
7070

71-
auto sortKey = _sortKeyGen.computeSortKey(*member);
72-
if (!sortKey.isOK()) {
73-
*out = WorkingSetCommon::allocateStatusMember(_ws, sortKey.getStatus());
71+
try {
72+
auto sortKey = _sortKeyGen.computeSortKey(*member);
73+
74+
// Add the sort key to the WSM as metadata.
75+
member->metadata().setSortKey(std::move(sortKey), _sortKeyGen.isSingleElementKey());
76+
} catch (const DBException& computeSortKeyException) {
77+
*out = WorkingSetCommon::allocateStatusMember(_ws, computeSortKeyException.toStatus());
7478
return PlanStage::FAILURE;
7579
}
7680

77-
// Add the sort key to the WSM as metadata.
78-
member->metadata().setSortKey(std::move(sortKey.getValue()),
79-
_sortKeyGen.isSingleElementKey());
80-
8181
return PlanStage::ADVANCED;
8282
}
8383

src/mongo/db/index/sort_key_generator.cpp

Lines changed: 41 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -76,30 +76,15 @@ SortKeyGenerator::SortKeyGenerator(SortPattern sortPattern, const CollatorInterf
7676
Ordering::make(_sortSpecWithoutMeta));
7777
}
7878

79-
// TODO (SERVER-42836): Once WorkingSetMember objects store a Document (SERVER-42181), this function
80-
// will be able to use the Document overload of computeSortKeyFromDocument, and it will be able to
81-
// store the text score with the Document instead of in a separate SortKeyGenerator::Metadata
82-
// object.
83-
StatusWith<Value> SortKeyGenerator::computeSortKey(const WorkingSetMember& wsm) const {
79+
Value SortKeyGenerator::computeSortKey(const WorkingSetMember& wsm) const {
8480
if (wsm.hasObj()) {
85-
SortKeyGenerator::Metadata metadata;
86-
if (_sortHasMeta && wsm.metadata().hasTextScore()) {
87-
metadata.textScore = wsm.metadata().getTextScore();
88-
}
89-
auto statusWithSortKeyObj = computeSortKeyFromDocument(wsm.doc.value().toBson(), &metadata);
90-
if (!statusWithSortKeyObj.isOK()) {
91-
return statusWithSortKeyObj.getStatus();
92-
}
93-
94-
return DocumentMetadataFields::deserializeSortKey(isSingleElementKey(),
95-
statusWithSortKeyObj.getValue());
81+
return computeSortKeyFromDocument(wsm.doc.value(), wsm.metadata());
9682
}
9783

9884
return computeSortKeyFromIndexKey(wsm);
9985
}
10086

101-
StatusWith<Value> SortKeyGenerator::computeSortKeyFromIndexKey(
102-
const WorkingSetMember& member) const {
87+
Value SortKeyGenerator::computeSortKeyFromIndexKey(const WorkingSetMember& member) const {
10388
invariant(member.getState() == WorkingSetMember::RID_AND_IDX);
10489
invariant(!_sortHasMeta);
10590

@@ -118,16 +103,9 @@ StatusWith<Value> SortKeyGenerator::computeSortKeyFromIndexKey(
118103
return DocumentMetadataFields::deserializeSortKey(isSingleElementKey(), objBuilder.obj());
119104
}
120105

121-
StatusWith<BSONObj> SortKeyGenerator::computeSortKeyFromDocument(const BSONObj& obj,
122-
const Metadata* metadata) const {
123-
if (_sortHasMeta) {
124-
invariant(metadata);
125-
}
126-
127-
auto sortKeyNoMetadata = computeSortKeyFromDocumentWithoutMetadata(obj);
128-
if (!sortKeyNoMetadata.isOK()) {
129-
return sortKeyNoMetadata;
130-
}
106+
BSONObj SortKeyGenerator::computeSortKeyFromDocument(const BSONObj& obj,
107+
const DocumentMetadataFields& metadata) const {
108+
auto sortKeyNoMetadata = uassertStatusOK(computeSortKeyFromDocumentWithoutMetadata(obj));
131109

132110
if (!_sortHasMeta) {
133111
// We don't have to worry about $meta sort, so the index key becomes the sort key.
@@ -137,24 +115,27 @@ StatusWith<BSONObj> SortKeyGenerator::computeSortKeyFromDocument(const BSONObj&
137115
BSONObjBuilder mergedKeyBob;
138116

139117
// Merge metadata into the key.
140-
BSONObjIterator sortKeyIt(sortKeyNoMetadata.getValue());
118+
BSONObjIterator sortKeyIt(sortKeyNoMetadata);
141119
for (auto& part : _sortPattern) {
142120
if (part.fieldPath) {
143121
invariant(sortKeyIt.more());
144122
mergedKeyBob.append(sortKeyIt.next());
145123
continue;
146124
}
125+
126+
// Create a Document that represents the input object and its metadata together, so we can
127+
// use it to evaluate the ExpressionMeta for this part of the sort pattern. This operation
128+
// copies the data in 'metadata' but not any of the data in the 'obj' BSON.
129+
MutableDocument documentWithMetdata(Document{obj});
130+
documentWithMetdata.setMetadata(DocumentMetadataFields(metadata));
131+
147132
invariant(part.expression);
148-
switch (part.expression->getMetaType()) {
149-
case DocumentMetadataFields::MetaType::kTextScore: {
150-
mergedKeyBob.append("", metadata->textScore);
151-
continue;
152-
}
153-
case DocumentMetadataFields::MetaType::kRandVal: {
154-
mergedKeyBob.append("", metadata->randVal);
155-
continue;
156-
}
157-
default: { MONGO_UNREACHABLE; }
133+
auto value =
134+
part.expression->evaluate(documentWithMetdata.freeze(), nullptr /* variables */);
135+
if (!value.missing()) {
136+
value.addToBsonObj(&mergedKeyBob, ""_sd);
137+
} else {
138+
mergedKeyBob.appendNull("");
158139
}
159140
}
160141

@@ -231,7 +212,9 @@ Value SortKeyGenerator::getCollationComparisonKey(const Value& val) const {
231212
}
232213

233214
StatusWith<Value> SortKeyGenerator::extractKeyPart(
234-
const Document& doc, const SortPattern::SortPatternPart& patternPart) const {
215+
const Document& doc,
216+
const DocumentMetadataFields& metadata,
217+
const SortPattern::SortPatternPart& patternPart) const {
235218
Value plainKey;
236219
if (patternPart.fieldPath) {
237220
invariant(!patternPart.expression);
@@ -243,22 +226,28 @@ StatusWith<Value> SortKeyGenerator::extractKeyPart(
243226
plainKey = key.getValue();
244227
} else {
245228
invariant(patternPart.expression);
229+
// ExpressionMeta expects metadata to be attached to the document.
230+
MutableDocument documentWithMetadata(doc);
231+
documentWithMetadata.setMetadata(DocumentMetadataFields(metadata));
232+
246233
// ExpressionMeta does not use Variables.
247-
plainKey = patternPart.expression->evaluate(doc, nullptr /* variables */);
234+
plainKey = patternPart.expression->evaluate(documentWithMetadata.freeze(),
235+
nullptr /* variables */);
248236
}
249237

250-
return getCollationComparisonKey(plainKey);
238+
return plainKey.missing() ? Value{BSONNULL} : getCollationComparisonKey(plainKey);
251239
}
252240

253-
StatusWith<Value> SortKeyGenerator::extractKeyFast(const Document& doc) const {
241+
StatusWith<Value> SortKeyGenerator::extractKeyFast(const Document& doc,
242+
const DocumentMetadataFields& metadata) const {
254243
if (_sortPattern.isSingleElementKey()) {
255-
return extractKeyPart(doc, _sortPattern[0]);
244+
return extractKeyPart(doc, metadata, _sortPattern[0]);
256245
}
257246

258247
std::vector<Value> keys;
259248
keys.reserve(_sortPattern.size());
260249
for (auto&& keyPart : _sortPattern) {
261-
auto extractedKey = extractKeyPart(doc, keyPart);
250+
auto extractedKey = extractKeyPart(doc, metadata, keyPart);
262251
if (!extractedKey.isOK()) {
263252
// We can't use the fast path, so bail out.
264253
return extractedKey;
@@ -269,24 +258,18 @@ StatusWith<Value> SortKeyGenerator::extractKeyFast(const Document& doc) const {
269258
return Value{std::move(keys)};
270259
}
271260

272-
BSONObj SortKeyGenerator::extractKeyWithArray(const Document& doc) const {
273-
SortKeyGenerator::Metadata metadata;
274-
if (doc.metadata().hasTextScore()) {
275-
metadata.textScore = doc.metadata().getTextScore();
276-
}
277-
if (doc.metadata().hasRandVal()) {
278-
metadata.randVal = doc.metadata().getRandVal();
279-
}
280-
261+
BSONObj SortKeyGenerator::extractKeyWithArray(const Document& doc,
262+
const DocumentMetadataFields& metadata) const {
281263
// Convert the Document to a BSONObj, but only do the conversion for the paths we actually need.
282264
// Then run the result through the SortKeyGenerator to obtain the final sort key.
283265
auto bsonDoc = _sortPattern.documentToBsonWithSortPaths(doc);
284-
return uassertStatusOK(computeSortKeyFromDocument(bsonDoc, &metadata));
266+
return computeSortKeyFromDocument(bsonDoc, metadata);
285267
}
286268

287-
Value SortKeyGenerator::computeSortKeyFromDocument(const Document& doc) const {
269+
Value SortKeyGenerator::computeSortKeyFromDocument(const Document& doc,
270+
const DocumentMetadataFields& metadata) const {
288271
// This fast pass directly generates a Value.
289-
auto fastKey = extractKeyFast(doc);
272+
auto fastKey = extractKeyFast(doc, metadata);
290273
if (fastKey.isOK()) {
291274
return std::move(fastKey.getValue());
292275
}
@@ -295,7 +278,7 @@ Value SortKeyGenerator::computeSortKeyFromDocument(const Document& doc) const {
295278
// form like BSONObj {'': 1, '': [2, 3]}) and converts it to a Value (Value [1, [2, 3]] in the
296279
// earlier example).
297280
return DocumentMetadataFields::deserializeSortKey(_sortPattern.isSingleElementKey(),
298-
extractKeyWithArray(doc));
281+
extractKeyWithArray(doc, metadata));
299282
}
300283

301284
} // namespace mongo

0 commit comments

Comments
 (0)