Skip to content

Commit dd209a7

Browse files
add predicate cache (#6024)
* introduce BuildTantivyAstContext
* implement CacheNode
* implement predicate cache
* emit cachenode for search_after
1 parent 656a904 commit dd209a7

File tree

30 files changed

+1235
-392
lines changed

30 files changed

+1235
-392
lines changed

quickwit/Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

quickwit/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -87,6 +87,7 @@ async-trait = "0.1"
8787
backtrace = "0.3"
8888
base64 = "0.22"
8989
binggan = { version = "0.14" }
90+
bitpacking = "0.9.2"
9091
bytes = { version = "1", features = ["serde"] }
9192
bytesize = { version = "1.3", features = ["serde"] }
9293
bytestring = "1.4"

quickwit/quickwit-config/src/node_config/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -267,6 +267,7 @@ pub struct SearcherConfig {
267267
pub fast_field_cache_capacity: ByteSize,
268268
pub split_footer_cache_capacity: ByteSize,
269269
pub partial_request_cache_capacity: ByteSize,
270+
pub predicate_cache_capacity: ByteSize,
270271
pub max_num_concurrent_split_searches: usize,
271272
// Deprecated: stream search requests are no longer supported.
272273
#[serde(alias = "max_num_concurrent_split_streams", default, skip_serializing)]
@@ -324,6 +325,7 @@ impl Default for SearcherConfig {
324325
fast_field_cache_capacity: ByteSize::gb(1),
325326
split_footer_cache_capacity: ByteSize::mb(500),
326327
partial_request_cache_capacity: ByteSize::mb(64),
328+
predicate_cache_capacity: ByteSize::mb(256),
327329
max_num_concurrent_split_searches: 100,
328330
_max_num_concurrent_split_streams: None,
329331
aggregation_memory_limit: ByteSize::mb(500),

quickwit/quickwit-config/src/node_config/serialize.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -661,6 +661,7 @@ mod tests {
661661
fast_field_cache_capacity: ByteSize::gb(10),
662662
split_footer_cache_capacity: ByteSize::gb(1),
663663
partial_request_cache_capacity: ByteSize::mb(64),
664+
predicate_cache_capacity: ByteSize::mb(256),
664665
max_num_concurrent_split_searches: 150,
665666
_max_num_concurrent_split_streams: Some(serde::de::IgnoredAny),
666667
split_cache: None,

quickwit/quickwit-doc-mapper/src/doc_mapper/doc_mapper_impl.rs

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -14,12 +14,13 @@
1414

1515
use std::collections::{BTreeMap, BTreeSet, HashSet};
1616
use std::num::NonZeroU32;
17+
use std::sync::Arc;
1718

1819
use anyhow::{Context, bail};
1920
use fnv::FnvHashSet;
2021
use quickwit_proto::types::DocMappingUid;
2122
use quickwit_query::create_default_quickwit_tokenizer_manager;
22-
use quickwit_query::query_ast::QueryAst;
23+
use quickwit_query::query_ast::{BuildTantivyAstContext, QueryAst};
2324
use quickwit_query::tokenizers::TokenizerManager;
2425
use serde::{Deserialize, Serialize};
2526
use serde_json::{self, Value as JsonValue};
@@ -636,15 +637,19 @@ impl DocMapper {
636637
pub fn query(
637638
&self,
638639
split_schema: Schema,
639-
query_ast: &QueryAst,
640+
query_ast: QueryAst,
640641
with_validation: bool,
642+
cache_context: Option<(Arc<dyn quickwit_query::query_ast::PredicateCache>, String)>,
641643
) -> Result<(Box<dyn Query>, WarmupInfo), QueryParserError> {
642644
build_query(
643645
query_ast,
644-
split_schema,
645-
self.tokenizer_manager(),
646-
&self.default_search_field_names[..],
647-
with_validation,
646+
&BuildTantivyAstContext {
647+
schema: &split_schema,
648+
tokenizer_manager: self.tokenizer_manager(),
649+
search_fields: &self.default_search_field_names[..],
650+
with_validation,
651+
},
652+
cache_context,
648653
)
649654
}
650655

@@ -2068,7 +2073,7 @@ mod tests {
20682073
.parse_user_query(doc_mapper.default_search_fields())
20692074
.map_err(|err| err.to_string())?;
20702075
let (query, _) = doc_mapper
2071-
.query(doc_mapper.schema(), &query_ast, true)
2076+
.query(doc_mapper.schema(), query_ast, true, None)
20722077
.map_err(|err| err.to_string())?;
20732078
Ok(format!("{query:?}"))
20742079
}

quickwit/quickwit-doc-mapper/src/doc_mapper/mod.rs

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -290,7 +290,7 @@ mod tests {
290290
}
291291
.parse_user_query(&[])
292292
.unwrap();
293-
let (query, _) = doc_mapper.query(schema, &query_ast, true).unwrap();
293+
let (query, _) = doc_mapper.query(schema, query_ast, true, None).unwrap();
294294
assert_eq!(
295295
format!("{query:?}"),
296296
r#"TermQuery(Term(field=2, type=Json, path=toto.titi, type=Str, "hello"))"#
@@ -304,7 +304,7 @@ mod tests {
304304
let query_ast = query_ast_from_user_text("toto.titi:hello", None)
305305
.parse_user_query(doc_mapper.default_search_fields())
306306
.unwrap();
307-
let (query, _) = doc_mapper.query(schema, &query_ast, true).unwrap();
307+
let (query, _) = doc_mapper.query(schema, query_ast, true, None).unwrap();
308308
assert_eq!(
309309
format!("{query:?}"),
310310
r#"TermQuery(Term(field=1, type=Json, path=toto.titi, type=Str, "hello"))"#
@@ -318,7 +318,7 @@ mod tests {
318318
let query_ast = query_ast_from_user_text("toto:5", None)
319319
.parse_user_query(&[])
320320
.unwrap();
321-
let (query, _) = doc_mapper.query(schema, &query_ast, true).unwrap();
321+
let (query, _) = doc_mapper.query(schema, query_ast, true, None).unwrap();
322322
assert_eq!(
323323
format!("{query:?}"),
324324
r#"BooleanQuery { subqueries: [(Should, TermQuery(Term(field=1, type=Json, path=toto, type=I64, 5))), (Should, TermQuery(Term(field=1, type=Json, path=toto, type=Str, "5")))], minimum_number_should_match: 1 }"#
@@ -857,7 +857,7 @@ mod tests {
857857
field: "multilang".to_string(),
858858
value: "JPN:す".to_string(),
859859
});
860-
let (query, _) = doc_mapper.query(schema, &query_ast, false).unwrap();
860+
let (query, _) = doc_mapper.query(schema, query_ast, false, None).unwrap();
861861
assert_eq!(
862862
format!("{query:?}"),
863863
r#"TermQuery(Term(field=2, type=Str, "JPN:す"))"#

quickwit/quickwit-doc-mapper/src/query_builder.rs

Lines changed: 37 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -15,10 +15,12 @@
1515
use std::collections::{HashMap, HashSet};
1616
use std::convert::Infallible;
1717
use std::ops::Bound;
18+
use std::sync::Arc;
1819

20+
use quickwit_proto::types::SplitId;
1921
use quickwit_query::query_ast::{
20-
FieldPresenceQuery, FullTextQuery, PhrasePrefixQuery, QueryAst, QueryAstVisitor, RangeQuery,
21-
RegexQuery, TermSetQuery, WildcardQuery,
22+
BuildTantivyAstContext, FieldPresenceQuery, FullTextQuery, PhrasePrefixQuery, QueryAst,
23+
QueryAstTransformer, QueryAstVisitor, RangeQuery, RegexQuery, TermSetQuery, WildcardQuery,
2224
};
2325
use quickwit_query::tokenizers::TokenizerManager;
2426
use quickwit_query::{InvalidQuery, find_field_or_hit_dynamic};
@@ -154,17 +156,24 @@ impl<'a, 'f> QueryAstVisitor<'a> for ExistsQueryFastFields<'f> {
154156

155157
/// Build a `Query` with field resolution & forbidding range clauses.
156158
pub(crate) fn build_query(
157-
query_ast: &QueryAst,
158-
schema: Schema,
159-
tokenizer_manager: &TokenizerManager,
160-
search_fields: &[String],
161-
with_validation: bool,
159+
query_ast: QueryAst,
160+
context: &BuildTantivyAstContext,
161+
cache_context: Option<(Arc<dyn quickwit_query::query_ast::PredicateCache>, SplitId)>,
162162
) -> Result<(Box<dyn Query>, WarmupInfo), QueryParserError> {
163163
let mut fast_fields: HashSet<FastFieldWarmupInfo> = HashSet::new();
164164

165+
let query_ast = if let Some((cache, split_id)) = cache_context {
166+
let Ok(query_ast) = quickwit_query::query_ast::PredicateCacheInjector { cache, split_id }
167+
.transform(query_ast);
168+
// this transformer isn't supposed to ever remove a node
169+
query_ast.unwrap_or(QueryAst::MatchAll)
170+
} else {
171+
query_ast
172+
};
173+
165174
let mut range_query_fields = RangeQueryFields::default();
166175
// This cannot fail. The error type is Infallible.
167-
let Ok(_) = range_query_fields.visit(query_ast);
176+
let Ok(_) = range_query_fields.visit(&query_ast);
168177
let range_query_fast_fields =
169178
range_query_fields
170179
.range_query_field_names
@@ -177,31 +186,30 @@ pub(crate) fn build_query(
177186

178187
let Ok(_) = TermSearchOnColumnar {
179188
fields: &mut fast_fields,
180-
schema: schema.clone(),
189+
schema: context.schema.clone(),
181190
}
182-
.visit(query_ast);
191+
.visit(&query_ast);
183192

184193
let Ok(_) = ExistsQueryFastFields {
185194
fields: &mut fast_fields,
186-
schema: schema.clone(),
195+
schema: context.schema.clone(),
187196
}
188-
.visit(query_ast);
197+
.visit(&query_ast);
189198

190-
let query = query_ast.build_tantivy_query(
191-
&schema,
192-
tokenizer_manager,
193-
search_fields,
194-
with_validation,
195-
)?;
199+
let query = query_ast.build_tantivy_query(context)?;
196200

197-
let term_set_query_fields = extract_term_set_query_fields(query_ast, &schema)?;
201+
let term_set_query_fields = extract_term_set_query_fields(&query_ast, context.schema)?;
198202
let (term_ranges_grouped_by_field, automatons_grouped_by_field) =
199-
extract_prefix_term_ranges_and_automaton(query_ast, &schema, tokenizer_manager)?;
203+
extract_prefix_term_ranges_and_automaton(
204+
&query_ast,
205+
context.schema,
206+
context.tokenizer_manager,
207+
)?;
200208

201209
let mut terms_grouped_by_field: HashMap<Field, HashMap<_, bool>> = Default::default();
202210
query.query_terms(&mut |term, need_position| {
203211
let field = term.field();
204-
if !schema.get_field_entry(field).is_indexed() {
212+
if !context.schema.get_field_entry(field).is_indexed() {
205213
return;
206214
}
207215
*terms_grouped_by_field
@@ -419,8 +427,8 @@ mod test {
419427

420428
use quickwit_common::shared_consts::FIELD_PRESENCE_FIELD_NAME;
421429
use quickwit_query::query_ast::{
422-
FullTextMode, FullTextParams, PhrasePrefixQuery, QueryAstVisitor, UserInputQuery,
423-
query_ast_from_user_text,
430+
BuildTantivyAstContext, FullTextMode, FullTextParams, PhrasePrefixQuery, QueryAstVisitor,
431+
UserInputQuery, query_ast_from_user_text,
424432
};
425433
use quickwit_query::{
426434
BooleanOperand, MatchAllOrNone, create_default_quickwit_tokenizer_manager,
@@ -506,13 +514,7 @@ mod test {
506514
.parse_user_query(&[])
507515
.map_err(|err| err.to_string())?;
508516
let schema = make_schema(dynamic_mode);
509-
let query_result = build_query(
510-
&query_ast,
511-
schema,
512-
&create_default_quickwit_tokenizer_manager(),
513-
&[],
514-
true,
515-
);
517+
let query_result = build_query(query_ast, &BuildTantivyAstContext::for_test(&schema), None);
516518
query_result
517519
.map(|query| format!("{query:?}"))
518520
.map_err(|err| err.to_string())
@@ -886,29 +888,18 @@ mod test {
886888
.parse_user_query(&[])
887889
.unwrap();
888890

889-
let (_, warmup_info) = build_query(
890-
&query_with_set,
891-
make_schema(true),
892-
&create_default_quickwit_tokenizer_manager(),
893-
&[],
894-
true,
895-
)
896-
.unwrap();
891+
let schema = make_schema(true);
892+
let context = BuildTantivyAstContext::for_test(&schema);
893+
894+
let (_, warmup_info) = build_query(query_with_set, &context, None).unwrap();
897895
assert_eq!(warmup_info.term_dict_fields.len(), 1);
898896
assert!(
899897
warmup_info
900898
.term_dict_fields
901899
.contains(&tantivy::schema::Field::from_field_id(2))
902900
);
903901

904-
let (_, warmup_info) = build_query(
905-
&query_without_set,
906-
make_schema(true),
907-
&create_default_quickwit_tokenizer_manager(),
908-
&[],
909-
true,
910-
)
911-
.unwrap();
902+
let (_, warmup_info) = build_query(query_without_set, &context, None).unwrap();
912903
assert!(warmup_info.term_dict_fields.is_empty());
913904
}
914905

quickwit/quickwit-doc-mapper/src/tag_pruning.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,7 @@ fn extract_unsimplified_tags_filter_ast(query_ast: QueryAst) -> UnsimplifiedTagF
114114
}
115115
QueryAst::FieldPresence(_) => UnsimplifiedTagFilterAst::Uninformative,
116116
QueryAst::Regex(_) => UnsimplifiedTagFilterAst::Uninformative,
117+
QueryAst::Cache(cache_node) => extract_unsimplified_tags_filter_ast(*cache_node.inner),
117118
}
118119
}
119120

quickwit/quickwit-indexing/src/actors/merge_executor.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -534,7 +534,7 @@ impl MergeExecutor {
534534
parsed_query_ast
535535
);
536536
let (query, _) =
537-
doc_mapper.query(union_index.schema(), &parsed_query_ast, false)?;
537+
doc_mapper.query(union_index.schema(), parsed_query_ast, false, None)?;
538538
index_writer.delete_query(query)?;
539539
}
540540
debug!("commit-delete-operations");

quickwit/quickwit-query/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ license.workspace = true
1313
[dependencies]
1414
anyhow = { workspace = true }
1515
base64 = { workspace = true }
16+
bitpacking = { workspace = true }
1617
hex = { workspace = true }
1718
lindera-core = { workspace = true, optional = true }
1819
lindera-dictionary = { workspace = true, optional = true }
@@ -24,13 +25,15 @@ serde_json = { workspace = true }
2425
serde_with = { workspace = true }
2526
tantivy = { workspace = true }
2627
tantivy-fst = { workspace = true }
28+
tracing = { workspace = true }
2729
time = { workspace = true }
2830
thiserror = { workspace = true }
2931
rustc-hash = { workspace = true }
3032
whichlang = { workspace = true, optional = true }
3133

3234
quickwit-common = { workspace = true }
3335
quickwit-datetime = { workspace = true }
36+
quickwit-proto = { workspace = true }
3437

3538
[dev-dependencies]
3639
criterion = { workspace = true }

0 commit comments

Comments
 (0)