Skip to content
This repository was archived by the owner on Sep 30, 2024. It is now read-only.

Commit bec5621

Browse files
authored
Search: solidify conversion to Zoekt queries (#60349)
For the 'search-content-based-lang-detection' feature, this PR improves how filters are converted to Zoekt queries: * Only convert filters to Zoekt lang filters, instead of also including file filters * Handle excluded filters as well, for example `-lang:go` * Use 'and' instead of 'or' to combine multiple filters
1 parent bb926a6 commit bec5621

File tree

2 files changed

+108
-79
lines changed

2 files changed

+108
-79
lines changed

internal/search/zoekt/query.go

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,30 @@ func QueryToZoektQuery(b query.Basic, resultTypes result.Types, feat *search.Fea
3030
}
3131
}
3232

33+
var and []zoekt.Q
34+
if q != nil {
35+
and = append(and, q)
36+
}
37+
3338
// Handle file: and -file: filters.
3439
filesInclude, filesExclude := b.IncludeExcludeValues(query.FieldFile)
40+
3541
// Handle lang: and -lang: filters.
42+
// By default, languages are converted to file filters. When the 'search-content-based-lang-detection'
43+
// feature is enabled, we use Zoekt's native language filters, which are based on the actual language
44+
// of the file (as determined by go-enry).
3645
langInclude, langExclude := b.IncludeExcludeValues(query.FieldLang)
37-
filesInclude = append(filesInclude, mapSlice(langInclude, query.LangToFileRegexp)...)
38-
filesExclude = append(filesExclude, mapSlice(langExclude, query.LangToFileRegexp)...)
39-
40-
var and []zoekt.Q
41-
if q != nil {
42-
and = append(and, q)
46+
if feat.ContentBasedLangFilters {
47+
for _, lang := range langInclude {
48+
and = append(and, toLangFilter(lang))
49+
}
50+
for _, lang := range langExclude {
51+
filter := toLangFilter(lang)
52+
and = append(and, &zoekt.Not{Child: filter})
53+
}
54+
} else {
55+
filesInclude = append(filesInclude, mapSlice(langInclude, query.LangToFileRegexp)...)
56+
filesExclude = append(filesExclude, mapSlice(langExclude, query.LangToFileRegexp)...)
4357
}
4458

4559
// zoekt also uses regular expressions for file paths
@@ -68,26 +82,14 @@ func QueryToZoektQuery(b query.Basic, resultTypes result.Types, feat *search.Fea
6882
and = append(and, zoekt.NewAnd(repoHasFilters...))
6983
}
7084

71-
// Languages are already partially expressed with IncludePatterns, but Zoekt creates
72-
// more precise language metadata based on file contents analyzed by go-enry, so it's
73-
// useful to pass lang: queries down.
74-
//
75-
// Currently, negated lang queries create filename-based ExcludePatterns that cannot be
76-
// corrected by the more precise language metadata. If this is a problem, indexed search
77-
// queries should have a special query converter that produces *only* Language predicates
78-
// instead of filepatterns.
79-
if len(langInclude) > 0 && feat.ContentBasedLangFilters {
80-
or := &zoekt.Or{}
81-
for _, lang := range langInclude {
82-
lang, _ = enry.GetLanguageByAlias(lang) // Invariant: lang is valid.
83-
or.Children = append(or.Children, &zoekt.Language{Language: lang})
84-
}
85-
and = append(and, or)
86-
}
87-
8885
return zoekt.Simplify(zoekt.NewAnd(and...)), nil
8986
}
9087

88+
func toLangFilter(lang string) zoekt.Q {
89+
lang, _ = enry.GetLanguageByAlias(lang) // Invariant: lang is valid.
90+
return &zoekt.Language{Language: lang}
91+
}
92+
9193
func QueryForFileContentArgs(opt query.RepoHasFileContentArgs, caseSensitive bool) zoekt.Q {
9294
var children []zoekt.Q
9395
if opt.Path != "" {

internal/search/zoekt/query_test.go

Lines changed: 83 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -16,91 +16,118 @@ import (
1616

1717
func TestQueryToZoektQuery(t *testing.T) {
1818
cases := []struct {
19-
Name string
20-
Type search.IndexedRequestType
21-
Pattern string
22-
Features search.Features
23-
Query string
19+
Name string
20+
Type search.IndexedRequestType
21+
Query string
22+
Features search.Features
23+
WantZoektQuery string
2424
}{
2525
{
26-
Name: "substr",
27-
Type: search.TextRequest,
28-
Pattern: `foo patterntype:regexp`,
29-
Query: "foo case:no",
26+
Name: "substr",
27+
Type: search.TextRequest,
28+
Query: `foo patterntype:regexp`,
29+
WantZoektQuery: "foo case:no",
3030
},
3131
{
32-
Name: "symbol substr",
33-
Type: search.SymbolRequest,
34-
Pattern: `foo patterntype:regexp type:symbol`,
35-
Query: "sym:foo case:no",
32+
Name: "symbol substr",
33+
Type: search.SymbolRequest,
34+
Query: `foo patterntype:regexp type:symbol`,
35+
WantZoektQuery: "sym:foo case:no",
3636
},
3737
{
38-
Name: "regex",
39-
Type: search.TextRequest,
40-
Pattern: `(foo).*?(bar) patterntype:regexp`,
41-
Query: "(foo).*?(bar) case:no",
38+
Name: "regex",
39+
Type: search.TextRequest,
40+
Query: `(foo).*?(bar) patterntype:regexp`,
41+
WantZoektQuery: "(foo).*?(bar) case:no",
4242
},
4343
{
44-
Name: "path",
45-
Type: search.TextRequest,
46-
Pattern: `foo file:\.go$ file:\.yaml$ -file:\bvendor\b patterntype:regexp`,
47-
Query: `foo case:no f:\.go$ f:\.yaml$ -f:\bvendor\b`,
44+
Name: "path",
45+
Type: search.TextRequest,
46+
Query: `foo file:\.go$ file:\.yaml$ -file:\bvendor\b patterntype:regexp`,
47+
WantZoektQuery: `foo case:no f:\.go$ f:\.yaml$ -f:\bvendor\b`,
4848
},
4949
{
50-
Name: "case",
51-
Type: search.TextRequest,
52-
Pattern: `foo case:yes patterntype:regexp file:\.go$ file:yaml`,
53-
Query: `foo case:yes f:\.go$ f:yaml`,
50+
Name: "case",
51+
Type: search.TextRequest,
52+
Query: `foo case:yes patterntype:regexp file:\.go$ file:yaml`,
53+
WantZoektQuery: `foo case:yes f:\.go$ f:yaml`,
5454
},
5555
{
56-
Name: "casepath",
57-
Type: search.TextRequest,
58-
Pattern: `foo case:yes file:\.go$ file:\.yaml$ -file:\bvendor\b patterntype:regexp`,
59-
Query: `foo case:yes f:\.go$ f:\.yaml$ -f:\bvendor\b`,
56+
Name: "casepath",
57+
Type: search.TextRequest,
58+
Query: `foo case:yes file:\.go$ file:\.yaml$ -file:\bvendor\b patterntype:regexp`,
59+
WantZoektQuery: `foo case:yes f:\.go$ f:\.yaml$ -f:\bvendor\b`,
6060
},
6161
{
62-
Name: "path matches only",
63-
Type: search.TextRequest,
64-
Pattern: `test type:path`,
65-
Query: `f:test`,
62+
Name: "path matches only",
63+
Type: search.TextRequest,
64+
Query: `test type:path`,
65+
WantZoektQuery: `f:test`,
6666
},
6767
{
68-
Name: "content matches only",
69-
Type: search.TextRequest,
70-
Pattern: `test type:file patterntype:literal`,
71-
Query: `c:test`,
68+
Name: "content matches only",
69+
Type: search.TextRequest,
70+
Query: `test type:file patterntype:literal`,
71+
WantZoektQuery: `c:test`,
7272
},
7373
{
74-
Name: "content and path matches",
75-
Type: search.TextRequest,
76-
Pattern: `test`,
77-
Query: `test`,
74+
Name: "content and path matches",
75+
Type: search.TextRequest,
76+
Query: `test`,
77+
WantZoektQuery: `test`,
7878
},
7979
{
80-
Name: "Just file",
81-
Type: search.TextRequest,
82-
Pattern: `file:\.go$`,
83-
Query: `file:"\\.go(?m:$)"`,
80+
Name: "Just file",
81+
Type: search.TextRequest,
82+
Query: `file:\.go$`,
83+
WantZoektQuery: `file:"\\.go(?m:$)"`,
8484
},
8585
{
86-
Name: "Languages is ignored",
87-
Type: search.TextRequest,
88-
Pattern: `file:\.go$ lang:go`,
89-
Query: `file:"\\.go(?m:$)" file:"\\.go(?m:$)"`,
86+
Name: "Languages get passed as file filter",
87+
Type: search.TextRequest,
88+
Query: `file:\.go$ lang:go`,
89+
WantZoektQuery: `file:"\\.go(?m:$)" file:"\\.go(?m:$)"`,
9090
},
9191
{
92-
Name: "language gets passed as both file include and lang: predicate",
93-
Type: search.TextRequest,
94-
Pattern: `file:\.go$ lang:go`,
92+
Name: "Language get passed as lang: query",
93+
Type: search.TextRequest,
94+
Query: `lang:go`,
9595
Features: search.Features{
9696
ContentBasedLangFilters: true,
9797
},
98-
Query: `file:"\\.go(?m:$)" file:"\\.go(?m:$)" lang:Go`,
98+
WantZoektQuery: `lang:go`,
99+
},
100+
{
101+
Name: "Multiple languages get passed as lang queries",
102+
Type: search.TextRequest,
103+
Query: `lang:go lang:typescript`,
104+
Features: search.Features{
105+
ContentBasedLangFilters: true,
106+
},
107+
WantZoektQuery: `lang:go lang:typescript`,
108+
},
109+
{
110+
Name: "Excluded languages get passed as lang: query",
111+
Type: search.TextRequest,
112+
Query: `lang:go -lang:typescript -lang:markdown`,
113+
Features: search.Features{
114+
ContentBasedLangFilters: true,
115+
},
116+
WantZoektQuery: `lang:go -lang:typescript -lang:markdown`,
117+
},
118+
{
119+
Name: "Mixed file and lang filters",
120+
Type: search.TextRequest,
121+
Query: `file:\.go$ lang:go lang:typescript`,
122+
Features: search.Features{
123+
ContentBasedLangFilters: true,
124+
},
125+
WantZoektQuery: `file:"\\.go(?m:$)" lang:go lang:typescript`,
99126
},
100127
}
101128
for _, tt := range cases {
102129
t.Run(tt.Name, func(t *testing.T) {
103-
sourceQuery, _ := query.ParseRegexp(tt.Pattern)
130+
sourceQuery, _ := query.ParseRegexp(tt.Query)
104131
b, _ := query.ToBasicQuery(sourceQuery)
105132

106133
types, _ := b.ToParseTree().StringValues(query.FieldType)
@@ -110,9 +137,9 @@ func TestQueryToZoektQuery(t *testing.T) {
110137
t.Fatal("QueryToZoektQuery failed:", err)
111138
}
112139

113-
zoektQuery, err := zoekt.Parse(tt.Query)
140+
zoektQuery, err := zoekt.Parse(tt.WantZoektQuery)
114141
if err != nil {
115-
t.Fatalf("failed to parse %q: %v", tt.Query, err)
142+
t.Fatalf("failed to parse %q: %v", tt.WantZoektQuery, err)
116143
}
117144

118145
if !queryEqual(got, zoektQuery) {

0 commit comments

Comments
 (0)