Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
0449e48
Fix that negated search words are filtered out in the splitQuery.
cglukas Sep 13, 2025
dbfbd46
Fix that negated search words create a SyntaxError: Invalid or unexpe…
cglukas Sep 14, 2025
7a8d46e
Fix that excluded words would abort the entire search if matched in o…
cglukas Sep 15, 2025
cfdfda4
Fix format and add contribution information.
cglukas Sep 15, 2025
80d6ad1
Fix long line.
cglukas Sep 15, 2025
9b06fd5
Add a dedicated fixture for testing excluded words in searches.
cglukas Sep 19, 2025
11d2e2e
Is the pipeline flaky?
cglukas Sep 19, 2025
6cc4055
Add missing file.
cglukas Sep 19, 2025
ffc193b
Remove debug change.
cglukas Sep 19, 2025
7b79f42
Merge branch 'master' into work/fix-excluded-search-terms
AA-Turner Oct 5, 2025
a4764b1
Reduce line length to fix doclinter.
cglukas Oct 7, 2025
234fedc
Merge remote-tracking branch 'origin/master' into work/fix-excluded-s…
cglukas Oct 7, 2025
8e41004
Add regression test for TypeError in excluded words comparison.
cglukas Nov 2, 2025
da297e5
Fix wording of test.
cglukas Nov 2, 2025
1fd12ab
Update searchindex by running `python .\utils\generate_js_fixtures.py`
cglukas Nov 2, 2025
577098d
Simplify regex code
cglukas Nov 2, 2025
cb1b3cf
Merge branch 'master' into work/fix-excluded-search-terms
cglukas Nov 2, 2025
a2139df
Merge branch 'refs/heads/master' into work/fix-excluded-search-terms
cglukas Nov 9, 2025
8a21a56
Fxi searchindex.js of fixtures by rerunning generation with the newes…
cglukas Nov 9, 2025
f1e5e8b
Test commit to re-trigger the actions on github.
cglukas Nov 9, 2025
76b375e
Revert "Test commit to re-trigger the actions on github."
cglukas Nov 9, 2025
35232a3
Test commit to re-trigger the actions on github.
cglukas Nov 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ Contributors
* Lars Hupfeldt Nielsen - OpenSSL FIPS mode md5 bug fix
* Louis Maddox -- better docstrings
* Łukasz Langa -- partial support for autodoc
* Lukas Wieg -- JavaScript search improvement
* Marco Buttu -- doctest extension (pyversion option)
* Mark Ostroth -- semantic HTML contributions
* Martin Hans -- autodoc improvements
Expand Down
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ Bugs fixed
for objects documented as ``:py:data:`` to be hyperlinked in function signatures.
* #13858: doctest: doctest blocks are now correctly added to a group defined by the
configuration variable ``doctest_test_doctest_blocks``.
* #13892: search: support word exclusion in the search by prefixing words with "-".


Testing
Expand Down
46 changes: 31 additions & 15 deletions sphinx/themes/basic/static/searchtools.js
Original file line number Diff line number Diff line change
Expand Up @@ -171,15 +171,28 @@ const _orderResultsByScoreThenName = (a, b) => {
* Default splitQuery function. Can be overridden in ``sphinx.search`` with a
* custom function per language.
*
* The regular expression works by splitting the string on consecutive characters
* that are not Unicode letters, numbers, underscores, or emoji characters.
* This is the same as ``\W+`` in Python, preserving the surrogate pair area.
* The `consecutiveLetters` regular expression works by matching consecutive characters
* that are Unicode letters, numbers, underscores, or emoji characters.
*
* The `searchWords` regular expression works by matching a word like structure
* that matches the `consecutiveLetters` with or without a leading hyphen '-' which is
* used to exclude search terms later on.
*/
if (typeof splitQuery === "undefined") {
var splitQuery = (query) =>
query
.split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu)
.filter((term) => term); // remove remaining empty strings
var splitQuery = (query) => {
const consecutiveLetters =
/[\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu;
const searchWords = new RegExp(
`(${consecutiveLetters.source})|\\s(-${consecutiveLetters.source})`,
"gu",
);
return Array.from(
query
.matchAll(searchWords)
.map((results) => results[1] ?? results[2]) // select one of the possible groups (e.g. "word" or "-word").
.filter((term) => term), // remove remaining empty strings.
);
};
}

/**
Expand Down Expand Up @@ -627,15 +640,18 @@ const Search = {

// ensure that none of the excluded terms is in the search result
if (
[...excludedTerms].some(
(term) =>
terms[term] === file
|| titleTerms[term] === file
|| (terms[term] || []).includes(file)
|| (titleTerms[term] || []).includes(file),
)
[...excludedTerms].some((excludedTerm) => {
// Both mappings will contain either a single integer or a list of integers.
// Converting them to lists makes the comparison more readable.
let excludedTermFiles = [].concat(terms[excludedTerm]);
let excludedTitleFiles = [].concat(titleTerms[excludedTerm]);
return (
excludedTermFiles.includes(file)
|| excludedTitleFiles.includes(file)
);
})
Comment on lines +633 to +642
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
[...excludedTerms].some((excludedTerm) => {
// Both mappings will contain either a single integer or a list of integers.
// Converting them to lists makes the comparison more readable.
let excludedTermFiles = [].concat(terms[excludedTerm]);
let excludedTitleFiles = [].concat(titleTerms[excludedTerm]);
return (
excludedTermFiles.includes(file)
|| excludedTitleFiles.includes(file)
);
})
[...excludedTerms].some(
(term) =>
terms[term] === file
|| titleTerms[term] === file
|| (terms[term] || []).includes(file)
|| (titleTerms[term] || []).includes(file),
)

Do we need this set of excludedTerms filtering changes? I would suggest not modifying these lines unless strictly necessary. Tests continue to pass when I revert this.

I acknowledge that the break to continue fixup is important though.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I'll need to add another test then. The last conditions raise an error in some cases. I'll add it soon.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @jayaddison,
I added the "should exclude results where the file index is above 0." test case which raises an TypeError with the old code. It's very specific for the old implementation.
The error occurs every time the terms[term] statement is a positive integer and not equal to the file variable. I think this bug was introduced when the datastructures were changed from actual file paths/names to file ids.
The strings usually evaluate to false in this condition, but integers above 0 evaluate to true.

)
break;
continue;

// select one (max) score for the file.
const score = Math.max(...wordList.map((w) => scoreMap.get(file).get(w)));
Expand Down
36 changes: 36 additions & 0 deletions tests/js/searchtools.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,32 @@ describe("Basic html theme search", function () {
expect(Search.performTermsSearch(searchterms, excluded)).toEqual(hits);
});

it("should find results when excluded terms are used", function () {
// This fixture already existed and has the right data to make this test work.
// Replace with another matching fixture if necessary.
eval(loadFixture("titles/searchindex.js"));

// It's important that the searchterm is included in multiple pages while the
// excluded term is not included in all pages.
// In this case the ``for`` is included in the two existing pages while the ``ask``
// is only included in one page.
[_searchQuery, searchterms, excluded, ..._remainingItems] =
Search._parseQuery("for -ask");

// prettier-ignore
hits = [[
'relevance',
'Relevance',
'',
null,
2,
'relevance.rst',
'text'
]];
expect(excluded).toEqual(new Set(["ask"]));
expect(Search.performTermsSearch(searchterms, excluded)).toEqual(hits);
});

it('should partially-match "sphinx" when in title index', function () {
eval(loadFixture("partial/searchindex.js"));

Expand Down Expand Up @@ -295,6 +321,16 @@ describe("splitQuery regression tests", () => {
expect(parts).toEqual(["Pin", "Code"]);
});

it("can keep underscores in words", () => {
const parts = splitQuery("python_function");
expect(parts).toEqual(["python_function"]);
});

it("can maintain negated search words", () => {
const parts = splitQuery("Pin -Code");
expect(parts).toEqual(["Pin", "-Code"]);
});

it("can split Chinese characters", () => {
const parts = splitQuery("Hello from 中国 上海");
expect(parts).toEqual(["Hello", "from", "中国", "上海"]);
Expand Down
Loading