Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New exact phrase searching feature (for HTML) #12552

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ Contributors
* Łukasz Langa -- partial support for autodoc
* Marco Buttu -- doctest extension (pyversion option)
* Martin Hans -- autodoc improvements
* Marvin Herbold -- HTML exact phrase search support
* Martin Larralde -- additional napoleon admonitions
* Martin Mahner -- nature theme
* Matthew Fernandez -- todo extension fix
Expand Down
30 changes: 24 additions & 6 deletions sphinx/themes/basic/static/searchtools.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ const _removeChildren = (element) => {
const _escapeRegExp = (string) =>
string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string

const _displayItem = (item, searchTerms, highlightTerms) => {
const _displayItem = (item, searchTerms, highlightTerms, exactSearchPhrases) => {
const docBuilder = DOCUMENTATION_OPTIONS.BUILDER;
const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX;
const docLinkSuffix = DOCUMENTATION_OPTIONS.LINK_SUFFIX;
Expand Down Expand Up @@ -97,6 +97,14 @@ const _displayItem = (item, searchTerms, highlightTerms) => {
fetch(requestUrl)
.then((responseData) => responseData.text())
.then((data) => {

// exclude results that don't contain exact phrases if we are searching for them
if (data) {
const lowercaseData = data.toLowerCase();
const mismatch = (s) => !lowercaseData.includes(s);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: perhaps we could/should add word boundaries around the match?

Suggested change
const mismatch = (s) => !lowercaseData.includes(s);
const mismatch = (s) => !s.match(`\b${lowercaseData}\b`);

Reasoning:

  • Could make it easier to exact-search for strings that are substrings of other phrases/words.
  • Although regex usage can introduce some overhead, there's also optimization opportunity if the matching can skip over non-word boundary match positions.

if (exactSearchPhrases.some(mismatch)) return;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps every would be better than some here?

If searching for two phrases, it could be frustrating not to find any results at all, despite the fact that some pages do include one of the phrases.

}

if (data)
listItem.appendChild(
Search.makeSearchSummary(data, searchTerms, anchor)
Expand Down Expand Up @@ -124,13 +132,14 @@ const _displayNextItem = (
resultCount,
searchTerms,
highlightTerms,
exactSearchPhrases,
) => {
// results left, load the summary and display it
// this is intended to be dynamic (don't sub resultsCount)
if (results.length) {
_displayItem(results.pop(), searchTerms, highlightTerms);
_displayItem(results.pop(), searchTerms, highlightTerms, exactSearchPhrases);
setTimeout(
() => _displayNextItem(results, resultCount, searchTerms, highlightTerms),
() => _displayNextItem(results, resultCount, searchTerms, highlightTerms, exactSearchPhrases),
5
);
}
Expand Down Expand Up @@ -275,6 +284,15 @@ const Search = {
const excludedTerms = new Set();
const highlightTerms = new Set();
const objectTerms = new Set(splitQuery(query.toLowerCase().trim()));

// prepare the exact phrase search feature
const exactSearchPhrases = [];
const exactSearchPattern = /"([^"]+?)"/g;
let match;
while (match = exactSearchPattern.exec(query)) {
exactSearchPhrases.push(match[1].toLowerCase());
}

splitQuery(query.trim()).forEach((queryTerm) => {
const queryTermLower = queryTerm.toLowerCase();

Expand Down Expand Up @@ -304,7 +322,7 @@ const Search = {
// console.info("required: ", [...searchTerms]);
// console.info("excluded: ", [...excludedTerms]);

return [query, searchTerms, excludedTerms, highlightTerms, objectTerms];
return [query, searchTerms, excludedTerms, highlightTerms, objectTerms, exactSearchPhrases];
},

/**
Expand Down Expand Up @@ -403,15 +421,15 @@ const Search = {
},

query: (query) => {
const [searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms] = Search._parseQuery(query);
const [searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms, exactSearchPhrases] = Search._parseQuery(query);
const results = Search._performSearch(searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms);

// for debugging
//Search.lastresults = results.slice(); // a copy
// console.info("search results:", Search.lastresults);

// print the results
_displayNextItem(results, results.length, searchTerms, highlightTerms);
_displayNextItem(results, results.length, searchTerms, highlightTerms, exactSearchPhrases);
},

/**
Expand Down