def bold_search_terms(response: str, query: str) -> BeautifulSoup:
    """Wraps all search terms in bold tags (<b>). If any terms are wrapped
    in quotes, only that exact phrase will be made bold.

    Matching is case-insensitive and the casing found in the page is kept
    in the bolded output. Terms that are empty once punctuation has been
    stripped are ignored.

    Args:
        response: The initial response body for the query
        query: The original search query

    Returns:
        BeautifulSoup: modified soup object with bold items
    """
    soup = BeautifulSoup(response, 'html.parser')

    # Split all words out of query, grouping the ones wrapped in quotes
    terms = []
    for word in re.split(r'\s+(?=[^"]*(?:"[^"]*"[^"]*)*$)', query):
        word = re.sub(r'[^A-Za-z0-9 ]+', '', word)
        # An empty/blank term would compile to a pattern that matches every
        # text node (and every position inside it), so drop it.
        if word.strip():
            terms.append(word)

    if not terms:
        return soup

    # One alternation over all terms, longest first, applied in a single
    # pass: this keeps a later term from matching inside the markup that was
    # inserted for an earlier one, and prefers "foobar" over "foo".
    terms.sort(key=len, reverse=True)
    pattern = re.compile('|'.join(re.escape(term) for term in terms), re.I)

    def embolden(match):
        # Re-use the matched text so the original casing is preserved.
        return f'<b>{match.group(0)}</b>'

    # find_all returns a materialised list, so replacing nodes while
    # iterating over it is safe.
    for nav_str in soup.find_all(text=pattern):
        parent = nav_str.parent
        # Never rewrite script/style contents: injecting markup there would
        # corrupt the embedded code rather than highlight visible text.
        if parent is not None and parent.name in ('script', 'style'):
            continue
        # NOTE(review): the replacement is inserted as a plain string, so
        # BeautifulSoup entity-escapes the <b> tags on serialisation;
        # presumably the caller un-escapes them -- confirm against caller.
        nav_str.replace_with(pattern.sub(embolden, nav_str))

    return soup