Add bold function to results.py
This commit is contained in:
parent
196ff3254e
commit
fd482c69fb
|
@ -2,6 +2,7 @@ from bs4 import BeautifulSoup
|
||||||
import os
|
import os
|
||||||
import urllib.parse as urlparse
|
import urllib.parse as urlparse
|
||||||
from urllib.parse import parse_qs
|
from urllib.parse import parse_qs
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
SKIP_ARGS = ['ref_src', 'utm']
|
SKIP_ARGS = ['ref_src', 'utm']
|
||||||
|
@ -34,6 +35,44 @@ SITE_ALTS = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def bold_search_terms(response: str, query: str) -> BeautifulSoup:
    """Wraps all search terms in bold tags (<b>). If any terms are wrapped
    in quotes, only that exact phrase will be made bold.

    Matching is case-insensitive and each match keeps the casing it has in
    the document. Terms are reduced to ASCII letters, digits and spaces
    before matching; terms that end up empty are ignored. Only ordinary
    text is modified: comments and other special strings, as well as the
    contents of <script> and <style> elements, are left untouched.

    Args:
        response: The initial response body for the query
        query: The original search query

    Returns:
        BeautifulSoup: modified soup object with bold items
    """
    # NOTE(review): only BeautifulSoup appears to be imported at module
    # level, so NavigableString is imported here to keep the function
    # self-contained -- hoist to the top-level bs4 import if preferred.
    from bs4 import NavigableString

    soup = BeautifulSoup(response, 'html.parser')

    # Split all words out of query, grouping the ones wrapped in quotes
    terms = set()
    for raw_term in re.split(r'\s+(?=[^"]*(?:"[^"]*"[^"]*)*$)', query):
        term = re.sub(r'[^A-Za-z0-9 ]+', '', raw_term).strip()
        # An empty term would compile to a regex that matches everywhere
        if term:
            terms.add(term.lower())

    if not terms:
        return soup

    # One combined pattern applied in a single pass, so text that has
    # already been made bold is never matched (and wrapped) a second time.
    # Longer terms go first so a quoted phrase wins over a word inside it.
    ordered_terms = sorted(terms, key=lambda item: (-len(item), item))
    pattern = re.compile(
        '|'.join(re.escape(item) for item in ordered_terms), re.I)

    for text_node in soup.find_all(string=pattern):
        # Subclasses of NavigableString (comments, doctype, CDATA, ...) are
        # not visible page text; rewriting them would expose or corrupt them
        if type(text_node) is not NavigableString:
            continue

        parent = text_node.parent
        if parent is None or parent.name in ('script', 'style'):
            continue

        # Rebuild the text as a sequence of plain strings and real <b>
        # tags. Passing a string containing "<b>" to replace_with() would
        # only insert escaped text, not markup.
        text = str(text_node)
        pieces = []
        cursor = 0
        for match in pattern.finditer(text):
            if match.start() > cursor:
                pieces.append(NavigableString(text[cursor:match.start()]))
            bold_tag = soup.new_tag('b')
            bold_tag.string = match.group(0)
            pieces.append(bold_tag)
            cursor = match.end()

        if cursor < len(text):
            pieces.append(NavigableString(text[cursor:]))

        # Insert one node at a time: works on every bs4 4.x release
        for piece in pieces:
            text_node.insert_before(piece)
        text_node.extract()

    return soup
|
||||||
|
|
||||||
def has_ad_content(element: str) -> bool:
|
def has_ad_content(element: str) -> bool:
|
||||||
"""Inspects an HTML element for ad related content
|
"""Inspects an HTML element for ad related content
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user