Add bold function to results.py

This commit is contained in:
DUO Labs 2021-10-26 07:33:43 -04:00 committed by GitHub
parent 196ff3254e
commit fd482c69fb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -2,6 +2,7 @@ from bs4 import BeautifulSoup
import os
import urllib.parse as urlparse
from urllib.parse import parse_qs
import re
SKIP_ARGS = ['ref_src', 'utm']
@ -34,6 +35,44 @@ SITE_ALTS = {
}
def bold_search_terms(response: str, query: str) -> BeautifulSoup:
    """Wraps all search terms in bold tags (<b>).

    The query is split on whitespace, except that whitespace inside a pair
    of double quotes does not split, so a quoted phrase is kept together and
    only that exact phrase is made bold. Every character other than ASCII
    letters, digits and spaces is stripped from each term before matching.
    Matching is case-insensitive and the matched text keeps its original
    casing inside the inserted <b> element.

    Args:
        response: The initial response body for the query
        query: The original search query

    Returns:
        BeautifulSoup: modified soup object with bold items
    """
    # Imported locally so the block does not depend on names that the
    # module-level imports may not provide.
    from bs4.element import NavigableString, PreformattedString

    soup = BeautifulSoup(response, 'html.parser')

    # Split all words out of query, grouping the ones wrapped in quotes
    terms = []
    for word in re.split(r'\s+(?=[^"]*(?:"[^"]*"[^"]*)*$)', query):
        word = re.sub(r'[^A-Za-z0-9 ]+', '', word).strip()
        # A term reduced to nothing would produce a pattern that matches
        # every position of every string, so it is dropped.
        if word:
            terms.append(word)

    if not terms:
        return soup

    # One combined pattern means each text node is rewritten exactly once,
    # so markup inserted for one term is never re-scanned for another.
    # Longest terms go first so a quoted phrase wins over a word it contains.
    terms.sort(key=len, reverse=True)
    pattern = re.compile('|'.join(re.escape(term) for term in terms), re.I)

    for node in soup.find_all(string=pattern):
        # Comments, doctypes, CDATA and similar are not visible text.
        if isinstance(node, PreformattedString):
            continue
        # Injecting tags into script/style content would corrupt it.
        if node.parent is not None and node.parent.name in ('script', 'style'):
            continue

        text = str(node)
        cursor = 0
        for match in pattern.finditer(text):
            if match.start() > cursor:
                node.insert_before(
                    NavigableString(text[cursor:match.start()]))
            # Build a real <b> element; a '<b>...</b>' string handed to
            # replace_with() would be treated as text and escaped on output.
            bold = soup.new_tag('b')
            bold.string = match.group(0)
            node.insert_before(bold)
            cursor = match.end()
        if cursor < len(text):
            node.insert_before(NavigableString(text[cursor:]))
        # All pieces now precede the original node; remove the original.
        node.extract()

    return soup
def has_ad_content(element: str) -> bool:
"""Inspects an HTML element for ad related content