diff --git a/app/utils/results.py b/app/utils/results.py
deleted file mode 100644
index c9c2f12..0000000
--- a/app/utils/results.py
+++ /dev/null
@@ -1,158 +0,0 @@
-from bs4 import BeautifulSoup
-import os
-import urllib.parse as urlparse
-from urllib.parse import parse_qs
-
-# For .env reading
-from dotenv import load_dotenv
-load_dotenv(os.path.join(os.path.abspath(os.getcwd()), '.env'))
-
-SKIP_ARGS = ['ref_src', 'utm']
-SKIP_PREFIX = ['//www.', '//mobile.', '//m.']
-GOOG_STATIC = 'www.gstatic.com'
-GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo'
-LOGO_URL = GOOG_IMG + '_desk'
-BLANK_B64 = ('data:image/png;base64,'
-             'iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkw'
-             'AIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC')
-
-
-# Ad keywords
-BLACKLIST = [
-    'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama',
-    'Реклама', 'Anunț', '광고', 'annons', 'Annonse', 'Iklan', '広告', 'Augl.',
-    'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam', 'آگهی',
-    'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés', 'Anúncio'
-]
-
-# Setting up ALTs site Links
-SITE_ALTS = {}
-def get_alt_links():
-    print("Setting up alternative social media site..")
-    tw,yt,ig,rd = os.getenv("twitter"),os.getenv("youtube"),os.getenv("instagram"),os.getenv("reddit")
-    if tw != None and tw != "":
-        SITE_ALTS['twitter.com']=tw
-    else:
-        SITE_ALTS['twitter.com']=os.getenv('WHOOGLE_ALT_TW', 'nitter.net')
-
-    if yt != None and yt != "":
-        SITE_ALTS['youtube.com']=yt
-    else:
-        SITE_ALTS['youtube.com']=os.getenv('WHOOGLE_ALT_YT', 'invalid.com')
-
-    if ig != None and ig != "":
-        SITE_ALTS['instagram.com']=os.getenv("instagram")
-    else:
-        SITE_ALTS['instagram.com']=os.getenv('WHOOGLE_ALT_IG', 'bibliogram.art/u')
-
-    if rd != None and rd != "":
-        SITE_ALTS['reddit.com']=os.getenv("reddit")
-    else:
-        SITE_ALTS['reddit.com']=os.getenv('WHOOGLE_ALT_RD', 'libredd.it')
-
-get_alt_links()
-
-def has_ad_content(element: str) -> bool:
-    """Inspects an HTML element for ad related content
-
-    Args:
-        element: The HTML element to inspect
-
-    Returns:
-        bool: True/False for the element containing an ad
-
-    """
-    return (element.upper() in (value.upper() for value in BLACKLIST)
-            or 'ⓘ' in element)
-
-
-def get_first_link(soup: BeautifulSoup) -> str:
-    """Retrieves the first result link from the query response
-
-    Args:
-        soup: The BeautifulSoup response body
-
-    Returns:
-        str: A str link to the first result
-
-    """
-    # Replace hrefs with only the intended destination (no "utm" type tags)
-    for a in soup.find_all('a', href=True):
-        # Return the first search result URL
-        if 'url?q=' in a['href']:
-            return filter_link_args(a['href'])
-
-
-def get_site_alt(link: str) -> str:
-    """Returns an alternative to a particular site, if one is configured
-
-    Args:
-        link: A string result URL to check against the SITE_ALTS map
-
-    Returns:
-        str: An updated (or ignored) result link
-
-    """
-
-    for site_key in SITE_ALTS.keys():
-        if site_key not in link:
-            continue
-
-        link = link.replace(site_key, SITE_ALTS[site_key])
-        break
-
-    for prefix in SKIP_PREFIX:
-        link = link.replace(prefix, '//')
-
-    return link
-
-
-def filter_link_args(link: str) -> str:
-    """Filters out unnecessary URL args from a result link
-
-    Args:
-        link: The string result link to check for extraneous URL params
-
-    Returns:
-        str: An updated (or ignored) result link
-
-    """
-    parsed_link = urlparse.urlparse(link)
-    link_args = parse_qs(parsed_link.query)
-    safe_args = {}
-
-    if len(link_args) == 0 and len(parsed_link) > 0:
-        return link
-
-    for arg in link_args.keys():
-        if arg in SKIP_ARGS:
-            continue
-
-        safe_args[arg] = link_args[arg]
-
-    # Remove original link query and replace with filtered args
-    link = link.replace(parsed_link.query, '')
-    if len(safe_args) > 0:
-        link = link + urlparse.urlencode(safe_args, doseq=True)
-    else:
-        link = link.replace('?', '')
-
-    return link
-
-
-def append_nojs(result: BeautifulSoup) -> None:
-    """Appends a no-Javascript alternative for a search result
-
-    Args:
-        result: The search result to append a no-JS link to
-
-    Returns:
-        None
-
-    """
-    nojs_link = BeautifulSoup(features='html.parser').new_tag('a')
-    nojs_link['href'] = '/window?location=' + result['href']
-    nojs_link['style'] = 'display:block;width:100%;'
-    nojs_link.string = 'NoJS Link: ' + nojs_link['href']
-    result.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
-    result.append(nojs_link)
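
Reviewer note: the removed helpers are easier to evaluate with a few small, self-contained sketches. First, the ad-detection check in has_ad_content() is a case-insensitive membership test against BLACKLIST plus a check for the 'ⓘ' marker; the abridged list and sample strings below are illustrative only, not taken from the deleted file.

```python
# Sketch of the has_ad_content() check: case-insensitive membership in
# BLACKLIST, or presence of the 'ⓘ' info marker. BLACKLIST abridged here.
BLACKLIST = ['ad', 'anuncio', 'Anzeige', '広告', 'Реклама']

def has_ad_content(element: str) -> bool:
    return (element.upper() in (value.upper() for value in BLACKLIST)
            or 'ⓘ' in element)

print(has_ad_content('Anzeige'))        # True  -- matches regardless of case
print(has_ad_content('Example site'))   # False
```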
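Second, filter_link_args() and get_site_alt() combine to clean result URLs: query parameters named in SKIP_ARGS are dropped, then any configured SITE_ALTS host is substituted. The sketch below reproduces those two steps on a made-up link; the nitter.net mapping mirrors the WHOOGLE_ALT_TW default above, and the sample URL is invented.

```python
# Sketch of filter_link_args() followed by get_site_alt(): tracking args in
# SKIP_ARGS are removed, then a configured site alternative is swapped in.
import urllib.parse as urlparse
from urllib.parse import parse_qs

SKIP_ARGS = ['ref_src', 'utm']
SITE_ALTS = {'twitter.com': 'nitter.net'}   # as populated by get_alt_links()

def clean_link(link: str) -> str:
    parsed_link = urlparse.urlparse(link)
    link_args = parse_qs(parsed_link.query)
    safe_args = {k: v for k, v in link_args.items() if k not in SKIP_ARGS}

    # Strip the original query string, then re-append only the safe args
    link = link.replace(parsed_link.query, '')
    if safe_args:
        link = link + urlparse.urlencode(safe_args, doseq=True)
    else:
        link = link.replace('?', '')

    # Swap in an alternative front end, if one is configured
    for site_key, alt in SITE_ALTS.items():
        if site_key in link:
            link = link.replace(site_key, alt)
            break
    return link

print(clean_link('https://twitter.com/whoogle/status/1?ref_src=twsrc&lang=en'))
# -> https://nitter.net/whoogle/status/1?lang=en
```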
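Finally, append_nojs() attaches a separator plus an extra anchor pointing at the /window proxy endpoint. A minimal usage sketch, with invented markup:

```python
# Sketch of append_nojs(): a separator and a fallback <a> pointing at the
# /window endpoint are appended to a result element.
from bs4 import BeautifulSoup

soup = BeautifulSoup('<div><a href="https://example.com">Example</a></div>',
                     'html.parser')
result = soup.a

nojs_link = BeautifulSoup(features='html.parser').new_tag('a')
nojs_link['href'] = '/window?location=' + result['href']
nojs_link['style'] = 'display:block;width:100%;'
nojs_link.string = 'NoJS Link: ' + nojs_link['href']
result.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
result.append(nojs_link)

print(result.prettify())   # the original link now carries a NoJS fallback
```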