updating results.py for social-media-site-using-.env
parent 1a1782e3a0
commit 99e60b2f1e
@@ -3,6 +3,10 @@ import os
 import urllib.parse as urlparse
 from urllib.parse import parse_qs
 
+# For .env reading
+from dotenv import load_dotenv
+load_dotenv(os.path.join(os.path.abspath(os.getcwd()), '.env'))
+
 SKIP_ARGS = ['ref_src', 'utm']
 SKIP_PREFIX = ['//www.', '//mobile.', '//m.']
 GOOG_STATIC = 'www.gstatic.com'
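The three added lines above load a .env file from the current working directory before any of the module-level constants are evaluated. As a quick illustration of what that gives you at runtime (the keys shown are the ones read further down by get_alt_links(); the example file contents and the print calls are not part of the commit):

import os
from dotenv import load_dotenv

# Assuming a .env file in the working directory containing, for example:
#   twitter=nitter.net
#   reddit=libredd.it
load_dotenv(os.path.join(os.path.abspath(os.getcwd()), '.env'))

print(os.getenv('twitter'))              # 'nitter.net' once the file is loaded
print(os.getenv('youtube'))              # None, since the key is not in the file
print(os.getenv('youtube', 'fallback'))  # 'fallback', getenv's second argument is the default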
@@ -12,6 +16,7 @@ BLANK_B64 = ('data:image/png;base64,'
             'iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkw'
             'AIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC')
+
 
 # Ad keywords
 BLACKLIST = [
     'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama',
@@ -20,20 +25,42 @@ BLACKLIST = [
     'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés', 'Anúncio'
 ]
 
-SITE_ALTS = {
-    'twitter.com': os.getenv('WHOOGLE_ALT_TW', 'nitter.net'),
-    'youtube.com': os.getenv('WHOOGLE_ALT_YT', 'invidious.snopyta.org'),
-    'instagram.com': os.getenv('WHOOGLE_ALT_IG', 'bibliogram.art/u'),
-    'reddit.com': os.getenv('WHOOGLE_ALT_RD', 'libredd.it')
-}
+# Setting up ALTs site Links
+SITE_ALTS = {}
+def get_alt_links():
+    print("Setting up alternative social media site..")
+    tw,yt,ig,rd = os.getenv("twitter"),os.getenv("youtube"),os.getenv("instagram"),os.getenv("reddit")
+    if tw != None and tw != "":
+        SITE_ALTS['twitter.com']=tw
+    else:
+        SITE_ALTS['twitter.com']=os.getenv('WHOOGLE_ALT_TW', 'nitter.net')
+
+    if yt != None and yt != "":
+        SITE_ALTS['youtube.com']=yt
+    else:
+        SITE_ALTS['youtube.com']=os.getenv('WHOOGLE_ALT_YT', 'invalid.com')
+
+    if ig != None and ig != "":
+        SITE_ALTS['instagram.com']=os.getenv("instagram")
+    else:
+        SITE_ALTS['instagram.com']=os.getenv('WHOOGLE_ALT_IG', 'bibliogram.art/u')
+
+    if rd != None and rd != "":
+        SITE_ALTS['reddit.com']=os.getenv("reddit")
+    else:
+        SITE_ALTS['reddit.com']=os.getenv('WHOOGLE_ALT_RD', 'libredd.it')
+
+get_alt_links()
 
 def has_ad_content(element: str) -> bool:
     """Inspects an HTML element for ad related content
 
     Args:
         element: The HTML element to inspect
 
     Returns:
         bool: True/False for the element containing an ad
 
     """
     return (element.upper() in (value.upper() for value in BLACKLIST)
             or 'ⓘ' in element)
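The hunk above replaces the static SITE_ALTS dictionary with an empty dict that get_alt_links() fills in at import time, preferring the plain twitter/youtube/instagram/reddit variables from .env and falling back to the existing WHOOGLE_ALT_* variables otherwise (note that the youtube.com fallback default changes from 'invidious.snopyta.org' to 'invalid.com' in this commit). Purely as an editorial sketch of the same lookup, not part of the commit, the repeated if/else blocks could be driven by a single table:

import os

SITE_ALTS = {}

# Illustrative only; mirrors the committed behaviour, including the
# 'invalid.com' fallback for youtube.com.
ALT_SOURCES = [
    ('twitter',   'twitter.com',   'WHOOGLE_ALT_TW', 'nitter.net'),
    ('youtube',   'youtube.com',   'WHOOGLE_ALT_YT', 'invalid.com'),
    ('instagram', 'instagram.com', 'WHOOGLE_ALT_IG', 'bibliogram.art/u'),
    ('reddit',    'reddit.com',    'WHOOGLE_ALT_RD', 'libredd.it'),
]

def get_alt_links():
    for env_key, site, legacy_var, default in ALT_SOURCES:
        value = os.getenv(env_key)
        # Treat empty strings as unset, matching the committed tw != None and tw != "" checks
        SITE_ALTS[site] = value if value else os.getenv(legacy_var, default)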
@@ -41,10 +68,13 @@ def has_ad_content(element: str) -> bool:
 
 def get_first_link(soup: BeautifulSoup) -> str:
     """Retrieves the first result link from the query response
 
     Args:
         soup: The BeautifulSoup response body
 
     Returns:
         str: A str link to the first result
 
     """
     # Replace hrefs with only the intended destination (no "utm" type tags)
     for a in soup.find_all('a', href=True):
@@ -55,11 +85,15 @@ def get_first_link(soup: BeautifulSoup) -> str:
 
 def get_site_alt(link: str) -> str:
     """Returns an alternative to a particular site, if one is configured
 
     Args:
         link: A string result URL to check against the SITE_ALTS map
 
     Returns:
         str: An updated (or ignored) result link
 
     """
 
     for site_key in SITE_ALTS.keys():
         if site_key not in link:
             continue
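Only the opening lines of get_site_alt are visible in this hunk, so the actual substitution logic is not shown here. As a hypothetical sketch of how a SITE_ALTS substitution of this kind typically works (illustration only, not the project's implementation):

# Hypothetical sketch; the real body of get_site_alt is truncated above.
def get_site_alt_sketch(link: str) -> str:
    for site_key in SITE_ALTS.keys():
        if site_key not in link:
            continue
        # Swap the matched host for its configured alternative front-end
        return link.replace(site_key, SITE_ALTS[site_key])
    return link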
@@ -75,10 +109,13 @@ def get_site_alt(link: str) -> str:
 
 def filter_link_args(link: str) -> str:
     """Filters out unnecessary URL args from a result link
 
     Args:
         link: The string result link to check for extraneous URL params
 
     Returns:
         str: An updated (or ignored) result link
 
     """
     parsed_link = urlparse.urlparse(link)
     link_args = parse_qs(parsed_link.query)
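The first two lines of filter_link_args shown above rely on the urllib.parse helpers imported at the top of the file. A small standalone example of what they return for a typical result URL (the URL itself is made up):

import urllib.parse as urlparse
from urllib.parse import parse_qs

parsed_link = urlparse.urlparse('https://example.com/page?utm_source=feed&id=42')
print(parsed_link.netloc)           # 'example.com'
print(parse_qs(parsed_link.query))  # {'utm_source': ['feed'], 'id': ['42']}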
@@ -105,10 +142,13 @@ def filter_link_args(link: str) -> str:
 
 def append_nojs(result: BeautifulSoup) -> None:
     """Appends a no-Javascript alternative for a search result
 
     Args:
         result: The search result to append a no-JS link to
 
     Returns:
         None
 
     """
     nojs_link = BeautifulSoup(features='html.parser').new_tag('a')
     nojs_link['href'] = '/window?location=' + result['href']
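For completeness, the two visible lines of append_nojs use BeautifulSoup's new_tag to build a detached anchor pointing at the app's /window route. A minimal standalone version of that pattern (the target URL and link text are illustrative):

from bs4 import BeautifulSoup

nojs_link = BeautifulSoup(features='html.parser').new_tag('a')
nojs_link['href'] = '/window?location=' + 'https://example.com'
nojs_link.string = 'example (no JS)'
print(nojs_link)  # <a href="/window?location=https://example.com">example (no JS)</a>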