Moved get_first_url outside of filter class
Signed-off-by: Paul Rothrock <paul@movetoiceland.com>
parent e5b24ac102
commit 2ee1cfa830
@@ -13,6 +13,40 @@ BLANK_B64 = '''
 data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkwAIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC
 '''
 
+
+def get_first_link(soup):
+    # Replace hrefs with only the intended destination (no "utm" type tags)
+    for a in soup.find_all('a', href=True):
+        href = a['href'].replace('https://www.google.com', '')
+
+        result_link = urlparse.urlparse(href)
+        query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else ''
+
+        # Return the first search result URL
+        if 'url?q=' in href:
+            return filter_link_args(href)
+
+
+def filter_link_args(query_link):
+    parsed_link = urlparse.urlparse(query_link)
+    link_args = parse_qs(parsed_link.query)
+    safe_args = {}
+
+    if len(link_args) == 0 and len(parsed_link) > 0:
+        return query_link
+
+    for arg in link_args.keys():
+        if arg in SKIP_ARGS:
+            continue
+
+        safe_args[arg] = link_args[arg]
+
+    # Remove original link query and replace with filtered args
+    query_link = query_link.replace(parsed_link.query, '')
+    if len(safe_args) > 0:
+        query_link = query_link + urlparse.urlencode(safe_args, doseq=True)
+    else:
+        query_link = query_link.replace('?', '')
+
+    return query_link
+
+
 class Filter:
     def __init__(self, mobile=False, config=None, secret_key=''):
@@ -58,40 +92,6 @@ class Filter:
 
         return soup
 
-    def get_first_url(self, soup):
-        # Replace hrefs with only the intended destination (no "utm" type tags)
-        for a in soup.find_all('a', href=True):
-            href = a['href'].replace('https://www.google.com', '')
-
-            result_link = urlparse.urlparse(href)
-            query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else ''
-
-            # Return the first search result URL
-            if 'url?q=' in href:
-                parsed_link = urlparse.urlparse(query_link)
-                link_args = parse_qs(parsed_link.query)
-                safe_args = {}
-
-                if len(link_args) == 0 and len(parsed_link) > 0:
-                    return query_link
-
-
-                for arg in link_args.keys():
-                    if arg in SKIP_ARGS:
-                        continue
-
-                    safe_args[arg] = link_args[arg]
-
-                # Remove original link query and replace with filtered args
-                query_link = query_link.replace(parsed_link.query, '')
-                if len(safe_args) > 0:
-                    query_link = query_link + urlparse.urlencode(safe_args, doseq=True)
-                else:
-                    query_link = query_link.replace('?', '')
-
-                print(query_link)
-                return query_link
-
     def remove_ads(self, soup):
         main_divs = soup.find('div', {'id': 'main'})
         if main_divs is None:
@@ -174,27 +174,7 @@ class Filter:
                 a['href'] = new_search
             elif 'url?q=' in href:
                 # Strip unneeded arguments
-                parsed_link = urlparse.urlparse(query_link)
-                link_args = parse_qs(parsed_link.query)
-                safe_args = {}
-
-                if len(link_args) == 0 and len(parsed_link) > 0:
-                    a['href'] = query_link
-                    continue
-
-                for arg in link_args.keys():
-                    if arg in SKIP_ARGS:
-                        continue
-
-                    safe_args[arg] = link_args[arg]
-
-                # Remove original link query and replace with filtered args
-                query_link = query_link.replace(parsed_link.query, '')
-                if len(safe_args) > 0:
-                    query_link = query_link + urlparse.urlencode(safe_args, doseq=True)
-                else:
-                    query_link = query_link.replace('?', '')
-
+                query_link = filter_link_args(query_link)
                 a['href'] = query_link
 
             # Add no-js option
@@ -210,4 +190,4 @@ def gen_nojs(soup, link, sibling):
     nojs_link['style'] = 'display:block;width:100%;'
     nojs_link.string = 'NoJS Link: ' + nojs_link['href']
     sibling.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
-    sibling.append(nojs_link)
+    sibling.append(nojs_link)
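Before moving on to the second file (the search route), the extracted filter_link_args can be exercised on its own. The sketch below is self-contained and only illustrative: SKIP_ARGS here is a hypothetical stand-in for the module-level list in app/filter.py, and the imports mirror the ones the diff's code assumes.

import urllib.parse as urlparse
from urllib.parse import parse_qs

SKIP_ARGS = ['utm', 'ved', 'sa']  # hypothetical subset, for illustration only


def filter_link_args(query_link):
    parsed_link = urlparse.urlparse(query_link)
    link_args = parse_qs(parsed_link.query)
    # Keep only the query args that are not on the skip list
    safe_args = {arg: val for arg, val in link_args.items() if arg not in SKIP_ARGS}

    # Drop the original query string, then re-append the surviving args
    query_link = query_link.replace(parsed_link.query, '')
    if safe_args:
        return query_link + urlparse.urlencode(safe_args, doseq=True)
    return query_link.replace('?', '')


print(filter_link_args('https://example.com/page?utm=newsletter&id=42'))
# -> https://example.com/page?id=42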
@@ -1,5 +1,5 @@
 from app import app
-from app.filter import Filter
+from app.filter import Filter, get_first_link
 from app.models.config import Config
 from app.request import Request, gen_query
 import argparse
@@ -74,9 +74,9 @@ def search():
     except InvalidToken:
         pass
 
-    feeling_lucky = q.startswith("! ")
+    feeling_lucky = q.startswith('! ')
 
-    if feeling_lucky:
+    if feeling_lucky: # Well do you, punk?
         q = q[2:]
 
     user_agent = request.headers.get('User-Agent')
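Quoting style and the new comment aside, the bang-prefix behavior is unchanged: a leading '! ' sets the flag and is sliced off before the query is used. In plain Python:

q = '! python docs'
feeling_lucky = q.startswith('! ')
if feeling_lucky:
    q = q[2:]  # drop the two-character '! ' prefix
print(feeling_lucky, repr(q))  # -> True 'python docs'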
@@ -90,7 +90,7 @@ def search():
     dirty_soup = BeautifulSoup(results, 'html.parser')
 
     if feeling_lucky:
-        redirect_url = content_filter.get_first_url(dirty_soup)
+        redirect_url = get_first_link(dirty_soup)
         return redirect(redirect_url, 303)  # Using 303 so the browser performs a GET request for the URL
     else:
         formatted_results = content_filter.clean(dirty_soup)
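With get_first_link now a module-level function, it can be called directly on a parsed results page, exactly as the route above does. A minimal sketch against fabricated markup (illustrative only, not Google's real result HTML):

from bs4 import BeautifulSoup
from app.filter import get_first_link

html = '<a href="/url?q=https://example.com/&sa=U">First result</a>'
soup = BeautifulSoup(html, 'html.parser')
print(get_first_link(soup))  # first matching href, with SKIP_ARGS params filtered out

The 303 in the redirect call matters here: it tells the browser to follow with a GET, regardless of how the original search request was made.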