From 2ee1cfa830a425e1f0298597e3d484893371d46e Mon Sep 17 00:00:00 2001 From: Paul Rothrock Date: Sat, 16 May 2020 19:57:43 -0400 Subject: [PATCH] Moved get_first_url outside of filter class Signed-off-by: Paul Rothrock --- app/filter.py | 92 ++++++++++++++++++++------------------------------- app/routes.py | 8 ++--- 2 files changed, 40 insertions(+), 60 deletions(-) diff --git a/app/filter.py b/app/filter.py index 93cb812..6827f44 100644 --- a/app/filter.py +++ b/app/filter.py @@ -13,6 +13,40 @@ BLANK_B64 = '''  ''' +def get_first_link(soup): + # Replace hrefs with only the intended destination (no "utm" type tags) + for a in soup.find_all('a', href=True): + href = a['href'].replace('https://www.google.com', '') + + result_link = urlparse.urlparse(href) + query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else '' + + # Return the first search result URL + if 'url?q=' in href: + return filter_link_args(href) + +def filter_link_args(query_link): + parsed_link = urlparse.urlparse(query_link) + link_args = parse_qs(parsed_link.query) + safe_args = {} + + if len(link_args) == 0 and len(parsed_link) > 0: + return query_link + + for arg in link_args.keys(): + if arg in SKIP_ARGS: + continue + + safe_args[arg] = link_args[arg] + + # Remove original link query and replace with filtered args + query_link = query_link.replace(parsed_link.query, '') + if len(safe_args) > 0: + query_link = query_link + urlparse.urlencode(safe_args, doseq=True) + else: + query_link = query_link.replace('?', '') + + return query_link class Filter: def __init__(self, mobile=False, config=None, secret_key=''): @@ -58,40 +92,6 @@ class Filter: return soup - def get_first_url(self, soup): - # Replace hrefs with only the intended destination (no "utm" type tags) - for a in soup.find_all('a', href=True): - href = a['href'].replace('https://www.google.com', '') - - result_link = urlparse.urlparse(href) - query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else '' - - # Return the first search result URL - if 'url?q=' in href: - parsed_link = urlparse.urlparse(query_link) - link_args = parse_qs(parsed_link.query) - safe_args = {} - - if len(link_args) == 0 and len(parsed_link) > 0: - return query_link - - - for arg in link_args.keys(): - if arg in SKIP_ARGS: - continue - - safe_args[arg] = link_args[arg] - - # Remove original link query and replace with filtered args - query_link = query_link.replace(parsed_link.query, '') - if len(safe_args) > 0: - query_link = query_link + urlparse.urlencode(safe_args, doseq=True) - else: - query_link = query_link.replace('?', '') - - print(query_link) - return query_link - def remove_ads(self, soup): main_divs = soup.find('div', {'id': 'main'}) if main_divs is None: @@ -174,27 +174,7 @@ class Filter: a['href'] = new_search elif 'url?q=' in href: # Strip unneeded arguments - parsed_link = urlparse.urlparse(query_link) - link_args = parse_qs(parsed_link.query) - safe_args = {} - - if len(link_args) == 0 and len(parsed_link) > 0: - a['href'] = query_link - continue - - for arg in link_args.keys(): - if arg in SKIP_ARGS: - continue - - safe_args[arg] = link_args[arg] - - # Remove original link query and replace with filtered args - query_link = query_link.replace(parsed_link.query, '') - if len(safe_args) > 0: - query_link = query_link + urlparse.urlencode(safe_args, doseq=True) - else: - query_link = query_link.replace('?', '') - + query_link = filter_link_args(query_link) a['href'] = query_link # Add no-js option @@ -210,4 +190,4 @@ def gen_nojs(soup, link, sibling): nojs_link['style'] = 'display:block;width:100%;' nojs_link.string = 'NoJS Link: ' + nojs_link['href'] sibling.append(BeautifulSoup('


', 'html.parser')) - sibling.append(nojs_link) + sibling.append(nojs_link) \ No newline at end of file diff --git a/app/routes.py b/app/routes.py index 958efd3..2a3ea47 100644 --- a/app/routes.py +++ b/app/routes.py @@ -1,5 +1,5 @@ from app import app -from app.filter import Filter +from app.filter import Filter, get_first_link from app.models.config import Config from app.request import Request, gen_query import argparse @@ -74,9 +74,9 @@ def search(): except InvalidToken: pass - feeling_lucky = q.startswith("! ") + feeling_lucky = q.startswith('! ') - if feeling_lucky: + if feeling_lucky: # Well do you, punk? q = q[2:] user_agent = request.headers.get('User-Agent') @@ -90,7 +90,7 @@ def search(): dirty_soup = BeautifulSoup(results, 'html.parser') if feeling_lucky: - redirect_url = content_filter.get_first_url(dirty_soup) + redirect_url = get_first_link(dirty_soup) return redirect(redirect_url, 303) # Using 303 so the browser performs a GET request for the URL else: formatted_results = content_filter.clean(dirty_soup)