From 9f435bf8fefd1319b69ad46b967ba914df393d6f Mon Sep 17 00:00:00 2001
From: Ben Busby <33362396+benbusby@users.noreply.github.com>
Date: Thu, 28 May 2020 18:14:10 -0600
Subject: [PATCH] Major refactor of requests and session management
- Switches from pycurl to requests library
- Allows for less janky decoding, especially with non-Latin character
sets
- Adds session level management of user configs
- Allows for each session to set its own config (people are probably
going to complain about this, though not sure if it'll be the same
number of people who are upset that their friends/family have to share
their config)
- Updates key gen/regen to more aggressively swap out keys after each
request
---
.gitignore | 1 +
app/__init__.py | 13 +++-
app/filter.py | 139 +++++++++++++++++++----------------
app/request.py | 30 +++-----
app/routes.py | 129 +++++++++++++++-----------------
app/static/config/.gitignore | 1 +
app/templates/header.html | 4 +-
app/templates/index.html | 10 +--
app/utils/__init__.py | 0
app/utils/misc.py | 20 +++++
app/utils/routing_utils.py | 69 +++++++++++++++++
requirements.txt | 3 +-
test/test_misc.py | 36 +++++++++
test/test_results.py | 6 +-
test/test_routes.py | 6 +-
15 files changed, 302 insertions(+), 165 deletions(-)
create mode 100644 app/static/config/.gitignore
create mode 100644 app/utils/__init__.py
create mode 100644 app/utils/misc.py
create mode 100644 app/utils/routing_utils.py
create mode 100644 test/test_misc.py
diff --git a/.gitignore b/.gitignore
index 20747c7..20307d1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ __pycache__/
*.pem
config.json
test/static
+flask_session/
# pip stuff
build/
diff --git a/app/__init__.py b/app/__init__.py
index 4b78a8d..4b0739b 100644
--- a/app/__init__.py
+++ b/app/__init__.py
@@ -1,12 +1,21 @@
+from app.utils.misc import generate_user_keys
from cryptography.fernet import Fernet
from flask import Flask
+from flask_session import Session
import os
app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static')
-app.secret_key = Fernet.generate_key()
+app.user_elements = {}
+app.config['SECRET_KEY'] = os.urandom(128)
+app.config['SESSION_TYPE'] = 'filesystem'
app.config['VERSION_NUMBER'] = '0.1.4'
app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__)))
app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static'))
-app.config['CONFIG_PATH'] = os.getenv('CONFIG_VOLUME', app.config['STATIC_FOLDER']) + '/config.json'
+app.config['CONFIG_PATH'] = os.getenv('CONFIG_VOLUME', app.config['STATIC_FOLDER'] + '/config')
+app.config['SESSION_FILE_DIR'] = app.config['CONFIG_PATH']
+app.config['SESSION_COOKIE_SECURE'] = True
+
+sess = Session()
+sess.init_app(app)
from app import routes
diff --git a/app/filter.py b/app/filter.py
index 5ff46b7..a5bfb19 100644
--- a/app/filter.py
+++ b/app/filter.py
@@ -17,14 +17,9 @@ data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42m
def get_first_link(soup):
# Replace hrefs with only the intended destination (no "utm" type tags)
for a in soup.find_all('a', href=True):
- href = a['href'].replace('https://www.google.com', '')
-
- result_link = urlparse.urlparse(href)
- query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else ''
-
# Return the first search result URL
- if 'url?q=' in href:
- return filter_link_args(href)
+ if 'url?q=' in a['href']:
+ return filter_link_args(a['href'])
def filter_link_args(query_link):
@@ -52,7 +47,7 @@ def filter_link_args(query_link):
class Filter:
- def __init__(self, mobile=False, config=None, secret_key=''):
+ def __init__(self, user_keys: dict, mobile=False, config=None):
if config is None:
config = {}
@@ -61,11 +56,16 @@ class Filter:
self.nojs = config['nojs'] if 'nojs' in config else False
self.new_tab = config['new_tab'] if 'new_tab' in config else False
self.mobile = mobile
- self.secret_key = secret_key
+ self.user_keys = user_keys
+ self._elements = 0
def __getitem__(self, name):
return getattr(self, name)
+ @property
+ def elements(self):
+ return self._elements
+
def reskin(self, page):
# Aesthetic only re-skinning
page = page.replace('>G<', '>Wh<')
@@ -76,11 +76,29 @@ class Filter:
return page
+ def encrypt_path(self, msg, is_element=False):
+ # Encrypts path to avoid plaintext results in logs
+ if is_element:
+ # Element paths are tracked differently in order for the element key to be regenerated
+ # once all elements have been loaded
+ enc_path = Fernet(self.user_keys['element_key']).encrypt(msg.encode()).decode()
+ self._elements += 1
+ return enc_path
+
+ return Fernet(self.user_keys['text_key']).encrypt(msg.encode()).decode()
+
def clean(self, soup):
self.remove_ads(soup)
- self.update_image_paths(soup)
self.update_styling(soup)
- self.update_links(soup)
+
+ for img in [_ for _ in soup.find_all('img') if 'src' in _.attrs]:
+ self.update_element_src(img, 'image/png')
+
+ for audio in [_ for _ in soup.find_all('audio') if 'src' in _.attrs]:
+ self.update_element_src(audio, 'audio/mpeg')
+
+ for link in soup.find_all('a', href=True):
+ self.update_link(link)
input_form = soup.find('form')
if input_form is not None:
@@ -116,25 +134,24 @@ class Filter:
for div in ad_divs:
div.decompose()
- def update_image_paths(self, soup):
- for img in [_ for _ in soup.find_all('img') if 'src' in _.attrs]:
- img_src = img['src']
- if img_src.startswith('//'):
- img_src = 'https:' + img_src
- elif img_src.startswith(LOGO_URL):
- # Re-brand with Whoogle logo
- img['src'] = '/static/img/logo.png'
- img['style'] = 'height:40px;width:162px'
- continue
- elif img_src.startswith(GOOG_IMG):
- img['src'] = BLANK_B64
- continue
+ def update_element_src(self, element, mimetype):
+ element_src = element['src']
+ if element_src.startswith('//'):
+ element_src = 'https:' + element_src
+ elif element_src.startswith(LOGO_URL):
+ # Re-brand with Whoogle logo
+ element['src'] = '/static/img/logo.png'
+ element['style'] = 'height:40px;width:162px'
+ return
+ elif element_src.startswith(GOOG_IMG):
+ element['src'] = BLANK_B64
+ return
- enc_src = Fernet(self.secret_key).encrypt(img_src.encode())
- img['src'] = '/tmp?image_url=' + enc_src.decode()
- # TODO: Non-mobile image results link to website instead of image
- # if not self.mobile:
- # img.append(BeautifulSoup(FULL_RES_IMG.format(img_src), 'html.parser'))
+ element['src'] = '/element?url=' + self.encrypt_path(element_src, is_element=True) + \
+ '&type=' + urlparse.quote(mimetype)
+ # TODO: Non-mobile image results link to website instead of image
+ # if not self.mobile:
+ # img.append(BeautifulSoup(FULL_RES_IMG.format(element_src), 'html.parser'))
def update_styling(self, soup):
# Remove unnecessary button(s)
@@ -170,45 +187,43 @@ class Filter:
for href_element in soup.findAll('a'):
href_element['style'] = 'color: white' if href_element['href'].startswith('/search') else ''
- def update_links(self, soup):
- # Replace hrefs with only the intended destination (no "utm" type tags)
- for a in soup.find_all('a', href=True):
- href = a['href'].replace('https://www.google.com', '')
- if '/advanced_search' in href:
- a.decompose()
- continue
- elif self.new_tab:
- a['target'] = '_blank'
+ def update_link(self, link):
+ # Replace href with only the intended destination (no "utm" type tags)
+ href = link['href'].replace('https://www.google.com', '')
+ if '/advanced_search' in href:
+ link.decompose()
+ return
+ elif self.new_tab:
+ link['target'] = '_blank'
- result_link = urlparse.urlparse(href)
- query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else ''
+ result_link = urlparse.urlparse(href)
+ query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else ''
- if query_link.startswith('/'):
- a['href'] = 'https://google.com' + query_link
- elif '/search?q=' in href:
- enc_result = Fernet(self.secret_key).encrypt(query_link.encode())
- new_search = '/search?q=' + enc_result.decode()
+ if query_link.startswith('/'):
+ link['href'] = 'https://google.com' + query_link
+ elif '/search?q=' in href:
+ new_search = '/search?q=' + self.encrypt_path(query_link)
- query_params = parse_qs(urlparse.urlparse(href).query)
- for param in VALID_PARAMS:
- param_val = query_params[param][0] if param in query_params else ''
- new_search += '&' + param + '=' + param_val
- a['href'] = new_search
- elif 'url?q=' in href:
- # Strip unneeded arguments
- a['href'] = filter_link_args(query_link)
+ query_params = parse_qs(urlparse.urlparse(href).query)
+ for param in VALID_PARAMS:
+ param_val = query_params[param][0] if param in query_params else ''
+ new_search += '&' + param + '=' + param_val
+ link['href'] = new_search
+ elif 'url?q=' in href:
+ # Strip unneeded arguments
+ link['href'] = filter_link_args(query_link)
- # Add no-js option
- if self.nojs:
- gen_nojs(soup, a['href'], a)
- else:
- a['href'] = href
+ # Add no-js option
+ if self.nojs:
+ gen_nojs(link)
+ else:
+ link['href'] = href
-def gen_nojs(soup, link, sibling):
- nojs_link = soup.new_tag('a')
- nojs_link['href'] = '/window?location=' + link
+def gen_nojs(sibling):
+ nojs_link = BeautifulSoup().new_tag('a')
+ nojs_link['href'] = '/window?location=' + sibling['href']
nojs_link['style'] = 'display:block;width:100%;'
nojs_link.string = 'NoJS Link: ' + nojs_link['href']
sibling.append(BeautifulSoup('