From c51dad752950c01c6e8dd2a3a918c4193e564389 Mon Sep 17 00:00:00 2001 From: Ben Busby <33362396+benbusby@users.noreply.github.com> Date: Sun, 25 Oct 2020 21:14:20 -0400 Subject: [PATCH] Add check for Tor socket on init, improve Tor error handling Initializing the app sends a heartbeat request to Tor to check for availability, and updates the home page config options accordingly. This heartbeat is sent on every request, to ensure Tor support can be reconfigured without restarting the entire app. If Tor support is enabled, and a subsequent request fails, then a new TorError exception is raised, and the Tor feature is disabled until a valid connection is restored. The max attempts has been updated to 10, since 5 seemed a bit too low for how quickly the attempts go by. --- .travis.yml | 4 +- app/__init__.py | 8 +++- app/request.py | 96 +++++++++++++++++++++++++++++++------- app/routes.py | 25 ++++++---- app/templates/index.html | 10 +++- app/utils/routing_utils.py | 9 ++-- 6 files changed, 112 insertions(+), 40 deletions(-) diff --git a/.travis.yml b/.travis.yml index 800f560..110c0ea 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,7 @@ language: python python: 3.6 before_install: - - sudo apt-get -y install libgnutls28-dev tor - - sudo cat rc/torrc > /etc/tor/torrc - - sudo systemctl tor start + - sudo apt-get -y install libgnutls28-dev install: - pip install -r requirements.txt script: diff --git a/app/__init__.py b/app/__init__.py index 820edb0..1c84ec3 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -1,3 +1,4 @@ +from app.request import send_tor_signal from app.utils.session_utils import generate_user_keys from app.utils.gen_ddg_bangs import gen_bangs_json from flask import Flask @@ -25,11 +26,14 @@ if not os.path.exists(app.config['CONFIG_PATH']): if not os.path.exists(app.config['SESSION_FILE_DIR']): os.makedirs(app.config['SESSION_FILE_DIR']) -# (Re)generate DDG bang filter, and create path if it doesn't exist yet +# Generate DDG bang filter, and create path if it doesn't exist yet if not os.path.exists(app.config['BANG_PATH']): os.makedirs(app.config['BANG_PATH']) -gen_bangs_json(app.config['BANG_FILE']) + gen_bangs_json(app.config['BANG_FILE']) Session(app) +# Attempt to acquire tor identity, to determine if Tor config is available +send_tor_signal() + from app import routes diff --git a/app/request.py b/app/request.py index 5f9abcd..c26e9bf 100644 --- a/app/request.py +++ b/app/request.py @@ -2,10 +2,10 @@ from app.models.config import Config from lxml import etree import random import requests -from requests import Response +from requests import Response, ConnectionError import urllib.parse as urlparse import os -from stem import Signal +from stem import Signal, SocketError from stem.control import Controller # Core Google search URLs @@ -19,13 +19,36 @@ DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0' VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source', 'nfpr'] -def acquire_tor_identity(): - with Controller.from_port(port=9051) as c: - c.authenticate() - c.signal(Signal.NEWNYM) +class TorError(Exception): + """Exception raised for errors in Tor requests. + + Attributes: + message -- a message describing the error that occurred + """ + + def __init__(self, message, disable=False): + self.message = message + self.disable = disable + super().__init__(self.message) -def gen_user_agent(is_mobile): +def send_tor_signal(new_identity=False) -> bool: + if new_identity: + print('Requesting new identity...') + + try: + with Controller.from_port(port=9051) as c: + c.authenticate() + c.signal(Signal.NEWNYM if new_identity else Signal.HEARTBEAT) + os.environ['TOR_AVAILABLE'] = '1' + return True + except (SocketError, ConnectionRefusedError, ConnectionError): + os.environ['TOR_AVAILABLE'] = '0' + + return False + + +def gen_user_agent(is_mobile) -> str: mozilla = random.choice(['Moo', 'Woah', 'Bro', 'Slow']) + 'zilla' firefox = random.choice(['Choir', 'Squier', 'Higher', 'Wire']) + 'fox' linux = random.choice(['Win', 'Sin', 'Gin', 'Fin', 'Kin']) + 'ux' @@ -36,7 +59,7 @@ def gen_user_agent(is_mobile): return DESKTOP_UA.format(mozilla, linux, firefox) -def gen_query(query, args, config, near_city=None): +def gen_query(query, args, config, near_city=None) -> str: param_dict = {key: '' for key in VALID_PARAMS} # Use :past(hour/day/week/month/year) if available @@ -95,7 +118,19 @@ def gen_query(query, args, config, near_city=None): class Request: + """Class used for handling all outbound requests, including search queries, + search suggestions, and loading of external content (images, audio, etc). + + Attributes: + normal_ua -- the user's current user agent + root_path -- the root path of the whoogle instance + config -- the user's current whoogle configuration + """ def __init__(self, normal_ua, root_path, config: Config): + # Send heartbeat to Tor, used in determining if the user can or cannot + # enable Tor for future requests + send_tor_signal() + self.language = config.lang_search self.mobile = 'Android' in normal_ua or 'iPhone' in normal_ua self.modified_user_agent = gen_user_agent(self.mobile) @@ -123,7 +158,16 @@ class Request: def __getitem__(self, name): return getattr(self, name) - def autocomplete(self, query): + def autocomplete(self, query) -> list: + """Sends a query to Google's search suggestion service + + Args: + query: The in-progress query to send + + Returns: + list: The list of matches for possible search suggestions + + """ ac_query = dict(hl=self.language, q=query) response = self.send(base_url=AUTOCOMPLETE_URL, query=urlparse.urlencode(ac_query)).text @@ -134,27 +178,43 @@ class Request: return [] def send(self, base_url=SEARCH_URL, query='', attempt=0) -> Response: + """Sends an outbound request to a URL. Optionally sends the request using Tor, if + enabled by the user. + + Args: + base_url: The URL to use in the request + query: The optional query string for the request + attempt: The number of attempts made for the request (used for cycling + through Tor identities, if enabled) + + Returns: + Response: The Response object returned by the requests call + + """ headers = { 'User-Agent': self.modified_user_agent } + if self.tor and not send_tor_signal(new_identity=attempt > 0): # Request new identity if the last one failed + raise TorError("Tor was previously enabled, but the connection has been dropped. Please check your " + + "Tor configuration and try again.", disable=True) + # Make sure that the tor connection is valid, if enabled if self.tor: tor_check = requests.get('https://check.torproject.org/', proxies=self.proxies, headers=headers) self.tor_valid = 'Congratulations' in tor_check.text - # TODO: Throw error if the connection isn't valid? + + if not self.tor_valid: + raise TorError("Tor connection succeeded, but the connection could not be validated by torproject.org", + disable=True) response = requests.get(base_url + query, proxies=self.proxies, headers=headers) - # Retry query with new identity if using Tor (max 5 attempts) - if 'form id="captcha-form"' in response.text: + # Retry query with new identity if using Tor (max 10 attempts) + if 'form id="captcha-form"' in response.text and self.tor: attempt += 1 - if attempt > 5: - return requests.get(self.root_path + 'tor-reject?q=' + query) - acquire_tor_identity() + if attempt > 10: + raise TorError("Tor query failed -- max attempts exceeded 10") return self.send(base_url, query, attempt) return response - - -acquire_tor_identity() diff --git a/app/routes.py b/app/routes.py index 9cded21..258f60c 100644 --- a/app/routes.py +++ b/app/routes.py @@ -9,12 +9,12 @@ import uuid from functools import wraps import waitress -from flask import jsonify, make_response, request, redirect, render_template, send_file, session +from flask import jsonify, make_response, request, redirect, render_template, send_file, session, url_for from requests import exceptions from app import app from app.models.config import Config -from app.request import Request +from app.request import Request, TorError from app.utils.session_utils import valid_user_session from app.utils.routing_utils import * @@ -72,6 +72,7 @@ def before_request_func(): request.headers.get('User-Agent'), request.url_root, config=g.user_config) + g.app_location = g.user_config.url @@ -106,11 +107,15 @@ def unknown_page(e): def index(): # Reset keys session['fernet_keys'] = generate_user_keys(g.cookies_disabled) + error_message = session['error_message'] if 'error_message' in session else '' + session['error_message'] = '' return render_template('index.html', languages=Config.LANGUAGES, countries=Config.COUNTRIES, config=g.user_config, + error_message=error_message, + tor_available=int(os.environ.get('TOR_AVAILABLE')), version_number=app.config['VERSION_NUMBER']) @@ -141,6 +146,8 @@ def autocomplete(): elif request.data: q = urlparse.unquote_plus(request.data.decode('utf-8').replace('q=', '')) + # Return a list of suggestions for the query + # Note: If Tor is enabled, this returns nothing, as the request is almost always rejected return jsonify([q, g.user_request.autocomplete(q) if not g.user_config.tor else []]) @@ -162,7 +169,13 @@ def search(): return redirect('/') # Generate response and number of external elements from the page - response, elements = search_util.generate_response() + try: + response, elements = search_util.generate_response() + except TorError as e: + session['error_message'] = e.message + ("\\n\\nTor config is now disabled!" if e.disable else "") + session['config']['tor'] = False if e.disable else session['config']['tor'] + return redirect(url_for('.index')) + if search_util.feeling_lucky or elements < 0: return redirect(response, code=303) @@ -272,12 +285,6 @@ def window(): return render_template('display.html', response=results) -@app.route('/tor-reject', methods=['GET']) -def tor_reject(): - return render_template('error.html', - query=request.args.get('q') + ' - Tor rejection') - - def run_app(): parser = argparse.ArgumentParser(description='Whoogle Search console runner') parser.add_argument('--port', default=5000, metavar='', diff --git a/app/templates/index.html b/app/templates/index.html index c5af52b..2f996a3 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -29,6 +29,12 @@ Whoogle Search +
@@ -111,8 +117,8 @@
- - + +
diff --git a/app/utils/routing_utils.py b/app/utils/routing_utils.py index ebf757f..a083da2 100644 --- a/app/utils/routing_utils.py +++ b/app/utils/routing_utils.py @@ -71,14 +71,11 @@ class RoutingUtils: full_query = gen_query(self.query, self.request_params, self.config, content_filter.near) get_body = g.user_request.send(query=full_query) - if '/tor-reject' in get_body.text: - # Skip formatting if this is a Tor error page - return get_body, -1 - # Produce cleanable html soup from response html_soup = BeautifulSoup(content_filter.reskin(get_body.text), 'html.parser') - html_soup.insert( - 0, BeautifulSoup(TOR_BANNER, features='lxml') if g.user_request.tor_valid else BeautifulSoup("")) + html_soup.insert(0, BeautifulSoup( + TOR_BANNER, + features='lxml') if g.user_request.tor_valid else BeautifulSoup("", features="lxml")) if self.feeling_lucky: return get_first_link(html_soup), 1