diff --git a/.travis.yml b/.travis.yml index 800f560..110c0ea 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,7 @@ language: python python: 3.6 before_install: - - sudo apt-get -y install libgnutls28-dev tor - - sudo cat rc/torrc > /etc/tor/torrc - - sudo systemctl tor start + - sudo apt-get -y install libgnutls28-dev install: - pip install -r requirements.txt script: diff --git a/app/__init__.py b/app/__init__.py index 820edb0..1c84ec3 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -1,3 +1,4 @@ +from app.request import send_tor_signal from app.utils.session_utils import generate_user_keys from app.utils.gen_ddg_bangs import gen_bangs_json from flask import Flask @@ -25,11 +26,14 @@ if not os.path.exists(app.config['CONFIG_PATH']): if not os.path.exists(app.config['SESSION_FILE_DIR']): os.makedirs(app.config['SESSION_FILE_DIR']) -# (Re)generate DDG bang filter, and create path if it doesn't exist yet +# Generate DDG bang filter, and create path if it doesn't exist yet if not os.path.exists(app.config['BANG_PATH']): os.makedirs(app.config['BANG_PATH']) -gen_bangs_json(app.config['BANG_FILE']) + gen_bangs_json(app.config['BANG_FILE']) Session(app) +# Attempt to acquire tor identity, to determine if Tor config is available +send_tor_signal() + from app import routes diff --git a/app/request.py b/app/request.py index 5f9abcd..c26e9bf 100644 --- a/app/request.py +++ b/app/request.py @@ -2,10 +2,10 @@ from app.models.config import Config from lxml import etree import random import requests -from requests import Response +from requests import Response, ConnectionError import urllib.parse as urlparse import os -from stem import Signal +from stem import Signal, SocketError from stem.control import Controller # Core Google search URLs @@ -19,13 +19,36 @@ DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0' VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source', 'nfpr'] -def acquire_tor_identity(): - with Controller.from_port(port=9051) as c: - c.authenticate() - c.signal(Signal.NEWNYM) +class TorError(Exception): + """Exception raised for errors in Tor requests. + + Attributes: + message -- a message describing the error that occurred + """ + + def __init__(self, message, disable=False): + self.message = message + self.disable = disable + super().__init__(self.message) -def gen_user_agent(is_mobile): +def send_tor_signal(new_identity=False) -> bool: + if new_identity: + print('Requesting new identity...') + + try: + with Controller.from_port(port=9051) as c: + c.authenticate() + c.signal(Signal.NEWNYM if new_identity else Signal.HEARTBEAT) + os.environ['TOR_AVAILABLE'] = '1' + return True + except (SocketError, ConnectionRefusedError, ConnectionError): + os.environ['TOR_AVAILABLE'] = '0' + + return False + + +def gen_user_agent(is_mobile) -> str: mozilla = random.choice(['Moo', 'Woah', 'Bro', 'Slow']) + 'zilla' firefox = random.choice(['Choir', 'Squier', 'Higher', 'Wire']) + 'fox' linux = random.choice(['Win', 'Sin', 'Gin', 'Fin', 'Kin']) + 'ux' @@ -36,7 +59,7 @@ def gen_user_agent(is_mobile): return DESKTOP_UA.format(mozilla, linux, firefox) -def gen_query(query, args, config, near_city=None): +def gen_query(query, args, config, near_city=None) -> str: param_dict = {key: '' for key in VALID_PARAMS} # Use :past(hour/day/week/month/year) if available @@ -95,7 +118,19 @@ def gen_query(query, args, config, near_city=None): class Request: + """Class used for handling all outbound requests, including search queries, + search suggestions, and loading of external content (images, audio, etc). + + Attributes: + normal_ua -- the user's current user agent + root_path -- the root path of the whoogle instance + config -- the user's current whoogle configuration + """ def __init__(self, normal_ua, root_path, config: Config): + # Send heartbeat to Tor, used in determining if the user can or cannot + # enable Tor for future requests + send_tor_signal() + self.language = config.lang_search self.mobile = 'Android' in normal_ua or 'iPhone' in normal_ua self.modified_user_agent = gen_user_agent(self.mobile) @@ -123,7 +158,16 @@ class Request: def __getitem__(self, name): return getattr(self, name) - def autocomplete(self, query): + def autocomplete(self, query) -> list: + """Sends a query to Google's search suggestion service + + Args: + query: The in-progress query to send + + Returns: + list: The list of matches for possible search suggestions + + """ ac_query = dict(hl=self.language, q=query) response = self.send(base_url=AUTOCOMPLETE_URL, query=urlparse.urlencode(ac_query)).text @@ -134,27 +178,43 @@ class Request: return [] def send(self, base_url=SEARCH_URL, query='', attempt=0) -> Response: + """Sends an outbound request to a URL. Optionally sends the request using Tor, if + enabled by the user. + + Args: + base_url: The URL to use in the request + query: The optional query string for the request + attempt: The number of attempts made for the request (used for cycling + through Tor identities, if enabled) + + Returns: + Response: The Response object returned by the requests call + + """ headers = { 'User-Agent': self.modified_user_agent } + if self.tor and not send_tor_signal(new_identity=attempt > 0): # Request new identity if the last one failed + raise TorError("Tor was previously enabled, but the connection has been dropped. Please check your " + + "Tor configuration and try again.", disable=True) + # Make sure that the tor connection is valid, if enabled if self.tor: tor_check = requests.get('https://check.torproject.org/', proxies=self.proxies, headers=headers) self.tor_valid = 'Congratulations' in tor_check.text - # TODO: Throw error if the connection isn't valid? + + if not self.tor_valid: + raise TorError("Tor connection succeeded, but the connection could not be validated by torproject.org", + disable=True) response = requests.get(base_url + query, proxies=self.proxies, headers=headers) - # Retry query with new identity if using Tor (max 5 attempts) - if 'form id="captcha-form"' in response.text: + # Retry query with new identity if using Tor (max 10 attempts) + if 'form id="captcha-form"' in response.text and self.tor: attempt += 1 - if attempt > 5: - return requests.get(self.root_path + 'tor-reject?q=' + query) - acquire_tor_identity() + if attempt > 10: + raise TorError("Tor query failed -- max attempts exceeded 10") return self.send(base_url, query, attempt) return response - - -acquire_tor_identity() diff --git a/app/routes.py b/app/routes.py index 9cded21..258f60c 100644 --- a/app/routes.py +++ b/app/routes.py @@ -9,12 +9,12 @@ import uuid from functools import wraps import waitress -from flask import jsonify, make_response, request, redirect, render_template, send_file, session +from flask import jsonify, make_response, request, redirect, render_template, send_file, session, url_for from requests import exceptions from app import app from app.models.config import Config -from app.request import Request +from app.request import Request, TorError from app.utils.session_utils import valid_user_session from app.utils.routing_utils import * @@ -72,6 +72,7 @@ def before_request_func(): request.headers.get('User-Agent'), request.url_root, config=g.user_config) + g.app_location = g.user_config.url @@ -106,11 +107,15 @@ def unknown_page(e): def index(): # Reset keys session['fernet_keys'] = generate_user_keys(g.cookies_disabled) + error_message = session['error_message'] if 'error_message' in session else '' + session['error_message'] = '' return render_template('index.html', languages=Config.LANGUAGES, countries=Config.COUNTRIES, config=g.user_config, + error_message=error_message, + tor_available=int(os.environ.get('TOR_AVAILABLE')), version_number=app.config['VERSION_NUMBER']) @@ -141,6 +146,8 @@ def autocomplete(): elif request.data: q = urlparse.unquote_plus(request.data.decode('utf-8').replace('q=', '')) + # Return a list of suggestions for the query + # Note: If Tor is enabled, this returns nothing, as the request is almost always rejected return jsonify([q, g.user_request.autocomplete(q) if not g.user_config.tor else []]) @@ -162,7 +169,13 @@ def search(): return redirect('/') # Generate response and number of external elements from the page - response, elements = search_util.generate_response() + try: + response, elements = search_util.generate_response() + except TorError as e: + session['error_message'] = e.message + ("\\n\\nTor config is now disabled!" if e.disable else "") + session['config']['tor'] = False if e.disable else session['config']['tor'] + return redirect(url_for('.index')) + if search_util.feeling_lucky or elements < 0: return redirect(response, code=303) @@ -272,12 +285,6 @@ def window(): return render_template('display.html', response=results) -@app.route('/tor-reject', methods=['GET']) -def tor_reject(): - return render_template('error.html', - query=request.args.get('q') + ' - Tor rejection') - - def run_app(): parser = argparse.ArgumentParser(description='Whoogle Search console runner') parser.add_argument('--port', default=5000, metavar='', diff --git a/app/templates/index.html b/app/templates/index.html index c5af52b..2f996a3 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -29,6 +29,12 @@ Whoogle Search +
@@ -111,8 +117,8 @@
- - + +
diff --git a/app/utils/routing_utils.py b/app/utils/routing_utils.py index ebf757f..a083da2 100644 --- a/app/utils/routing_utils.py +++ b/app/utils/routing_utils.py @@ -71,14 +71,11 @@ class RoutingUtils: full_query = gen_query(self.query, self.request_params, self.config, content_filter.near) get_body = g.user_request.send(query=full_query) - if '/tor-reject' in get_body.text: - # Skip formatting if this is a Tor error page - return get_body, -1 - # Produce cleanable html soup from response html_soup = BeautifulSoup(content_filter.reskin(get_body.text), 'html.parser') - html_soup.insert( - 0, BeautifulSoup(TOR_BANNER, features='lxml') if g.user_request.tor_valid else BeautifulSoup("")) + html_soup.insert(0, BeautifulSoup( + TOR_BANNER, + features='lxml') if g.user_request.tor_valid else BeautifulSoup("", features="lxml")) if self.feeling_lucky: return get_first_link(html_soup), 1