diff --git a/Dockerfile b/Dockerfile index 61f77b2..7ecd84d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,9 @@ FROM python:3.8-slim WORKDIR /usr/src/app -RUN apt-get update && apt-get install -y build-essential libcurl4-openssl-dev libssl-dev +RUN apt-get update && apt-get install -y build-essential libcurl4-openssl-dev libssl-dev tor +RUN cat rc/torrc > /etc/tor/torrc +RUN service tor start COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt @@ -15,6 +17,15 @@ ENV WHOOGLE_USER=$username ARG password='' ENV WHOOGLE_PASS=$password +ARG proxyuser='' +ENV WHOOGLE_PROXY_USER=$proxyuser +ARG proxypass='' +ENV WHOOGLE_PROXY_PASS=$proxypass +ARG proxytype='' +ENV WHOOGLE_PROXY_TYPE=$proxytype +ARG proxyloc='' +ENV WHOOGLE_PROXY_LOC=$proxyloc + ARG use_https='' ENV HTTPS_ONLY=$use_https diff --git a/app/models/config.py b/app/models/config.py index 2fb4088..6cfe18f 100644 --- a/app/models/config.py +++ b/app/models/config.py @@ -305,6 +305,7 @@ class Config: self.safe = False self.dark = False self.nojs = False + self.tor = False self.near = '' self.alts = False self.new_tab = False diff --git a/app/request.py b/app/request.py index 4abb9b3..c8fdea0 100644 --- a/app/request.py +++ b/app/request.py @@ -1,8 +1,12 @@ +from app.models.config import Config from lxml import etree import random import requests from requests import Response import urllib.parse as urlparse +import os +from stem import Signal +from stem.control import Controller # Core Google search URLs SEARCH_URL = 'https://www.google.com/search?gbv=1&q=' @@ -15,6 +19,12 @@ DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0' VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source', 'nfpr'] +def acquire_tor_conn(): + with Controller.from_port(port=9051) as c: + c.authenticate() + c.signal(Signal.NEWNYM) + + def gen_user_agent(is_mobile): mozilla = random.choice(['Moo', 'Woah', 'Bro', 'Slow']) + 'zilla' firefox = random.choice(['Choir', 'Squier', 'Higher', 'Wire']) + 'fox' @@ -85,11 +95,31 @@ def gen_query(query, args, config, near_city=None): class Request: - def __init__(self, normal_ua, language='lang_en'): - self.language = language + def __init__(self, normal_ua, root_path, config: Config): + self.language = config.lang_search self.mobile = 'Android' in normal_ua or 'iPhone' in normal_ua self.modified_user_agent = gen_user_agent(self.mobile) + # Set up proxy, if previously configured + if os.environ.get('WHOOGLE_PROXY_LOC'): + auth_str = '' + if os.environ.get('WHOOGLE_PROXY_USER'): + auth_str = os.environ.get('WHOOGLE_PROXY_USER') + \ + ':' + os.environ.get('WHOOGLE_PROXY_PASS') + self.proxies = { + 'http': os.environ.get('WHOOGLE_PROXY_TYPE') + '://' + + auth_str + os.environ.get('WHOOGLE_PROXY_LOC'), + } + self.proxies['https'] = self.proxies['http'].replace('http', 'https') + else: + self.proxies = { + 'http': 'socks5://127.0.0.1:9050', + 'https': 'socks5://127.0.0.1:9050' + } if config.tor else {} + self.tor = config.tor + self.tor_valid = False + self.root_path = root_path + def __getitem__(self, name): return getattr(self, name) @@ -103,9 +133,28 @@ class Request: return [] - def send(self, base_url=SEARCH_URL, query='') -> Response: + def send(self, base_url=SEARCH_URL, query='', attempt=0) -> Response: headers = { 'User-Agent': self.modified_user_agent } - return requests.get(base_url + query, headers=headers) + # Make sure that the tor connection is valid, if enabled + if self.tor: + tor_check = requests.get('https://check.torproject.org/', proxies=self.proxies, headers=headers) + self.tor_valid = 'Congratulations' in tor_check.text + # TODO: Throw error if the connection isn't valid? + + response = requests.get(base_url + query, proxies=self.proxies, headers=headers) + + # Retry query with new identity if using Tor (max 5 attempts) + if 'form id="captcha-form"' in response.text: + attempt += 1 + if attempt > 5: + return requests.get(self.root_path + 'tor-reject?q=' + query) + acquire_tor_conn() + return self.send(base_url, query, attempt) + + return response + + +acquire_tor_conn() diff --git a/app/routes.py b/app/routes.py index da4be87..9cded21 100644 --- a/app/routes.py +++ b/app/routes.py @@ -62,13 +62,16 @@ def before_request_func(): if https_only and request.url.startswith('http://'): return redirect(request.url.replace('http://', 'https://', 1), code=308) - + g.user_config = Config(**session['config']) if not g.user_config.url: g.user_config.url = request.url_root.replace('http://', 'https://') if https_only else request.url_root - g.user_request = Request(request.headers.get('User-Agent'), language=g.user_config.lang_search) + g.user_request = Request( + request.headers.get('User-Agent'), + request.url_root, + config=g.user_config) g.app_location = g.user_config.url @@ -138,7 +141,7 @@ def autocomplete(): elif request.data: q = urlparse.unquote_plus(request.data.decode('utf-8').replace('q=', '')) - return jsonify([q, g.user_request.autocomplete(q)]) + return jsonify([q, g.user_request.autocomplete(q) if not g.user_config.tor else []]) @app.route('/search', methods=['GET', 'POST']) @@ -160,7 +163,7 @@ def search(): # Generate response and number of external elements from the page response, elements = search_util.generate_response() - if search_util.feeling_lucky: + if search_util.feeling_lucky or elements < 0: return redirect(response, code=303) # Keep count of external elements to fetch before element key can be regenerated @@ -269,6 +272,12 @@ def window(): return render_template('display.html', response=results) +@app.route('/tor-reject', methods=['GET']) +def tor_reject(): + return render_template('error.html', + query=request.args.get('q') + ' - Tor rejection') + + def run_app(): parser = argparse.ArgumentParser(description='Whoogle Search console runner') parser.add_argument('--port', default=5000, metavar='', @@ -281,6 +290,12 @@ def run_app(): help='Enforces HTTPS redirects for all requests') parser.add_argument('--userpass', default='', metavar='', help='Sets a username/password basic auth combo (default None)') + parser.add_argument('--proxyauth', default='', metavar='', + help='Sets a username/password for a HTTP/SOCKS proxy (default None)') + parser.add_argument('--proxytype', default='', metavar='', + help='Sets a proxy type for all connections (default None)') + parser.add_argument('--proxyurl', default='', metavar='', + help='Sets a proxy location for all connections (default None)') args = parser.parse_args() if args.userpass: @@ -288,6 +303,14 @@ def run_app(): os.environ['WHOOGLE_USER'] = user_pass[0] os.environ['WHOOGLE_PASS'] = user_pass[1] + if args.proxyauth or args.proxytype or args.proxyurl: + if args.proxyauth: + proxy_user_pass = args.proxyauth.split(':') + os.environ['WHOOGLE_PROXY_USER'] = proxy_user_pass[0] + os.environ['WHOOGLE_PROXY_PASS'] = proxy_user_pass[1] + os.environ['WHOOGLE_PROXY_TYPE'] = args.proxytype + os.environ['WHOOGLE_PROXY_LOC'] = args.proxyurl + os.environ['HTTPS_ONLY'] = '1' if args.https_only else '' if args.debug: diff --git a/app/static/js/controller.js b/app/static/js/controller.js index 156a84d..8775122 100644 --- a/app/static/js/controller.js +++ b/app/static/js/controller.js @@ -1,6 +1,6 @@ // Whoogle configurations that use boolean values and checkboxes CONFIG_BOOLS = [ - "nojs", "dark", "safe", "alts", "new_tab", "get_only" + "nojs", "dark", "safe", "alts", "new_tab", "get_only", "tor" ]; // Whoogle configurations that use string values and input fields diff --git a/app/templates/error.html b/app/templates/error.html index 003623d..9546e23 100644 --- a/app/templates/error.html +++ b/app/templates/error.html @@ -3,3 +3,4 @@

Error parsing "{{ query }}"

+Return Home diff --git a/app/templates/index.html b/app/templates/index.html index 4980316..c5af52b 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -110,6 +110,10 @@ +
+ + +
diff --git a/app/utils/routing_utils.py b/app/utils/routing_utils.py index c6c960b..ebf757f 100644 --- a/app/utils/routing_utils.py +++ b/app/utils/routing_utils.py @@ -7,6 +7,9 @@ from flask import g from typing import Any, Tuple +TOR_BANNER = '

You are using Tor


' + + class RoutingUtils: def __init__(self, request, config, session, cookies_disabled=False): self.request_params = request.args if request.method == 'GET' else request.form @@ -66,10 +69,16 @@ class RoutingUtils: content_filter = Filter(self.session['fernet_keys'], mobile=mobile, config=self.config) full_query = gen_query(self.query, self.request_params, self.config, content_filter.near) - get_body = g.user_request.send(query=full_query).text + get_body = g.user_request.send(query=full_query) + + if '/tor-reject' in get_body.text: + # Skip formatting if this is a Tor error page + return get_body, -1 # Produce cleanable html soup from response - html_soup = BeautifulSoup(content_filter.reskin(get_body), 'html.parser') + html_soup = BeautifulSoup(content_filter.reskin(get_body.text), 'html.parser') + html_soup.insert( + 0, BeautifulSoup(TOR_BANNER, features='lxml') if g.user_request.tor_valid else BeautifulSoup("")) if self.feeling_lucky: return get_first_link(html_soup), 1 diff --git a/rc/torrc b/rc/torrc new file mode 100644 index 0000000..164a337 --- /dev/null +++ b/rc/torrc @@ -0,0 +1,9 @@ +User tor +DataDirectory /var/lib/tor +ControlPort 9051 +CookieAuthentication 1 +DataDirectoryGroupReadable 1 +CookieAuthFileGroupReadable 1 +ExtORPortCookieAuthFileGroupReadable 1 +CacheDirectoryGroupReadable 1 +CookieAuthFile /var/lib/tor/control_auth_cookie diff --git a/requirements.txt b/requirements.txt index 702d8ba..fd9d6f2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,20 +1,34 @@ +attrs==19.3.0 beautifulsoup4==4.8.2 bs4==0.0.1 +cachelib==0.1 +certifi==2020.4.5.1 cffi==1.13.2 +chardet==3.0.4 Click==7.0 cryptography==2.8 Flask==1.1.1 Flask-Session==0.3.2 +idna==2.9 itsdangerous==1.1.0 Jinja2==2.10.3 lxml==4.5.1 MarkupSafe==1.1.1 +more-itertools==8.3.0 +packaging==20.4 +pluggy==0.13.1 +py==1.8.1 pycparser==2.19 pyOpenSSL==19.1.0 +pyparsing==2.4.7 +PySocks==1.7.1 pytest==5.4.1 python-dateutil==2.8.1 requests==2.23.0 six==1.14.0 soupsieve==1.9.5 -Werkzeug==0.16.0 +stem==1.8.0 +urllib3==1.25.9 waitress==1.4.3 +wcwidth==0.1.9 +Werkzeug==0.16.0