Add Tor and HTTP/SOCKS proxy support

Allows users to enable/disable Tor from the config menu. When enabled,
all requests are forwarded through Tor.

Also adds support for routing requests through an alternative HTTP/SOCKS
proxy configured with environment variables. Setting the following
variables forwards all requests through that proxy:
    - WHOOGLE_PROXY_USER (optional)
    - WHOOGLE_PROXY_PASS (optional)
    - WHOOGLE_PROXY_TYPE (required)
      - Can be "http", "socks4", or "socks5"
    - WHOOGLE_PROXY_LOC  (required)
      - Format: "<ip address>:<port>"

See #30
This commit is contained in:
Ben Busby 2020-10-25 18:14:16 -04:00
parent f3bb1e22b4
commit bd1d236923
10 changed files with 134 additions and 13 deletions

View File

@ -1,7 +1,9 @@
FROM python:3.8-slim
WORKDIR /usr/src/app
RUN apt-get update && apt-get install -y build-essential libcurl4-openssl-dev libssl-dev
RUN apt-get update && apt-get install -y build-essential libcurl4-openssl-dev libssl-dev tor
# Copy the Tor config straight from the build context. Nothing has been
# copied into WORKDIR at this point, so `cat rc/torrc` had no file to read.
COPY rc/torrc /etc/tor/torrc
# NOTE(review): a daemon started in a RUN step only lives for that build
# step; tor must also be launched by the container's start command.
RUN service tor start
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
@ -15,6 +17,15 @@ ENV WHOOGLE_USER=$username
ARG password=''
ENV WHOOGLE_PASS=$password
ARG proxyuser=''
ENV WHOOGLE_PROXY_USER=$proxyuser
ARG proxypass=''
ENV WHOOGLE_PROXY_PASS=$proxypass
ARG proxytype=''
ENV WHOOGLE_PROXY_TYPE=$proxytype
ARG proxyloc=''
ENV WHOOGLE_PROXY_LOC=$proxyloc
ARG use_https=''
ENV HTTPS_ONLY=$use_https

View File

@ -305,6 +305,7 @@ class Config:
self.safe = False
self.dark = False
self.nojs = False
self.tor = False
self.near = ''
self.alts = False
self.new_tab = False

View File

@ -1,8 +1,12 @@
from app.models.config import Config
from lxml import etree
import random
import requests
from requests import Response
import urllib.parse as urlparse
import os
from stem import Signal
from stem.control import Controller
# Core Google search URLs
SEARCH_URL = 'https://www.google.com/search?gbv=1&q='
@ -15,6 +19,12 @@ DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source', 'nfpr']
def acquire_tor_conn():
    """Ask the local Tor daemon for a new identity.

    Opens a controller on port 9051, authenticates without explicit
    credentials, and sends the NEWNYM signal. The controller is closed
    when the ``with`` block exits.
    """
    tor_controller = Controller.from_port(port=9051)
    with tor_controller as tor_ctl:
        tor_ctl.authenticate()
        tor_ctl.signal(Signal.NEWNYM)
def gen_user_agent(is_mobile):
mozilla = random.choice(['Moo', 'Woah', 'Bro', 'Slow']) + 'zilla'
firefox = random.choice(['Choir', 'Squier', 'Higher', 'Wire']) + 'fox'
@ -85,11 +95,31 @@ def gen_query(query, args, config, near_city=None):
class Request:
def __init__(self, normal_ua, language='lang_en'):
self.language = language
def __init__(self, normal_ua, root_path, config: Config):
    """Prepare per-request state: language, user agent, and proxy settings.

    Args:
        normal_ua: The client's User-Agent header value. May be None when
            the header is absent; treated as an empty string.
        root_path: Root URL of this app, used to build the ``tor-reject``
            error URL.
        config: The user's configuration; ``lang_search`` and ``tor`` are
            read from it.

    Proxy selection:
        If WHOOGLE_PROXY_LOC is set, requests go through that proxy, using
        WHOOGLE_PROXY_TYPE as the URL scheme and, when WHOOGLE_PROXY_USER
        is set, ``user:pass@`` credentials. Otherwise the local Tor SOCKS
        port is used when ``config.tor`` is enabled, and no proxy at all
        when it is not.
    """
    # The header lookup upstream may yield None; avoid `'x' in None`
    normal_ua = normal_ua or ''

    self.language = config.lang_search
    self.mobile = 'Android' in normal_ua or 'iPhone' in normal_ua
    self.modified_user_agent = gen_user_agent(self.mobile)

    # Set up proxy, if previously configured
    proxy_loc = os.environ.get('WHOOGLE_PROXY_LOC', '')
    if proxy_loc:
        # Default to '' so an unset variable cannot raise on concatenation.
        # The type is still required: an empty value yields a URL with an
        # empty scheme.
        proxy_type = os.environ.get('WHOOGLE_PROXY_TYPE', '')
        proxy_user = os.environ.get('WHOOGLE_PROXY_USER', '')
        proxy_pass = os.environ.get('WHOOGLE_PROXY_PASS', '')

        # Credentials must be separated from the host by '@'
        auth_str = ''
        if proxy_user:
            auth_str = proxy_user + ':' + proxy_pass + '@'

        proxy_url = proxy_type + '://' + auth_str + proxy_loc

        # The dict keys name the scheme of the *target* URL; the same proxy
        # endpoint serves both, so its own scheme must not be rewritten.
        self.proxies = {
            'http': proxy_url,
            'https': proxy_url
        }
    elif config.tor:
        self.proxies = {
            'http': 'socks5://127.0.0.1:9050',
            'https': 'socks5://127.0.0.1:9050'
        }
    else:
        self.proxies = {}

    self.tor = config.tor
    self.tor_valid = False
    self.root_path = root_path
def __getitem__(self, name):
return getattr(self, name)
@ -103,9 +133,28 @@ class Request:
return []
def send(self, base_url=SEARCH_URL, query='') -> Response:
def send(self, base_url=SEARCH_URL, query='', attempt=0) -> Response:
    """Fetch ``base_url + query`` through the configured proxies.

    Args:
        base_url: URL prefix to request.
        query: Query string appended verbatim to ``base_url``.
        attempt: Number of retries already performed; callers normally
            leave this at 0 and the method passes it on when it recurses.

    Returns:
        The upstream response. When Tor is enabled and a captcha page is
        still returned after 5 retries, the response of this app's own
        ``tor-reject`` page is returned instead.
    """
    headers = {
        'User-Agent': self.modified_user_agent
    }

    # Make sure that the tor connection is valid, if enabled. Only checked
    # on the first attempt so retries do not repeat the extra round trip.
    if self.tor and attempt == 0:
        tor_check = requests.get(
            'https://check.torproject.org/',
            proxies=self.proxies,
            headers=headers)
        self.tor_valid = 'Congratulations' in tor_check.text
        # TODO: Throw error if the connection isn't valid?

    response = requests.get(
        base_url + query,
        proxies=self.proxies,
        headers=headers)

    # Retry with a new identity (up to 5 retries), but only when Tor is in
    # use: without Tor there is no identity to rotate and no control port
    # to contact, so a captcha page is handed back to the caller as-is.
    if self.tor and 'form id="captcha-form"' in response.text:
        attempt += 1
        if attempt > 5:
            return requests.get(self.root_path + 'tor-reject?q=' + query)
        acquire_tor_conn()
        return self.send(base_url, query, attempt)

    return response
# NOTE(review): indentation is lost in this view. If this call belongs to
# send() it follows an unconditional return and never runs; if it is
# module-level it contacts the Tor control port at import time. Confirm.
acquire_tor_conn()

View File

@ -62,13 +62,16 @@ def before_request_func():
if https_only and request.url.startswith('http://'):
return redirect(request.url.replace('http://', 'https://', 1), code=308)
g.user_config = Config(**session['config'])
if not g.user_config.url:
g.user_config.url = request.url_root.replace('http://', 'https://') if https_only else request.url_root
g.user_request = Request(request.headers.get('User-Agent'), language=g.user_config.lang_search)
g.user_request = Request(
request.headers.get('User-Agent'),
request.url_root,
config=g.user_config)
g.app_location = g.user_config.url
@ -138,7 +141,7 @@ def autocomplete():
elif request.data:
q = urlparse.unquote_plus(request.data.decode('utf-8').replace('q=', ''))
return jsonify([q, g.user_request.autocomplete(q)])
return jsonify([q, g.user_request.autocomplete(q) if not g.user_config.tor else []])
@app.route('/search', methods=['GET', 'POST'])
@ -160,7 +163,7 @@ def search():
# Generate response and number of external elements from the page
response, elements = search_util.generate_response()
if search_util.feeling_lucky:
if search_util.feeling_lucky or elements < 0:
return redirect(response, code=303)
# Keep count of external elements to fetch before element key can be regenerated
@ -269,6 +272,12 @@ def window():
return render_template('display.html', response=results)
@app.route('/tor-reject', methods=['GET'])
def tor_reject():
    """Render the error page for a search that Tor could not complete.

    Reads the ``q`` query parameter and shows it, suffixed with
    `` - Tor rejection``, in ``error.html``.
    """
    # Default to '' so a request without ``q`` renders the page instead of
    # failing on ``None + str``.
    query = request.args.get('q', '')
    return render_template('error.html',
                           query=query + ' - Tor rejection')
def run_app():
parser = argparse.ArgumentParser(description='Whoogle Search console runner')
parser.add_argument('--port', default=5000, metavar='<port number>',
@ -281,6 +290,12 @@ def run_app():
help='Enforces HTTPS redirects for all requests')
parser.add_argument('--userpass', default='', metavar='<username:password>',
help='Sets a username/password basic auth combo (default None)')
parser.add_argument('--proxyauth', default='', metavar='<username:password>',
help='Sets a username/password for a HTTP/SOCKS proxy (default None)')
parser.add_argument('--proxytype', default='', metavar='<socks4|socks5|http>',
help='Sets a proxy type for all connections (default None)')
parser.add_argument('--proxyurl', default='', metavar='<location:port>',
help='Sets a proxy location for all connections (default None)')
args = parser.parse_args()
if args.userpass:
@ -288,6 +303,14 @@ def run_app():
os.environ['WHOOGLE_USER'] = user_pass[0]
os.environ['WHOOGLE_PASS'] = user_pass[1]
if args.proxyauth or args.proxytype or args.proxyurl:
if args.proxyauth:
proxy_user_pass = args.proxyauth.split(':')
os.environ['WHOOGLE_PROXY_USER'] = proxy_user_pass[0]
os.environ['WHOOGLE_PROXY_PASS'] = proxy_user_pass[1]
os.environ['WHOOGLE_PROXY_TYPE'] = args.proxytype
os.environ['WHOOGLE_PROXY_LOC'] = args.proxyurl
os.environ['HTTPS_ONLY'] = '1' if args.https_only else ''
if args.debug:

View File

@ -1,6 +1,6 @@
// Whoogle configurations that use boolean values and checkboxes
CONFIG_BOOLS = [
"nojs", "dark", "safe", "alts", "new_tab", "get_only"
"nojs", "dark", "safe", "alts", "new_tab", "get_only", "tor"
];
// Whoogle configurations that use string values and input fields

View File

@ -3,3 +3,4 @@
<p>
Error parsing "{{ query }}"
</p>
<a href="/">Return Home</a>

View File

@ -110,6 +110,10 @@
<label for="config-new-tab">Open Links in New Tab: </label>
<input type="checkbox" name="new_tab" id="config-new-tab">
</div>
<div class="config-div">
<label for="config-tor">Use Tor: </label>
<input type="checkbox" name="tor" id="config-tor">
</div>
<div class="config-div">
<label for="config-get-only">GET Requests Only: </label>
<input type="checkbox" name="get_only" id="config-get-only">

View File

@ -7,6 +7,9 @@ from flask import g
from typing import Any, Tuple
TOR_BANNER = '<hr><h1 style="text-align: center">You are using Tor</h1><hr>'
class RoutingUtils:
def __init__(self, request, config, session, cookies_disabled=False):
self.request_params = request.args if request.method == 'GET' else request.form
@ -66,10 +69,16 @@ class RoutingUtils:
content_filter = Filter(self.session['fernet_keys'], mobile=mobile, config=self.config)
full_query = gen_query(self.query, self.request_params, self.config, content_filter.near)
get_body = g.user_request.send(query=full_query).text
get_body = g.user_request.send(query=full_query)
if '/tor-reject' in get_body.text:
# Skip formatting if this is a Tor error page
return get_body, -1
# Produce cleanable html soup from response
html_soup = BeautifulSoup(content_filter.reskin(get_body), 'html.parser')
html_soup = BeautifulSoup(content_filter.reskin(get_body.text), 'html.parser')
html_soup.insert(
0, BeautifulSoup(TOR_BANNER, features='lxml') if g.user_request.tor_valid else BeautifulSoup(""))
if self.feeling_lucky:
return get_first_link(html_soup), 1

9
rc/torrc Normal file
View File

@ -0,0 +1,9 @@
# Tor daemon configuration for the optional Tor support.

# Account the daemon switches to after startup.
# NOTE(review): Debian-based installs usually name this account
# "debian-tor"; confirm that a "tor" user exists on the target system.
User tor
DataDirectory /var/lib/tor
# Control port; the app connects here to request a new identity.
ControlPort 9051
# Authenticate control connections with a cookie file rather than a password.
CookieAuthentication 1
# Make the data directory, cookie files and cache directory group-readable
# so a process in the daemon's group can read the auth cookie.
DataDirectoryGroupReadable 1
CookieAuthFileGroupReadable 1
ExtORPortCookieAuthFileGroupReadable 1
CacheDirectoryGroupReadable 1
# Location of the control-port authentication cookie.
CookieAuthFile /var/lib/tor/control_auth_cookie

View File

@ -1,20 +1,34 @@
attrs==19.3.0
beautifulsoup4==4.8.2
bs4==0.0.1
cachelib==0.1
certifi==2020.4.5.1
cffi==1.13.2
chardet==3.0.4
Click==7.0
cryptography==2.8
Flask==1.1.1
Flask-Session==0.3.2
idna==2.9
itsdangerous==1.1.0
Jinja2==2.10.3
lxml==4.5.1
MarkupSafe==1.1.1
more-itertools==8.3.0
packaging==20.4
pluggy==0.13.1
py==1.8.1
pycparser==2.19
pyOpenSSL==19.1.0
pyparsing==2.4.7
PySocks==1.7.1
pytest==5.4.1
python-dateutil==2.8.1
requests==2.23.0
six==1.14.0
soupsieve==1.9.5
Werkzeug==0.16.0
stem==1.8.0
urllib3==1.25.9
waitress==1.4.3
wcwidth==0.1.9
Werkzeug==0.16.0