diff --git a/Dockerfile b/Dockerfile index 61f77b2..bf038ba 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,13 @@ FROM python:3.8-slim WORKDIR /usr/src/app -RUN apt-get update && apt-get install -y build-essential libcurl4-openssl-dev libssl-dev +RUN apt-get update && apt-get install -y \ + build-essential \ + libcurl4-openssl-dev \ + libssl-dev \ + tor + +COPY misc/tor/torrc /etc/tor/torrc COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt @@ -15,6 +21,15 @@ ENV WHOOGLE_USER=$username ARG password='' ENV WHOOGLE_PASS=$password +ARG proxyuser='' +ENV WHOOGLE_PROXY_USER=$proxyuser +ARG proxypass='' +ENV WHOOGLE_PROXY_PASS=$proxypass +ARG proxytype='' +ENV WHOOGLE_PROXY_TYPE=$proxytype +ARG proxyloc='' +ENV WHOOGLE_PROXY_LOC=$proxyloc + ARG use_https='' ENV HTTPS_ONLY=$use_https @@ -25,4 +40,4 @@ COPY . . EXPOSE $EXPOSE_PORT -CMD ["./run"] +CMD misc/tor/start-tor.sh & ./run diff --git a/README.md b/README.md index 2547daf..9f57d64 100644 --- a/README.md +++ b/README.md @@ -12,10 +12,11 @@ Contents 1. [Features](#features) 2. [Dependencies](#dependencies) 3. [Install/Deploy](#install) -4. [Usage](#usage) -5. [Extra Steps](#extra-steps) -6. [FAQ](#faq) -7. [Screenshots](#screenshots) +4. [Environment Variables](#environment-variables) +5. [Usage](#usage) +6. [Extra Steps](#extra-steps) +7. [FAQ](#faq) +8. [Screenshots](#screenshots) ## Features - No ads or sponsored content @@ -25,6 +26,7 @@ Contents - No AMP links - No URL tracking tags (i.e. utm=%s) - No referrer header +- Tor and HTTP/SOCKS proxy support - Autocomplete/search suggestions - POST request search and suggestion queries (when possible) - View images at full res without site redirect (currently mobile only) @@ -35,7 +37,7 @@ Contents - Optional location-based searching (i.e. results near \) - Optional NoJS mode to disable all Javascript in results -*If deployed to a remote server +*If deployed to a remote server, or configured to send requests through a VPN, Tor, proxy, etc. 
## Dependencies If using Heroku Quick Deploy, **you can skip this section**. @@ -96,6 +98,7 @@ optional arguments: --debug Activates debug mode for the server (default False) --https-only Enforces HTTPS redirects for all requests (default False) ``` +See the [available environment variables](#environment-variables) for additional configuration. ### E) Manual Clone the repo and run the following commands to start the app in a local-only environment: @@ -108,6 +111,7 @@ source venv/bin/activate pip install -r requirements.txt ./run ``` +See the [available environment variables](#environment-variables) for additional configuration. #### systemd Configuration After building the virtual environment, you can add the following to `/lib/systemd/system/whoogle.service` to set up a Whoogle Search systemd service: @@ -117,6 +121,14 @@ After building the virtual environment, you can add the following to `/lib/syste Description=Whoogle [Service] +# Basic auth configuration, uncomment to enable +#Environment=WHOOGLE_USER= +#Environment=WHOOGLE_PASS= +# Proxy configuration, uncomment to enable +#Environment=WHOOGLE_PROXY_USER= +#Environment=WHOOGLE_PROXY_PASS= +#Environment=WHOOGLE_PROXY_TYPE= Type=simple User=root WorkingDirectory= @@ -166,6 +178,19 @@ docker build --tag whoogle-search:1.0 . 
docker run --publish 5000:5000 --detach --name whoogle-search whoogle-search:1.0 ``` +Optionally, you can also enable some of the following environment variables to further customize your instance: + +```bash +docker run --publish 5000:5000 --detach --name whoogle-search \ + -e WHOOGLE_USER=username \ + -e WHOOGLE_PASS=password \ + -e WHOOGLE_PROXY_USER=username \ + -e WHOOGLE_PROXY_PASS=password \ + -e WHOOGLE_PROXY_TYPE=socks5 \ + -e WHOOGLE_PROXY_LOC=ip \ + whoogle-search:1.0 +``` + And kill with: `docker rm --force whoogle-search` #### Using [Heroku CLI](https://devcenter.heroku.com/articles/heroku-cli) @@ -181,6 +206,7 @@ heroku open ``` This series of commands can take a while, but once you run it once, you shouldn't have to run it again. The final command, `heroku open` will launch a tab in your web browser, where you can test out Whoogle and even [set it as your primary search engine](https://github.com/benbusby/whoogle#set-whoogle-as-your-primary-search-engine). +You may also edit environment variables from your app’s Settings tab in the Heroku Dashboard. #### Using your own server, or alternative container deployment There are other methods for deploying docker containers that are well outlined in [this article](https://rollout.io/blog/the-shortlist-of-docker-hosting/), but there are too many to describe set up for each here. Generally it should be about the same amount of effort as the Heroku deployment. @@ -191,6 +217,20 @@ Depending on your preferences, you can also deploy the app yourself on your own - SSL certificates (free through [Let's Encrypt](https://letsencrypt.org/getting-started/)) - A bit more experience or willingness to work through issues +## Environment Variables +There are a few optional environment variables available for customizing a Whoogle instance: + +| Variable | Description | +| ------------------ | -------------------------------------------------------------- | +| WHOOGLE_USER | The username for basic auth. 
WHOOGLE_PASS must also be set if used. | +| WHOOGLE_PASS | The password for basic auth. WHOOGLE_USER must also be set if used. | +| WHOOGLE_PROXY_USER | The username of the proxy server. | +| WHOOGLE_PROXY_PASS | The password of the proxy server. | +| WHOOGLE_PROXY_TYPE | The type of the proxy server. Can be "socks5", "socks4", or "http". | +| WHOOGLE_PROXY_LOC | The location of the proxy server (host or ip). | +| EXPOSE_PORT | The port where Whoogle will be exposed. | +| HTTPS_ONLY | Enforce HTTPS. (See [here](https://github.com/benbusby/whoogle-search#https-enforcement)) | + ## Usage Same as most search engines, with the exception of filtering by time range. @@ -256,7 +296,8 @@ Only needed if your setup requires Flask to redirect to HTTPS on its own -- gene Note: You should have your own domain name and [an https certificate](https://letsencrypt.org/getting-started/) in order for this to work properly. - Heroku: Ensure that the `Root URL` configuration on the home page begins with `https://` and not `http://` -- Docker: Add `--build-arg use_https=1` to your run command +- Docker build: Add `--build-arg use_https=1` to your build command +- Docker image: Set the environment variable HTTPS_ONLY=1 - Pip/Pipx: Add the `--https-only` flag to the end of the `whoogle-search` command - Default `run` script: Modify the script locally to include the `--https-only` flag at the end of the python run command diff --git a/app.json b/app.json index 76b58a1..7482c98 100644 --- a/app.json +++ b/app.json @@ -1,8 +1,49 @@ { - "name": "Whoogle Search", - "description": "A lightweight, privacy-oriented, containerized Google search proxy for desktop/mobile that removes Javascript, AMP links, tracking, and ads/sponsored content", - "repository": "https://github.com/benbusby/whoogle-search", - "logo": "https://raw.githubusercontent.com/benbusby/whoogle-search/master/app/static/img/favicon/ms-icon-150x150.png", - "keywords": ["search", "metasearch", "flask", "docker", "heroku", "adblock", 
"degoogle", "privacy"], - "stack": "container" + "name": "Whoogle Search", + "description": "A lightweight, privacy-oriented, containerized Google search proxy for desktop/mobile that removes Javascript, AMP links, tracking, and ads/sponsored content", + "repository": "https://github.com/benbusby/whoogle-search", + "logo": "https://raw.githubusercontent.com/benbusby/whoogle-search/master/app/static/img/favicon/ms-icon-150x150.png", + "keywords": [ + "search", + "metasearch", + "flask", + "docker", + "heroku", + "adblock", + "degoogle", + "privacy" + ], + "stack": "container", + "env": { + "WHOOGLE_USER": { + "description": "The username for basic auth. WHOOGLE_PASS must also be set if used. Leave empty to disable.", + "value": "", + "required": false + }, + "WHOOGLE_PASS": { + "description": "The password for basic auth. WHOOGLE_USER must also be set if used. Leave empty to disable.", + "value": "", + "required": false + }, + "WHOOGLE_PROXY_USER": { + "description": "The username of the proxy server. Leave empty to disable.", + "value": "", + "required": false + }, + "WHOOGLE_PROXY_PASS": { + "description": "The password of the proxy server. Leave empty to disable.", + "value": "", + "required": false + }, + "WHOOGLE_PROXY_TYPE": { + "description": "The type of the proxy server. For example \"socks5\". Leave empty to disable.", + "value": "", + "required": false + }, + "WHOOGLE_PROXY_LOC": { + "description": "The location of the proxy server (host or ip). 
Leave empty to disable.", + "value": "", + "required": false + } + } } diff --git a/app/__init__.py b/app/__init__.py index 820edb0..a349acc 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -1,8 +1,10 @@ +from app.request import send_tor_signal from app.utils.session_utils import generate_user_keys from app.utils.gen_ddg_bangs import gen_bangs_json from flask import Flask from flask_session import Session import os +from stem import Signal app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static') app.user_elements = {} @@ -25,11 +27,15 @@ if not os.path.exists(app.config['CONFIG_PATH']): if not os.path.exists(app.config['SESSION_FILE_DIR']): os.makedirs(app.config['SESSION_FILE_DIR']) -# (Re)generate DDG bang filter, and create path if it doesn't exist yet +# Generate DDG bang filter, and create path if it doesn't exist yet if not os.path.exists(app.config['BANG_PATH']): os.makedirs(app.config['BANG_PATH']) -gen_bangs_json(app.config['BANG_FILE']) +if not os.path.exists(app.config['BANG_FILE']): + gen_bangs_json(app.config['BANG_FILE']) Session(app) +# Attempt to acquire tor identity, to determine if Tor config is available +send_tor_signal(Signal.HEARTBEAT) + from app import routes diff --git a/app/filter.py b/app/filter.py index 25e00b7..0fa32af 100644 --- a/app/filter.py +++ b/app/filter.py @@ -146,14 +146,14 @@ class Filter: element_src = 'https:' + element_src elif element_src.startswith(LOGO_URL): # Re-brand with Whoogle logo - element['src'] = '/static/img/logo.png' + element['src'] = 'static/img/logo.png' element['style'] = 'height:40px;width:162px' return elif element_src.startswith(GOOG_IMG): element['src'] = BLANK_B64 return - element['src'] = '/element?url=' + self.encrypt_path(element_src, is_element=True) + \ + element['src'] = 'element?url=' + self.encrypt_path(element_src, is_element=True) + \ '&type=' + urlparse.quote(mime) # TODO: Non-mobile image results link to website instead of image # if not self.mobile: 
@@ -184,7 +184,7 @@ class Filter: def update_link(self, link): # Replace href with only the intended destination (no "utm" type tags) href = link['href'].replace('https://www.google.com', '') - if '/advanced_search' in href or 'tbm=shop' in href: + if 'advanced_search' in href or 'tbm=shop' in href: # TODO: The "Shopping" tab requires further filtering (see #136) # Temporarily removing all links to that tab for now. link.decompose() @@ -202,7 +202,7 @@ class Filter: # "li:1" implies the query should be interpreted verbatim, so we wrap it in double quotes if 'li:1' in href: query_link = '"' + query_link + '"' - new_search = '/search?q=' + self.encrypt_path(query_link) + new_search = 'search?q=' + self.encrypt_path(query_link) query_params = parse_qs(urlparse.urlparse(href).query) for param in VALID_PARAMS: diff --git a/app/models/config.py b/app/models/config.py index deb9f66..48b02d2 100644 --- a/app/models/config.py +++ b/app/models/config.py @@ -312,6 +312,7 @@ class Config: self.safe = False self.dark = False self.nojs = False + self.tor = False self.near = '' self.theme = 'whoogle' self.alts = False diff --git a/app/request.py b/app/request.py index 4abb9b3..04ae3db 100644 --- a/app/request.py +++ b/app/request.py @@ -1,8 +1,12 @@ +from app.models.config import Config from lxml import etree import random import requests -from requests import Response +from requests import Response, ConnectionError import urllib.parse as urlparse +import os +from stem import Signal, SocketError +from stem.control import Controller # Core Google search URLs SEARCH_URL = 'https://www.google.com/search?gbv=1&q=' @@ -15,7 +19,36 @@ DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0' VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source', 'nfpr'] -def gen_user_agent(is_mobile): +class TorError(Exception): + """Exception raised for errors in Tor requests. 
+ + Attributes: + message -- a message describing the error that occurred + disable -- optionally disables Tor in the user config (note: + this should only happen if the connection has been dropped + altogether). + """ + + def __init__(self, message, disable=False): + self.message = message + self.disable = disable + super().__init__(self.message) + + +def send_tor_signal(signal: Signal) -> bool: + try: + with Controller.from_port(port=9051) as c: + c.authenticate() + c.signal(signal) + os.environ['TOR_AVAILABLE'] = '1' + return True + except (SocketError, ConnectionRefusedError, ConnectionError): + os.environ['TOR_AVAILABLE'] = '0' + + return False + + +def gen_user_agent(is_mobile) -> str: mozilla = random.choice(['Moo', 'Woah', 'Bro', 'Slow']) + 'zilla' firefox = random.choice(['Choir', 'Squier', 'Higher', 'Wire']) + 'fox' linux = random.choice(['Win', 'Sin', 'Gin', 'Fin', 'Kin']) + 'ux' @@ -26,7 +59,7 @@ def gen_user_agent(is_mobile): return DESKTOP_UA.format(mozilla, linux, firefox) -def gen_query(query, args, config, near_city=None): +def gen_query(query, args, config, near_city=None) -> str: param_dict = {key: '' for key in VALID_PARAMS} # Use :past(hour/day/week/month/year) if available @@ -85,15 +118,56 @@ def gen_query(query, args, config, near_city=None): class Request: - def __init__(self, normal_ua, language='lang_en'): - self.language = language + """Class used for handling all outbound requests, including search queries, + search suggestions, and loading of external content (images, audio, etc). 
+ + Attributes: + normal_ua -- the user's current user agent + root_path -- the root path of the whoogle instance + config -- the user's current whoogle configuration + """ + def __init__(self, normal_ua, root_path, config: Config): + # Send heartbeat to Tor, used in determining if the user can or cannot + # enable Tor for future requests + send_tor_signal(Signal.HEARTBEAT) + + self.language = config.lang_search self.mobile = 'Android' in normal_ua or 'iPhone' in normal_ua self.modified_user_agent = gen_user_agent(self.mobile) + # Set up proxy, if previously configured + if os.environ.get('WHOOGLE_PROXY_LOC'): + auth_str = '' + if os.environ.get('WHOOGLE_PROXY_USER'): + auth_str = os.environ.get('WHOOGLE_PROXY_USER') + \ + ':' + os.environ.get('WHOOGLE_PROXY_PASS') + self.proxies = { + 'http': os.environ.get('WHOOGLE_PROXY_TYPE') + '://' + + auth_str + '@' + os.environ.get('WHOOGLE_PROXY_LOC'), + } + self.proxies['https'] = self.proxies['http'].replace('http', 'https') + else: + self.proxies = { + 'http': 'socks5://127.0.0.1:9050', + 'https': 'socks5://127.0.0.1:9050' + } if config.tor else {} + self.tor = config.tor + self.tor_valid = False + self.root_path = root_path + def __getitem__(self, name): return getattr(self, name) - def autocomplete(self, query): + def autocomplete(self, query) -> list: + """Sends a query to Google's search suggestion service + + Args: + query: The in-progress query to send + + Returns: + list: The list of matches for possible search suggestions + + """ ac_query = dict(hl=self.language, q=query) response = self.send(base_url=AUTOCOMPLETE_URL, query=urlparse.urlencode(ac_query)).text @@ -103,9 +177,45 @@ class Request: return [] - def send(self, base_url=SEARCH_URL, query='') -> Response: + def send(self, base_url=SEARCH_URL, query='', attempt=0) -> Response: + """Sends an outbound request to a URL. Optionally sends the request using Tor, if + enabled by the user. 
+ + Args: + base_url: The URL to use in the request + query: The optional query string for the request + attempt: The number of attempts made for the request (used for cycling + through Tor identities, if enabled) + + Returns: + Response: The Response object returned by the requests call + + """ headers = { 'User-Agent': self.modified_user_agent } - return requests.get(base_url + query, headers=headers) + # Validate Tor connection and request new identity if the last one failed + if self.tor and not send_tor_signal(Signal.NEWNYM if attempt > 0 else Signal.HEARTBEAT): + raise TorError("Tor was previously enabled, but the connection has been dropped. Please check your " + + "Tor configuration and try again.", disable=True) + + # Make sure that the tor connection is valid, if enabled + if self.tor: + tor_check = requests.get('https://check.torproject.org/', proxies=self.proxies, headers=headers) + self.tor_valid = 'Congratulations' in tor_check.text + + if not self.tor_valid: + raise TorError("Tor connection succeeded, but the connection could not be validated by torproject.org", + disable=True) + + response = requests.get(base_url + query, proxies=self.proxies, headers=headers) + + # Retry query with new identity if using Tor (max 10 attempts) + if 'form id="captcha-form"' in response.text and self.tor: + attempt += 1 + if attempt > 10: + raise TorError("Tor query failed -- max attempts exceeded 10") + return self.send(base_url, query, attempt) + + return response diff --git a/app/routes.py b/app/routes.py index d983c2c..24a422d 100644 --- a/app/routes.py +++ b/app/routes.py @@ -9,12 +9,12 @@ import uuid from functools import wraps import waitress -from flask import jsonify, make_response, request, redirect, render_template, send_file, session +from flask import jsonify, make_response, request, redirect, render_template, send_file, session, url_for from requests import exceptions from app import app from app.models.config import Config -from app.request import 
Request +from app.request import Request, TorError from app.utils.session_utils import valid_user_session from app.utils.routing_utils import * @@ -68,7 +68,11 @@ def before_request_func(): if not g.user_config.url: g.user_config.url = request.url_root.replace('http://', 'https://') if https_only else request.url_root - g.user_request = Request(request.headers.get('User-Agent'), language=g.user_config.lang_search) + g.user_request = Request( + request.headers.get('User-Agent'), + request.url_root, + config=g.user_config) + g.app_location = g.user_config.url @@ -103,12 +107,16 @@ def unknown_page(e): def index(): # Reset keys session['fernet_keys'] = generate_user_keys(g.cookies_disabled) + error_message = session['error_message'] if 'error_message' in session else '' + session['error_message'] = '' return render_template('index.html', languages=Config.LANGUAGES, countries=Config.COUNTRIES, themes=Config.THEMES, config=g.user_config, + error_message=error_message, + tor_available=int(os.environ.get('TOR_AVAILABLE')), version_number=app.config['VERSION_NUMBER']) @@ -129,6 +137,10 @@ def opensearch(): @app.route('/autocomplete', methods=['GET', 'POST']) def autocomplete(): q = g.request_params.get('q') + if not q: + # FF will occasionally (incorrectly) send the q field without a + # mimetype in the format "b'q='" through the request.data field + q = str(request.data).replace('q=', '') # Search bangs if the query begins with "!", but not "! " (feeling lucky) if q.startswith('!') and len(q) > 1 and not q.startswith('! 
'): @@ -139,7 +151,9 @@ def autocomplete(): elif request.data: q = urlparse.unquote_plus(request.data.decode('utf-8').replace('q=', '')) - return jsonify([q, g.user_request.autocomplete(q)]) + # Return a list of suggestions for the query + # Note: If Tor is enabled, this returns nothing, as the request is almost always rejected + return jsonify([q, g.user_request.autocomplete(q) if not g.user_config.tor else []]) @app.route('/search', methods=['GET', 'POST']) @@ -160,8 +174,14 @@ def search(): return redirect('/') # Generate response and number of external elements from the page - response, elements = search_util.generate_response() - if search_util.feeling_lucky: + try: + response, elements = search_util.generate_response() + except TorError as e: + session['error_message'] = e.message + ("\\n\\nTor config is now disabled!" if e.disable else "") + session['config']['tor'] = False if e.disable else session['config']['tor'] + return redirect(url_for('.index')) + + if search_util.feeling_lucky or elements < 0: return redirect(response, code=303) # Keep count of external elements to fetch before element key can be regenerated @@ -284,6 +304,12 @@ def run_app(): help='Enforces HTTPS redirects for all requests') parser.add_argument('--userpass', default='', metavar='', help='Sets a username/password basic auth combo (default None)') + parser.add_argument('--proxyauth', default='', metavar='', + help='Sets a username/password for a HTTP/SOCKS proxy (default None)') + parser.add_argument('--proxytype', default='', metavar='', + help='Sets a proxy type for all connections (default None)') + parser.add_argument('--proxyloc', default='', metavar='', + help='Sets a proxy location for all connections (default None)') args = parser.parse_args() if args.userpass: @@ -291,6 +317,14 @@ def run_app(): os.environ['WHOOGLE_USER'] = user_pass[0] os.environ['WHOOGLE_PASS'] = user_pass[1] + if args.proxytype and args.proxyloc: + if args.proxyauth: + proxy_user_pass = 
args.proxyauth.split(':') + os.environ['WHOOGLE_PROXY_USER'] = proxy_user_pass[0] + os.environ['WHOOGLE_PROXY_PASS'] = proxy_user_pass[1] + os.environ['WHOOGLE_PROXY_TYPE'] = args.proxytype + os.environ['WHOOGLE_PROXY_LOC'] = args.proxyloc + os.environ['HTTPS_ONLY'] = '1' if args.https_only else '' if args.debug: diff --git a/app/static/js/autocomplete.js b/app/static/js/autocomplete.js index b8f8bf6..702ebc4 100644 --- a/app/static/js/autocomplete.js +++ b/app/static/js/autocomplete.js @@ -1,6 +1,6 @@ const handleUserInput = searchBar => { let xhrRequest = new XMLHttpRequest(); - xhrRequest.open("POST", "/autocomplete"); + xhrRequest.open("POST", "autocomplete"); xhrRequest.setRequestHeader("Content-type", "application/x-www-form-urlencoded"); xhrRequest.onload = function () { if (xhrRequest.readyState === 4 && xhrRequest.status !== 200) { @@ -123,4 +123,4 @@ const autocomplete = (searchInput, autocompleteResults) => { document.addEventListener("click", function (e) { closeAllLists(e.target); }); -}; \ No newline at end of file +}; diff --git a/app/static/js/controller.js b/app/static/js/controller.js index 156a84d..3ab8ca7 100644 --- a/app/static/js/controller.js +++ b/app/static/js/controller.js @@ -1,6 +1,6 @@ // Whoogle configurations that use boolean values and checkboxes CONFIG_BOOLS = [ - "nojs", "dark", "safe", "alts", "new_tab", "get_only" + "nojs", "dark", "safe", "alts", "new_tab", "get_only", "tor" ]; // Whoogle configurations that use string values and input fields @@ -31,7 +31,7 @@ const setupSearchLayout = () => { const fillConfigValues = () => { // Request existing config info let xhrGET = new XMLHttpRequest(); - xhrGET.open("GET", "/config"); + xhrGET.open("GET", "config"); xhrGET.onload = function() { if (xhrGET.readyState === 4 && xhrGET.status !== 200) { alert("Error loading Whoogle config"); @@ -82,7 +82,7 @@ const loadConfig = event => { } let xhrPUT = new XMLHttpRequest(); - xhrPUT.open("PUT", "/config?name=" + config + ".conf"); + 
xhrPUT.open("PUT", "config?name=" + config + ".conf"); xhrPUT.onload = function() { if (xhrPUT.readyState === 4 && xhrPUT.status !== 200) { alert("Error loading Whoogle config"); @@ -104,7 +104,7 @@ const saveConfig = event => { } let configForm = document.getElementById("config-form"); - configForm.action = '/config?name=' + config + ".conf"; + configForm.action = 'config?name=' + config + ".conf"; configForm.submit(); }; diff --git a/app/templates/display.html b/app/templates/display.html index 2386597..86502fb 100644 --- a/app/templates/display.html +++ b/app/templates/display.html @@ -1,17 +1,17 @@ - - - + + + - + {% if theme %} - - + + {% else %} - - + + {% endif %} {{ query }} - Whoogle Search diff --git a/app/templates/error.html b/app/templates/error.html index 003623d..9546e23 100644 --- a/app/templates/error.html +++ b/app/templates/error.html @@ -3,3 +3,4 @@

Error parsing "{{ query }}"

+Return Home diff --git a/app/templates/header.html b/app/templates/header.html index 875d13b..4ad9d01 100644 --- a/app/templates/header.html +++ b/app/templates/header.html @@ -3,7 +3,7 @@
@@ -22,7 +22,7 @@ {% else %}
@@ -52,4 +52,4 @@ document.getElementById("search-form").submit(); } }); - \ No newline at end of file + diff --git a/app/templates/index.html b/app/templates/index.html index 61a456a..684d1f7 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -1,25 +1,25 @@ - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - + + + + {% if config['theme'] %} @@ -28,13 +28,26 @@ {% else %} {% endif %} + Whoogle Search +
- - + +
@@ -46,7 +59,7 @@
- +
+
+ + +
diff --git a/app/templates/opensearch.xml b/app/templates/opensearch.xml index 8e2e7b2..5b533f4 100644 --- a/app/templates/opensearch.xml +++ b/app/templates/opensearch.xml @@ -9,7 +9,7 @@ - + {{ main_url }}/search diff --git a/app/utils/routing_utils.py b/app/utils/routing_utils.py index c6c960b..a083da2 100644 --- a/app/utils/routing_utils.py +++ b/app/utils/routing_utils.py @@ -7,6 +7,9 @@ from flask import g from typing import Any, Tuple +TOR_BANNER = '

You are using Tor


' + + class RoutingUtils: def __init__(self, request, config, session, cookies_disabled=False): self.request_params = request.args if request.method == 'GET' else request.form @@ -66,10 +69,13 @@ class RoutingUtils: content_filter = Filter(self.session['fernet_keys'], mobile=mobile, config=self.config) full_query = gen_query(self.query, self.request_params, self.config, content_filter.near) - get_body = g.user_request.send(query=full_query).text + get_body = g.user_request.send(query=full_query) # Produce cleanable html soup from response - html_soup = BeautifulSoup(content_filter.reskin(get_body), 'html.parser') + html_soup = BeautifulSoup(content_filter.reskin(get_body.text), 'html.parser') + html_soup.insert(0, BeautifulSoup( + TOR_BANNER, + features='lxml') if g.user_request.tor_valid else BeautifulSoup("", features="lxml")) if self.feeling_lucky: return get_first_link(html_soup), 1 diff --git a/docker-compose.yml b/docker-compose.yml index 479b7f3..0d58aac 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,6 +4,15 @@ services: whoogle-search: image: benbusby/whoogle-search container_name: whoogle-search + #environment: # Uncomment to configure environment variables + # Basic auth configuration, uncomment to enable + #- WHOOGLE_USER= + #- WHOOGLE_PASS= + # Proxy configuration, uncomment to enable + #- WHOOGLE_PROXY_USER= + #- WHOOGLE_PROXY_PASS= + #- WHOOGLE_PROXY_TYPE= ports: - 5000:5000 restart: unless-stopped diff --git a/misc/tor/start-tor.sh b/misc/tor/start-tor.sh new file mode 100755 index 0000000..19be24a --- /dev/null +++ b/misc/tor/start-tor.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +if [ "$(whoami)" != "root" ]; then + tor -f /etc/tor/torrc +else + service tor start +fi diff --git a/misc/tor/torrc b/misc/tor/torrc new file mode 100644 index 0000000..b162719 --- /dev/null +++ b/misc/tor/torrc @@ -0,0 +1,8 @@ +DataDirectory /var/lib/tor +ControlPort 9051 +CookieAuthentication 1 +DataDirectoryGroupReadable 1 +CookieAuthFileGroupReadable 1 
+ExtORPortCookieAuthFileGroupReadable 1 +CacheDirectoryGroupReadable 1 +CookieAuthFile /var/lib/tor/control_auth_cookie diff --git a/requirements.txt b/requirements.txt index 3193972..6e85c67 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,21 +1,34 @@ +attrs==19.3.0 beautifulsoup4==4.8.2 bs4==0.0.1 +cachelib==0.1 +certifi==2020.4.5.1 cffi==1.13.2 +chardet==3.0.4 Click==7.0 -cryptography==2.8 +cryptography==3.2 Flask==1.1.1 Flask-Session==0.3.2 +idna==2.9 itsdangerous==1.1.0 Jinja2==2.10.3 lxml==4.5.1 MarkupSafe==1.1.1 +more-itertools==8.3.0 +packaging==20.4 +pluggy==0.13.1 +py==1.8.1 pycparser==2.19 pyOpenSSL==19.1.0 +pyparsing==2.4.7 +PySocks==1.7.1 pytest==5.4.1 python-dateutil==2.8.1 requests==2.23.0 -six==1.14.0 soupsieve==1.9.5 -Werkzeug==0.16.0 +stem==1.8.0 +urllib3==1.25.9 waitress==1.4.3 cssutils==1.0.2 +wcwidth==0.1.9 +Werkzeug==0.16.0