From 3a4a8ce95b9eb7d8e2ca6dd9c8db42f5a61dee54 Mon Sep 17 00:00:00 2001 From: mendel5 <60322520+mendel5@users.noreply.github.com> Date: Sun, 10 May 2020 10:53:19 +0200 Subject: [PATCH 01/58] fix spelling: Whoole --> Whoogle --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b6f73ba..338ae73 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,7 @@ Since the instance is destroyed and rebuilt after inactivity, config settings wi ## FAQ **What's the difference between this and [Searx](https://github.com/asciimoo/searx)?** -Whoogle is intended to only ever be deployed to private instances by individuals of any background, with as little effort as possible. Prior knowledge of/experience with the command line or deploying applications is not necessary to deploy Whoogle, which isn't the case with Searx. As a result, Whoole is missing some features of Searx in order to be as easy to deploy as possible. +Whoogle is intended to only ever be deployed to private instances by individuals of any background, with as little effort as possible. Prior knowledge of/experience with the command line or deploying applications is not necessary to deploy Whoogle, which isn't the case with Searx. As a result, Whoogle is missing some features of Searx in order to be as easy to deploy as possible. Whoogle also only uses Google search results, not Bing/Quant/etc, and uses the existing Google search UI to make the transition away from Google search as unnoticeable as possible. From 2daca9a352ab296a3aa2fe040b9d80b29ed330ee Mon Sep 17 00:00:00 2001 From: Daniel Sendzik Date: Sun, 10 May 2020 14:00:22 +0200 Subject: [PATCH 02/58] Improved Dockerfile --- Dockerfile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 546eede..1eee37f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,9 +1,13 @@ -FROM python:3 +FROM python:3.8 WORKDIR /usr/src/app -COPY . . +COPY requirements.txt . 
RUN pip install --no-cache-dir -r requirements.txt + +COPY . . RUN chmod +x ./whoogle-search +EXPOSE 5000 + CMD ["./whoogle-search"] From 0fc5fa9d99dca95c0c328d6f243bdf0a4bd79647 Mon Sep 17 00:00:00 2001 From: Fabian Schliski Date: Sun, 10 May 2020 16:51:42 +0200 Subject: [PATCH 03/58] Add autofocus to input field Supported in all major browsers, allows the user to immediately start typing after loading the page. --- app/templates/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/templates/index.html b/app/templates/index.html index a221e49..7cc889c 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -28,7 +28,7 @@
- +
From 743caf6cc7d714763509436ebcea912538c7c74b Mon Sep 17 00:00:00 2001 From: Fabian Schliski Date: Sun, 10 May 2020 20:17:32 +0200 Subject: [PATCH 04/58] Updating autofocus value --- app/templates/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/templates/index.html b/app/templates/index.html index 7cc889c..94c7df8 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -28,7 +28,7 @@
- +
From 7ccad2799efce0c966bfcde2df75e46252f2c89c Mon Sep 17 00:00:00 2001 From: Ben Busby <33362396+benbusby@users.noreply.github.com> Date: Sun, 10 May 2020 13:27:02 -0600 Subject: [PATCH 05/58] Added config option to address instance behind reverse proxy Config options now allow setting a "root url", which defaults to the request url root. Saving a new url in this field will allow for proper redirects and usage of the opensearch element. Also provides a possible solution for #17, where the default flask redirect method redirects to http instead of https. --- app/routes.py | 22 +++++++++++++++------- app/static/css/main.css | 4 ++++ app/static/js/controller.js | 19 ++++++------------- app/templates/index.html | 4 ++++ 4 files changed, 29 insertions(+), 20 deletions(-) diff --git a/app/routes.py b/app/routes.py index 5091cfc..09c39a8 100644 --- a/app/routes.py +++ b/app/routes.py @@ -19,12 +19,17 @@ CONFIG_PATH = app.config['STATIC_FOLDER'] + '/config.json' @app.before_request def before_request_func(): g.user_request = Request(request.headers.get('User-Agent')) - g.user_config = json.load(open(CONFIG_PATH)) if os.path.exists(CONFIG_PATH) else {} + g.user_config = json.load(open(CONFIG_PATH)) if os.path.exists(CONFIG_PATH) else {'url': request.url_root} + + if 'url' not in g.user_config or not g.user_config['url']: + g.user_config['url'] = request.url_root + + g.app_location = g.user_config['url'] @app.errorhandler(404) def unknown_page(e): - return redirect('/') + return redirect(g.app_location) @app.route('/', methods=['GET']) @@ -35,11 +40,11 @@ def index(): @app.route('/opensearch.xml', methods=['GET']) def opensearch(): - url_root = request.url_root - if url_root.endswith('/'): - url_root = url_root[:-1] + opensearch_url = g.app_location + if opensearch_url.endswith('/'): + opensearch_url = opensearch_url[:-1] - template = render_template('opensearch.xml', main_url=url_root) + template = render_template('opensearch.xml', main_url=opensearch_url) response = 
make_response(template) response.headers['Content-Type'] = 'application/xml' return response @@ -78,11 +83,14 @@ def config(): return json.dumps(g.user_config) else: config_data = request.form.to_dict() + if 'url' not in config_data or not config_data['url']: + config_data['url'] = request.url_root + with open(app.config['STATIC_FOLDER'] + '/config.json', 'w') as config_file: config_file.write(json.dumps(config_data, indent=4)) config_file.close() - return redirect('/') + return redirect(config_data['url']) @app.route('/url', methods=['GET']) diff --git a/app/static/css/main.css b/app/static/css/main.css index df6be12..f482373 100644 --- a/app/static/css/main.css +++ b/app/static/css/main.css @@ -113,3 +113,7 @@ button::-moz-focus-inner { -webkit-box-decoration-break: clone; box-decoration-break: clone; } + +.hidden { + display: none; +} diff --git a/app/static/js/controller.js b/app/static/js/controller.js index fed98d8..acf16af 100644 --- a/app/static/js/controller.js +++ b/app/static/js/controller.js @@ -15,7 +15,7 @@ const setupSearchLayout = () => { }); } -const fillConfigValues = (near, nojs, dark) => { +const fillConfigValues = (near, nojs, dark, url) => { // Request existing config info let xhrGET = new XMLHttpRequest(); xhrGET.open("GET", "/config"); @@ -29,19 +29,11 @@ const fillConfigValues = (near, nojs, dark) => { let configSettings = JSON.parse(xhrGET.responseText); near.value = configSettings["near"] ? configSettings["near"] : ""; - near.addEventListener("keyup", function() { - configSettings["near"] = near.value; - }); - nojs.checked = !!configSettings["nojs"]; - nojs.addEventListener("change", function() { - configSettings["nojs"] = nojs.checked ? 1 : 0; - }); - dark.checked = !!configSettings["dark"]; - dark.addEventListener("change", function() { - configSettings["dark"] = dark.checked ? 1 : 0; - }); + + // Addresses the issue of incorrect URL being used behind reverse proxy + url.value = configSettings["url"] ? 
configSettings["url"] : ""; }; xhrGET.send(); @@ -65,8 +57,9 @@ const setupConfigLayout = () => { const near = document.getElementById("config-near"); const noJS = document.getElementById("config-nojs"); const dark = document.getElementById("config-dark"); + const url = document.getElementById("config-url"); - fillConfigValues(near, noJS, dark); + fillConfigValues(near, noJS, dark, url); } document.addEventListener("DOMContentLoaded", function() { diff --git a/app/templates/index.html b/app/templates/index.html index a221e49..b6c12fd 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -53,6 +53,10 @@ +
+ + +
From 122a67d6abca9315de389c7215311c101995f153 Mon Sep 17 00:00:00 2001 From: Ben Busby <33362396+benbusby@users.noreply.github.com> Date: Sun, 10 May 2020 13:59:37 -0600 Subject: [PATCH 06/58] Added docker-compose.yml --- docker-compose.yml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 docker-compose.yml diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..c6c9ced --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,9 @@ +version: "3" + +services: + whoogle-search: + image: benbusby/whoogle-search + container_name: whoogle-search + ports: + - 8888:5000 + restart: unless-stopped From 6e1af0aa9e7010525de3cc4e7b5e8ca17093dfe7 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Mon, 11 May 2020 11:58:12 -0600 Subject: [PATCH 07/58] Update issue templates Updated bug report description, added feature request template --- .github/ISSUE_TEMPLATE/bug_report.md | 2 +- .github/ISSUE_TEMPLATE/feature_request.md | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 9bc149f..ea82955 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,6 +1,6 @@ --- name: Bug report -about: Create a bug report to help improve Whoogle +about: Create a bug report to help fix an issue with Whoogle title: "[BUG] " labels: bug assignees: benbusby diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..d89c925 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,17 @@ +--- +name: Feature request +about: Suggest a feature that would improve Whoogle +title: '' +labels: enhancement +assignees: '' + +--- + +**Describe the feature you'd like to see added** +A short description of the feature, and what it would accomplish. 
+ +**Describe which parts of the project this would modify (front end/back end/configuration/etc)** +A short description of which aspects of Whoogle Search would need modification + +**Additional context** +Add any other context or screenshots about the feature request here. From f36a63e3f0b7dc128f0817b73a4a52ca629368d4 Mon Sep 17 00:00:00 2001 From: ygsk10 <60314251+ygsk10@users.noreply.github.com> Date: Tue, 12 May 2020 01:08:35 +0000 Subject: [PATCH 08/58] Update README.md (#22) * Update README.md * Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6e281fe..205274d 100644 --- a/README.md +++ b/README.md @@ -172,8 +172,9 @@ A good solution for this is to set up a simple cronjob on any device at your hom For instance, adding `*/20 7-23 * * * curl https://.herokuapp.com > /home//whoogle-refresh` will fetch the home page of the app every 20 minutes between 7am and midnight, allowing for downtime from midnight to 7am. And again, this wouldn't be a hard limit - you'd still have plenty of remaining hours of uptime each month in case you were searching after this window has closed. -Since the instance is destroyed and rebuilt after inactivity, config settings will be reset once the app enters downtime. If you have configuration settings active that you'd like to keep between periods of downtime (like dark mode for example), you could instead add `*/20 7-23 * * * curl -d "dark=1" -X POST https://.herokuapp.com > /home//whoogle-refresh` to keep these settings more or less permanent, and still keep the app from entering downtime when you're using it. +Since the instance is destroyed and rebuilt after inactivity, config settings will be reset once the app enters downtime. 
If you have configuration settings active that you'd like to keep between periods of downtime (like dark mode for example), you could instead add `*/20 7-23 * * * curl -d "dark=1" -X POST https://.herokuapp.com/config > /home//whoogle-refresh` to keep these settings more or less permanent, and still keep the app from entering downtime when you're using it. +Available config values are `near`, `nojs`, `dark` and `url` ## FAQ **What's the difference between this and [Searx](https://github.com/asciimoo/searx)?** From dcb2517a6c7f047267370ced73b3a73170ff5b52 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Mon, 11 May 2020 19:12:26 -0600 Subject: [PATCH 09/58] Update issue templates Removed myself as the auto assignee of all bug reports --- .github/ISSUE_TEMPLATE/bug_report.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index ea82955..5cfddc1 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -3,7 +3,7 @@ name: Bug report about: Create a bug report to help fix an issue with Whoogle title: "[BUG] " labels: bug -assignees: benbusby +assignees: '' --- From 445019d204e7e931acc1d4c3b1b478df2803c4b4 Mon Sep 17 00:00:00 2001 From: Ben Busby <33362396+benbusby@users.noreply.github.com> Date: Tue, 12 May 2020 00:45:56 -0600 Subject: [PATCH 10/58] Fixed RAM usage bug Pushing straight to master since this is an extremely simple fix, with a pretty large performance benefit. The Phyme library used for generating a User Agent rhyme was consuming an absolute unit of memory. Now that it's removed, it's using about 10x less memory, at the cost of User Agents being not as funny anymore. 
--- app/request.py | 8 ++++---- app/rhyme.py | 25 ------------------------- requirements.txt | 1 - 3 files changed, 4 insertions(+), 30 deletions(-) delete mode 100644 app/rhyme.py diff --git a/app/request.py b/app/request.py index 471f4f4..aebc716 100644 --- a/app/request.py +++ b/app/request.py @@ -1,6 +1,6 @@ -from app import rhyme from io import BytesIO import pycurl +import random import urllib.parse as urlparse # Base search url @@ -16,9 +16,9 @@ VALID_PARAMS = ['tbs', 'tbm', 'start', 'near'] def gen_user_agent(normal_ua): is_mobile = 'Android' in normal_ua or 'iPhone' in normal_ua - mozilla = rhyme.get_rhyme('Mo') + rhyme.get_rhyme('zilla') - firefox = rhyme.get_rhyme('Fire') + rhyme.get_rhyme('fox') - linux = rhyme.get_rhyme('Lin') + 'ux' + mozilla = random.choice(['Moo', 'Woah', 'Bro', 'Slow']) + 'zilla' + firefox = random.choice(['Choir', 'Squier', 'Higher', 'Wire']) + 'fox' + linux = random.choice(['Win', 'Sin', 'Gin', 'Fin', 'Kin']) + 'ux' if is_mobile: return MOBILE_UA.format(mozilla, firefox) diff --git a/app/rhyme.py b/app/rhyme.py deleted file mode 100644 index 23b9137..0000000 --- a/app/rhyme.py +++ /dev/null @@ -1,25 +0,0 @@ -import itertools -from Phyme import Phyme -import random -import sys -import time - -random.seed(time.time()) - -ph = Phyme() - - -def get_rhyme(word): - # Get all rhymes and merge to one list (normally separated by syllable count) - rhymes = ph.get_perfect_rhymes(word) - rhyme_vals = list(itertools.chain.from_iterable(list(rhymes.values()))) - - # Pick a random rhyme and strip out any non alpha characters - rhymed_word = rhyme_vals[random.randint(0, len(rhyme_vals) - 1)] - rhymed_word = ''.join(letter for letter in rhymed_word if letter.isalpha()) - - return rhymed_word.capitalize() - - -if __name__ == '__main__': - print(get_rhyme(sys.argv[1])) diff --git a/requirements.txt b/requirements.txt index 02e43ce..5a6a937 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,6 @@ Flask==1.1.1 itsdangerous==1.1.0 
Jinja2==2.10.3 MarkupSafe==1.1.1 -Phyme==0.0.9 pycparser==2.19 pycurl==7.43.0.4 pyOpenSSL==19.1.0 From f7e69bded17fd02b4d8bdf7dceefe81e16e0ee66 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Tue, 12 May 2020 00:51:00 -0600 Subject: [PATCH 11/58] Minor punctuation fix --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 205274d..818289b 100644 --- a/README.md +++ b/README.md @@ -174,7 +174,7 @@ For instance, adding `*/20 7-23 * * * curl https://.heroku Since the instance is destroyed and rebuilt after inactivity, config settings will be reset once the app enters downtime. If you have configuration settings active that you'd like to keep between periods of downtime (like dark mode for example), you could instead add `*/20 7-23 * * * curl -d "dark=1" -X POST https://.herokuapp.com/config > /home//whoogle-refresh` to keep these settings more or less permanent, and still keep the app from entering downtime when you're using it. -Available config values are `near`, `nojs`, `dark` and `url` +Available config values are `near`, `nojs`, `dark` and `url`. ## FAQ **What's the difference between this and [Searx](https://github.com/asciimoo/searx)?** From 14a41a89b65e94e0502ac306b3688ae568615c33 Mon Sep 17 00:00:00 2001 From: Jake Howard Date: Tue, 12 May 2020 22:32:16 +0100 Subject: [PATCH 12/58] No need to chmod `whoogle-search` script inside container (#33) --- Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 1eee37f..d5fa0a2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,6 @@ COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt COPY . . 
-RUN chmod +x ./whoogle-search EXPOSE 5000 From f700ed88e77a2eebafcc73cf1d41fb22e2ddc210 Mon Sep 17 00:00:00 2001 From: Jake Howard Date: Wed, 13 May 2020 00:14:55 +0100 Subject: [PATCH 13/58] Swap out Flask's default web server for Waitress (#32) * Ignore venv when building docker file * Remove reference to 8888 port It wasn't really used anywhere, and setting it to 5000 everywhere removes ambiguity, and makes things easier to track and reason about * Use waitress rather than Flask's built in web server It's not production grade * Actually add waitress to requirements Woops! --- .dockerignore | 1 + README.md | 10 +++++----- app/__main__.py | 3 +++ app/routes.py | 13 ++++++++----- docker-compose.yml | 2 +- requirements.txt | 1 + whoogle-search | 4 +--- 7 files changed, 20 insertions(+), 14 deletions(-) create mode 100644 app/__main__.py diff --git a/.dockerignore b/.dockerignore index 2d2ecd6..80b070a 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1 +1,2 @@ .git/ +venv/ diff --git a/README.md b/README.md index 818289b..4a2afd8 100644 --- a/README.md +++ b/README.md @@ -57,11 +57,11 @@ Provides: - Downtime after periods of inactivity \([solution](https://github.com/benbusby/whoogle-search#prevent-downtime-heroku-only)\) ### B) [pipx](https://github.com/pipxproject/pipx#install-pipx) -Persistent install: +Persistent install: `pipx install git+https://github.com/benbusby/whoogle-search.git` -Sandboxed temporary instance: +Sandboxed temporary instance: `pipx run git+https://github.com/benbusby/whoogle-search.git whoogle-search` @@ -76,7 +76,7 @@ Whoogle Search console runner optional arguments: -h, --help show this help message and exit - --port Specifies a port to run on (default 8888) + --port Specifies a port to run on (default 5000) --host Specifies the host address to use (default 127.0.0.1) --debug Activates debug mode for the Flask server (default False) ``` @@ -104,7 +104,7 @@ pip install -r requirements.txt git clone 
https://github.com/benbusby/whoogle-search.git cd whoogle-search docker build --tag whooglesearch:1.0 . -docker run --publish 8888:5000 --detach --name whooglesearch whooglesearch:1.0 +docker run --publish 5000:5000 --detach --name whooglesearch whooglesearch:1.0 ``` And kill with: `docker rm --force whooglesearch` @@ -172,7 +172,7 @@ A good solution for this is to set up a simple cronjob on any device at your hom For instance, adding `*/20 7-23 * * * curl https://.herokuapp.com > /home//whoogle-refresh` will fetch the home page of the app every 20 minutes between 7am and midnight, allowing for downtime from midnight to 7am. And again, this wouldn't be a hard limit - you'd still have plenty of remaining hours of uptime each month in case you were searching after this window has closed. -Since the instance is destroyed and rebuilt after inactivity, config settings will be reset once the app enters downtime. If you have configuration settings active that you'd like to keep between periods of downtime (like dark mode for example), you could instead add `*/20 7-23 * * * curl -d "dark=1" -X POST https://.herokuapp.com/config > /home//whoogle-refresh` to keep these settings more or less permanent, and still keep the app from entering downtime when you're using it. +Since the instance is destroyed and rebuilt after inactivity, config settings will be reset once the app enters downtime. If you have configuration settings active that you'd like to keep between periods of downtime (like dark mode for example), you could instead add `*/20 7-23 * * * curl -d "dark=1" -X POST https://.herokuapp.com/config > /home//whoogle-refresh` to keep these settings more or less permanent, and still keep the app from entering downtime when you're using it. Available config values are `near`, `nojs`, `dark` and `url`. 
## FAQ diff --git a/app/__main__.py b/app/__main__.py new file mode 100644 index 0000000..03a424c --- /dev/null +++ b/app/__main__.py @@ -0,0 +1,3 @@ +from .routes import run_app + +run_app() diff --git a/app/routes.py b/app/routes.py index 09c39a8..7886de9 100644 --- a/app/routes.py +++ b/app/routes.py @@ -9,6 +9,7 @@ import io import json import os import urllib.parse as urlparse +import waitress app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__))) app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static')) @@ -146,12 +147,14 @@ def window(): def run_app(): parser = argparse.ArgumentParser(description='Whoogle Search console runner') - parser.add_argument('--port', default=8888, metavar='', - help='Specifies a port to run on (default 8888)') + parser.add_argument('--port', default=5000, metavar='', + help='Specifies a port to run on (default 5000)') parser.add_argument('--host', default='127.0.0.1', metavar='', help='Specifies the host address to use (default 127.0.0.1)') parser.add_argument('--debug', default=False, action='store_true', - help='Activates debug mode for the Flask server (default False)') + help='Activates debug mode for the server (default False)') args = parser.parse_args() - - app.run(host=args.host, port=args.port, debug=args.debug) + if args.debug: + app.run(host=args.host, port=args.port, debug=args.debug) + else: + waitress.serve(app, listen="{}:{}".format(args.host, args.port)) diff --git a/docker-compose.yml b/docker-compose.yml index c6c9ced..479b7f3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,5 +5,5 @@ services: image: benbusby/whoogle-search container_name: whoogle-search ports: - - 8888:5000 + - 5000:5000 restart: unless-stopped diff --git a/requirements.txt b/requirements.txt index 5a6a937..eac1d8b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,3 +15,4 @@ python-dateutil==2.8.1 six==1.14.0 soupsieve==1.9.5 
Werkzeug==0.16.0 +waitress==1.4.3 diff --git a/whoogle-search b/whoogle-search index e8bcc7f..3393528 100755 --- a/whoogle-search +++ b/whoogle-search @@ -17,11 +17,9 @@ export STATIC_FOLDER=$APP_ROOT/static mkdir -p $STATIC_FOLDER -pkill flask - # Check for regular vs test run if [[ $SUBDIR == "test" ]]; then pytest -sv else - flask run --host="0.0.0.0" --port=$PORT + python3 -m app --port $PORT fi From a11ceb0a57124d5a6b4ac9a74b4cfd4bf8578cd0 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Tue, 12 May 2020 17:15:53 -0600 Subject: [PATCH 14/58] Feature: language config (#27) * Added language configuration support Main page now has a dropdown for selecting preferred language of results. Refactored config to be its own model with language constants. * Added more language support Interface language is now updated using the "hl" arg Fixed chinese traditional and simplified values Updated decoding of characters to gb2312 * Updated to use conditional decoding dependent on language * Updated filter to not rely on valid config to work properly --- app/filter.py | 2 +- app/models/__init__.py | 0 app/models/config.py | 74 ++++++++++++++++++++++++++++++++++++++++ app/request.py | 16 +++++++-- app/routes.py | 24 ++++++++----- app/templates/index.html | 13 +++++++ 6 files changed, 116 insertions(+), 13 deletions(-) create mode 100644 app/models/__init__.py create mode 100644 app/models/config.py diff --git a/app/filter.py b/app/filter.py index 64e35e7..a77116c 100644 --- a/app/filter.py +++ b/app/filter.py @@ -19,7 +19,7 @@ class Filter: if config is None: config = {} - self.near = config['near'] if 'near' in config else None + self.near = config['near'] if 'near' in config else '' self.dark = config['dark'] if 'dark' in config else False self.nojs = config['nojs'] if 'nojs' in config else False self.mobile = mobile diff --git a/app/models/__init__.py b/app/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/models/config.py b/app/models/config.py 
new file mode 100644 index 0000000..ee3c4ac --- /dev/null +++ b/app/models/config.py @@ -0,0 +1,74 @@ + +class Config: + # Derived from here: + # https://sites.google.com/site/tomihasa/google-language-codes#searchlanguage + LANGUAGES = [ + {'name': 'English', 'value': 'lang_en'}, + {'name': 'Afrikaans', 'value': 'lang_af'}, + {'name': 'Arabic', 'value': 'lang_ar'}, + {'name': 'Armenian', 'value': 'lang_hy'}, + {'name': 'Belarusian', 'value': 'lang_be'}, + {'name': 'Bulgarian', 'value': 'lang_bg'}, + {'name': 'Catalan', 'value': 'lang_ca'}, + {'name': 'Chinese (Simplified)', 'value': 'lang_zh-CN'}, + {'name': 'Chinese (Traditional)', 'value': 'lang_zh-TW'}, + {'name': 'Croatian', 'value': 'lang_hr'}, + {'name': 'Czech', 'value': 'lang_cs'}, + {'name': 'Danish', 'value': 'lang_da'}, + {'name': 'Dutch', 'value': 'lang_nl'}, + {'name': 'Esperanto', 'value': 'lang_eo'}, + {'name': 'Estonian', 'value': 'lang_et'}, + {'name': 'Filipino', 'value': 'lang_tl'}, + {'name': 'Finnish', 'value': 'lang_fi'}, + {'name': 'French', 'value': 'lang_fr'}, + {'name': 'German', 'value': 'lang_de'}, + {'name': 'Greek', 'value': 'lang_el'}, + {'name': 'Hebrew', 'value': 'lang_iw'}, + {'name': 'Hindi', 'value': 'lang_hi'}, + {'name': 'Hungarian', 'value': 'lang_hu'}, + {'name': 'Icelandic', 'value': 'lang_is'}, + {'name': 'Indonesian', 'value': 'lang_id'}, + {'name': 'Italian', 'value': 'lang_it'}, + {'name': 'Japanese', 'value': 'lang_ja'}, + {'name': 'Korean', 'value': 'lang_ko'}, + {'name': 'Latvian', 'value': 'lang_lv'}, + {'name': 'Lithuanian', 'value': 'lang_lt'}, + {'name': 'Norwegian', 'value': 'lang_no'}, + {'name': 'Persian', 'value': 'lang_fa'}, + {'name': 'Polish', 'value': 'lang_pl'}, + {'name': 'Portuguese', 'value': 'lang_pt'}, + {'name': 'Romanian', 'value': 'lang_ro'}, + {'name': 'Russian', 'value': 'lang_ru'}, + {'name': 'Serbian', 'value': 'lang_sr'}, + {'name': 'Slovak', 'value': 'lang_sk'}, + {'name': 'Slovenian', 'value': 'lang_sl'}, + {'name': 'Spanish', 'value': 
'lang_es'}, + {'name': 'Swahili', 'value': 'lang_sw'}, + {'name': 'Swedish', 'value': 'lang_sv'}, + {'name': 'Thai', 'value': 'lang_th'}, + {'name': 'Turkish', 'value': 'lang_tr'}, + {'name': 'Ukrainian', 'value': 'lang_uk'}, + {'name': 'Vietnamese', 'value': 'lang_vi'}, + ] + + def __init__(self, **kwargs): + self.url = '' + self.lang = 'lang_en' + self.dark = False + self.nojs = False + self.near = '' + + for key, value in kwargs.items(): + setattr(self, key, value) + + def __getitem__(self, name): + return getattr(self, name) + + def __setitem__(self, name, value): + return setattr(self, name, value) + + def __delitem__(self, name): + return delattr(self, name) + + def __contains__(self, name): + return hasattr(self, name) \ No newline at end of file diff --git a/app/request.py b/app/request.py index aebc716..f50bfe4 100644 --- a/app/request.py +++ b/app/request.py @@ -26,7 +26,7 @@ def gen_user_agent(normal_ua): return DESKTOP_UA.format(mozilla, linux, firefox) -def gen_query(query, args, near_city=None): +def gen_query(query, args, near_city=None, language='lang_en'): param_dict = {key: '' for key in VALID_PARAMS} # Use :past(hour/day/week/month/year) if available # example search "new restaurants :past month" @@ -49,6 +49,9 @@ def gen_query(query, args, near_city=None): if near_city is not None: param_dict['near'] = '&near=' + urlparse.quote(near_city) + # Set language for results (lr) and interface (hl) + param_dict['lr'] = '&lr=' + language + '&hl=' + language.replace('lang_', '') + for val in param_dict.values(): if not val or val is None: continue @@ -58,12 +61,19 @@ def gen_query(query, args, near_city=None): class Request: - def __init__(self, normal_ua): + def __init__(self, normal_ua, language='lang_en'): self.modified_user_agent = gen_user_agent(normal_ua) + self.language = language def __getitem__(self, name): return getattr(self, name) + def get_decode_value(self): + if 'lang_zh' in self.language: + return 'gb2312' + else: + return 'unicode-escape' 
+ def send(self, base_url=SEARCH_URL, query='', return_bytes=False): response_header = [] @@ -80,4 +90,4 @@ class Request: if return_bytes: return b_obj.getvalue() else: - return b_obj.getvalue().decode('unicode-escape', 'ignore') + return b_obj.getvalue().decode(self.get_decode_value(), 'ignore') diff --git a/app/routes.py b/app/routes.py index 7886de9..ba2394c 100644 --- a/app/routes.py +++ b/app/routes.py @@ -1,5 +1,6 @@ from app import app from app.filter import Filter +from app.models.config import Config from app.request import Request, gen_query import argparse from bs4 import BeautifulSoup @@ -19,13 +20,14 @@ CONFIG_PATH = app.config['STATIC_FOLDER'] + '/config.json' @app.before_request def before_request_func(): - g.user_request = Request(request.headers.get('User-Agent')) - g.user_config = json.load(open(CONFIG_PATH)) if os.path.exists(CONFIG_PATH) else {'url': request.url_root} + json_config = json.load(open(CONFIG_PATH)) if os.path.exists(CONFIG_PATH) else {'url': request.url_root} + g.user_config = Config(**json_config) - if 'url' not in g.user_config or not g.user_config['url']: - g.user_config['url'] = request.url_root + if not g.user_config.url: + g.user_config.url = request.url_root - g.app_location = g.user_config['url'] + g.user_request = Request(request.headers.get('User-Agent'), language=g.user_config.lang) + g.app_location = g.user_config.url @app.errorhandler(404) @@ -35,8 +37,12 @@ def unknown_page(e): @app.route('/', methods=['GET']) def index(): - bg = '#000' if 'dark' in g.user_config and g.user_config['dark'] else '#fff' - return render_template('index.html', bg=bg, ua=g.user_request.modified_user_agent) + bg = '#000' if g.user_config.dark else '#fff' + return render_template('index.html', + bg=bg, + ua=g.user_request.modified_user_agent, + languages=Config.LANGUAGES, + current_lang=g.user_config.lang) @app.route('/opensearch.xml', methods=['GET']) @@ -69,7 +75,7 @@ def search(): mobile = 'Android' in user_agent or 'iPhone' in user_agent 
content_filter = Filter(mobile, g.user_config, secret_key=app.secret_key) - full_query = gen_query(q, request_params, content_filter.near) + full_query = gen_query(q, request_params, content_filter.near, language=g.user_config.lang) get_body = g.user_request.send(query=full_query) results = content_filter.reskin(get_body) @@ -81,7 +87,7 @@ def search(): @app.route('/config', methods=['GET', 'POST']) def config(): if request.method == 'GET': - return json.dumps(g.user_config) + return json.dumps(g.user_config.__dict__) else: config_data = request.form.to_dict() if 'url' not in config_data or not config_data['url']: diff --git a/app/templates/index.html b/app/templates/index.html index 8332a51..89c38d3 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -41,6 +41,19 @@ User Agent: {{ ua }} +
+ + +
From db7cf7381bcbc19d4ef85aa1945f9d559d7f7d69 Mon Sep 17 00:00:00 2001 From: Whitney B Date: Tue, 12 May 2020 22:32:15 -0700 Subject: [PATCH 15/58] Fixed missing host parameter bug. (#38) Co-authored-by: Whitney Burian --- whoogle-search | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whoogle-search b/whoogle-search index 3393528..b4f229c 100755 --- a/whoogle-search +++ b/whoogle-search @@ -21,5 +21,5 @@ mkdir -p $STATIC_FOLDER if [[ $SUBDIR == "test" ]]; then pytest -sv else - python3 -m app --port $PORT + python3 -um app --host 0.0.0.0 --port $PORT fi From f4bd3df2bb65e76d7eabe0e53a6798c9e4dba89e Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Wed, 13 May 2020 00:19:51 -0600 Subject: [PATCH 16/58] Added option to search only via GET request (#36) This addresses #18, which brought up the issue of searching with Whoogle with the search instance set to always use a specific container in Firefox Container Tabs. Could also be useful if you want to share your search results or something, I guess. Though nobody likes when people do that. 
--- app/models/config.py | 1 + app/routes.py | 7 +++++-- app/static/js/controller.js | 25 ++++++++++++++----------- app/templates/index.html | 6 +++++- app/templates/opensearch.xml | 2 +- 5 files changed, 26 insertions(+), 15 deletions(-) diff --git a/app/models/config.py b/app/models/config.py index ee3c4ac..1c866d7 100644 --- a/app/models/config.py +++ b/app/models/config.py @@ -57,6 +57,7 @@ class Config: self.dark = False self.nojs = False self.near = '' + self.get_only = False for key, value in kwargs.items(): setattr(self, key, value) diff --git a/app/routes.py b/app/routes.py index ba2394c..6b48473 100644 --- a/app/routes.py +++ b/app/routes.py @@ -42,7 +42,8 @@ def index(): bg=bg, ua=g.user_request.modified_user_agent, languages=Config.LANGUAGES, - current_lang=g.user_config.lang) + current_lang=g.user_config.lang, + request_type='get' if g.user_config.get_only else 'post') @app.route('/opensearch.xml', methods=['GET']) @@ -51,7 +52,9 @@ def opensearch(): if opensearch_url.endswith('/'): opensearch_url = opensearch_url[:-1] - template = render_template('opensearch.xml', main_url=opensearch_url) + template = render_template('opensearch.xml', + main_url=opensearch_url, + request_type='get' if g.user_config.get_only else 'post') response = make_response(template) response.headers['Content-Type'] = 'application/xml' return response diff --git a/app/static/js/controller.js b/app/static/js/controller.js index acf16af..e950eb5 100644 --- a/app/static/js/controller.js +++ b/app/static/js/controller.js @@ -13,9 +13,16 @@ const setupSearchLayout = () => { searchBtn.click(); } }); -} +}; + +const fillConfigValues = () => { + // Establish all config value elements + const near = document.getElementById("config-near"); + const noJS = document.getElementById("config-nojs"); + const dark = document.getElementById("config-dark"); + const url = document.getElementById("config-url"); + const getOnly = document.getElementById("config-get-only"); -const fillConfigValues = 
(near, nojs, dark, url) => { // Request existing config info let xhrGET = new XMLHttpRequest(); xhrGET.open("GET", "/config"); @@ -29,15 +36,16 @@ const fillConfigValues = (near, nojs, dark, url) => { let configSettings = JSON.parse(xhrGET.responseText); near.value = configSettings["near"] ? configSettings["near"] : ""; - nojs.checked = !!configSettings["nojs"]; + noJS.checked = !!configSettings["nojs"]; dark.checked = !!configSettings["dark"]; + getOnly.checked = !!configSettings["get_only"]; // Addresses the issue of incorrect URL being used behind reverse proxy url.value = configSettings["url"] ? configSettings["url"] : ""; }; xhrGET.send(); -} +}; const setupConfigLayout = () => { // Setup whoogle config @@ -54,13 +62,8 @@ const setupConfigLayout = () => { content.classList.toggle("open"); }); - const near = document.getElementById("config-near"); - const noJS = document.getElementById("config-nojs"); - const dark = document.getElementById("config-dark"); - const url = document.getElementById("config-url"); - - fillConfigValues(near, noJS, dark, url); -} + fillConfigValues(); +}; document.addEventListener("DOMContentLoaded", function() { setTimeout(function() { diff --git a/app/templates/index.html b/app/templates/index.html index 89c38d3..222fa17 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -26,7 +26,7 @@
-
+
@@ -66,6 +66,10 @@
+
+ + +
diff --git a/app/templates/opensearch.xml b/app/templates/opensearch.xml index 82254b2..bf44b00 100644 --- a/app/templates/opensearch.xml +++ b/app/templates/opensearch.xml @@ -4,7 +4,7 @@ Whoogle: A lightweight, deployable Google search proxy for desktop/mobile that removes Javascript, AMP links, and ads UTF-8 /static/img/favicon/favicon-32x32.png - + From ab4715314195d3c1896b7997c98fdaccd3eb27f3 Mon Sep 17 00:00:00 2001 From: Jake Howard Date: Wed, 13 May 2020 19:56:35 +0100 Subject: [PATCH 17/58] Use slim version of docker container (#34) * Use slim version of docker container This massively reduces the size of the final container (330mb -> 60mb) * Install libcurl and libssl explicitly in container They appear to be missing from `-slim` variant containers Co-authored-by: Ben Busby Co-authored-by: Ben Busby --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index d5fa0a2..4a8acf5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ -FROM python:3.8 +FROM python:3.8-slim WORKDIR /usr/src/app - +RUN apt-get update && apt-get install -y build-essential libcurl4-openssl-dev libssl-dev COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt From 87f0a8d4968056d8051ee9cc8ddd5223bb004e2c Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Wed, 13 May 2020 18:27:04 -0600 Subject: [PATCH 18/58] Added volume mounted config to Dockerfile (#39) --- Dockerfile | 5 +++++ app/routes.py | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4a8acf5..f3438aa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,6 +5,11 @@ RUN apt-get update && apt-get install -y build-essential libcurl4-openssl-dev li COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt +ARG config_dir=/config +RUN mkdir $config_dir +VOLUME $config_dir +ENV CONFIG_VOLUME=$config_dir + COPY . . 
EXPOSE 5000 diff --git a/app/routes.py b/app/routes.py index 6b48473..4fa3c93 100644 --- a/app/routes.py +++ b/app/routes.py @@ -15,7 +15,7 @@ import waitress app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__))) app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static')) -CONFIG_PATH = app.config['STATIC_FOLDER'] + '/config.json' +CONFIG_PATH = os.getenv('CONFIG_VOLUME', app.config['STATIC_FOLDER']) + '/config.json' @app.before_request @@ -96,7 +96,7 @@ def config(): if 'url' not in config_data or not config_data['url']: config_data['url'] = request.url_root - with open(app.config['STATIC_FOLDER'] + '/config.json', 'w') as config_file: + with open(CONFIG_PATH, 'w') as config_file: config_file.write(json.dumps(config_data, indent=4)) config_file.close() From e3d002f6c13f2670b0d5a046fe25056490ff683e Mon Sep 17 00:00:00 2001 From: Ben Busby <33362396+benbusby@users.noreply.github.com> Date: Fri, 15 May 2020 10:07:11 -0600 Subject: [PATCH 19/58] Updated readme with more docker instructions --- README.md | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4a2afd8..d22d1d7 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Build Status](https://travis-ci.com/benbusby/whoogle-search.svg?branch=master)](https://travis-ci.com/benbusby/whoogle-search) [![codebeat badge](https://codebeat.co/badges/e96cada2-fb6f-4528-8285-7d72abd74e8d)](https://codebeat.co/projects/github-com-benbusby-shoogle-master) +![Docker Pulls](https://img.shields.io/docker/pulls/benbusby/whoogle-search) Get Google search results, but without any ads, javascript, AMP links, cookies, or IP address tracking. Easily deployable in one click as a Docker app, and customizable with a single config file. 
Quick and simple to implement as a primary search engine replacement on both desktop and mobile. @@ -100,11 +101,27 @@ pip install -r requirements.txt 2. Clone and deploy the docker app using a method below: #### Docker CLI +Through Docker Hub: +```bash +docker pull benbusby/whoogle-search +docker run --publish 5000:5000 --detach --name whoogle-search whoogle-search:latest +``` + +or with docker-compose: + ```bash git clone https://github.com/benbusby/whoogle-search.git cd whoogle-search -docker build --tag whooglesearch:1.0 . -docker run --publish 5000:5000 --detach --name whooglesearch whooglesearch:1.0 +docker-compose up +``` + +or by building yourself: + +```bash +git clone https://github.com/benbusby/whoogle-search.git +cd whoogle-search +docker build --tag whoogle-search:1.0 . +docker run --publish 5000:5000 --detach --name whoogle-search whoogle-search:1.0 ``` And kill with: `docker rm --force whooglesearch` From afd5b9aa835e7f4d1e0e3cbd01bcaf3ccdf02af1 Mon Sep 17 00:00:00 2001 From: Ben Busby <33362396+benbusby@users.noreply.github.com> Date: Fri, 15 May 2020 14:17:16 -0600 Subject: [PATCH 20/58] Minor fix to dark mode on img results --- app/filter.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/app/filter.py b/app/filter.py index a77116c..006cbac 100644 --- a/app/filter.py +++ b/app/filter.py @@ -78,7 +78,7 @@ class Filter: # Special rebranding for image search results if img_src.startswith(LOGO_URL): img['src'] = '/static/img/logo.png' - img['height'] = 40 + img['style'] = 'height:40px;width:162px' else: img['src'] = BLANK_B64 @@ -114,9 +114,15 @@ class Filter: # Set up dark mode if active if self.dark: - soup.find('html')['style'] = 'scrollbar-color: #333 #111;' + soup.find('html')['style'] = 'scrollbar-color: #333 #111;color:#fff !important;background:#000 !important' for input_element in soup.findAll('input'): - input_element['style'] = 'color:#fff;' + input_element['style'] = 'color:#fff;background:#000;' + + for 
span_element in soup.findAll('span'): + span_element['style'] = 'color: white;' + + for href_element in soup.findAll('a'): + href_element['style'] = 'color: white' if href_element['href'].startswith('/search') else '' def update_links(self, soup): # Replace hrefs with only the intended destination (no "utm" type tags) From 1ed6178e9a50d961ddff4aa13f772937d07c4981 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Fri, 15 May 2020 15:44:50 -0600 Subject: [PATCH 21/58] Feature: https only -- adds option to enforce https on running instances (#48) * Adding HTTPS enforcement Command line runs of Whoogle Search through pip/pipx/etc will need the `--https-only` flag appended to the run command. Docker runs require the `use_https` build arg applied. * Update README.md Moved https-only note to top of docker run command, updated pip runner help output * Dockerfile: removed HTTPS enforcement, updated PORT setting Dockerfile no longer enforces an HTTPS connection, but still allows for setting via a build arg. The Flask server port is now configurable as a build arg as well, by setting a port number to "whoogle_port" * Fixed incorrect port assignment --- Dockerfile | 8 +++++++- README.md | 6 ++++-- app/routes.py | 10 ++++++++++ whoogle-search | 2 +- 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index f3438aa..18551cc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,8 +10,14 @@ RUN mkdir $config_dir VOLUME $config_dir ENV CONFIG_VOLUME=$config_dir +ARG use_https='' +ENV HTTPS_ONLY=$use_https + +ARG whoogle_port=5000 +ENV EXPOSE_PORT=$whoogle_port + COPY . . 
-EXPOSE 5000 +EXPOSE $EXPOSE_PORT CMD ["./whoogle-search"] diff --git a/README.md b/README.md index d22d1d7..95de42c 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,7 @@ Sandboxed temporary instance: ```bash $ whoogle-search --help usage: whoogle-search [-h] [--port ] [--host ] [--debug] + [--https-only] Whoogle Search console runner @@ -79,7 +80,8 @@ optional arguments: -h, --help show this help message and exit --port Specifies a port to run on (default 5000) --host Specifies the host address to use (default 127.0.0.1) - --debug Activates debug mode for the Flask server (default False) + --debug Activates debug mode for the server (default False) + --https-only Enforces HTTPS redirects for all requests (default False) ``` ### D) Manual @@ -124,7 +126,7 @@ docker build --tag whoogle-search:1.0 . docker run --publish 5000:5000 --detach --name whoogle-search whoogle-search:1.0 ``` -And kill with: `docker rm --force whooglesearch` +And kill with: `docker rm --force whoogle-search` #### Using [Heroku CLI](https://devcenter.heroku.com/articles/heroku-cli) ```bash diff --git a/app/routes.py b/app/routes.py index 4fa3c93..94a10c3 100644 --- a/app/routes.py +++ b/app/routes.py @@ -20,6 +20,12 @@ CONFIG_PATH = os.getenv('CONFIG_VOLUME', app.config['STATIC_FOLDER']) + '/config @app.before_request def before_request_func(): + # Always redirect to https if HTTPS_ONLY is set + if os.getenv('HTTPS_ONLY', False) and request.url.startswith('http://'): + url = request.url.replace('http://', 'https://', 1) + code = 301 + return redirect(url, code=code) + json_config = json.load(open(CONFIG_PATH)) if os.path.exists(CONFIG_PATH) else {'url': request.url_root} g.user_config = Config(**json_config) @@ -162,7 +168,11 @@ def run_app(): help='Specifies the host address to use (default 127.0.0.1)') parser.add_argument('--debug', default=False, action='store_true', help='Activates debug mode for the server (default False)') + parser.add_argument('--https-only', default=False, 
action='store_true', + help='Enforces HTTPS redirects for all requests') args = parser.parse_args() + os.environ['HTTPS_ONLY'] = '1' if args.https_only else '' + if args.debug: app.run(host=args.host, port=args.port, debug=args.debug) else: diff --git a/whoogle-search b/whoogle-search index b4f229c..f87c61a 100755 --- a/whoogle-search +++ b/whoogle-search @@ -7,7 +7,7 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd -P)" # Set default port if unavailable if [[ -z "${PORT}" ]]; then - PORT=5000 + PORT="${EXPOSE_PORT:-5000}" fi # Set directory to serve static content from From 0e9bbc737d3197ff9043e49545dc105e9e785c7c Mon Sep 17 00:00:00 2001 From: Paul Rothrock Date: Fri, 15 May 2020 18:01:26 -0400 Subject: [PATCH 22/58] Add Alfred Instructions (#52) --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 95de42c..67219fd 100644 --- a/README.md +++ b/README.md @@ -173,6 +173,13 @@ Update browser settings: - Click the 3 dot menu in the top right - Navigate to the settings menu and select the "search" sub-menu - Select Whoogle and press "Set as default" + - [Alfred](https://www.alfredapp.com/) (Mac OS X) + 1. Go to `Alfred Preferences` > `Features` > `Web Search` and click `Add Custom Search`. Then configure these settings + - Search URL: `https://\/search?q={query} + - Title: `Whoogle for '{query}'` (or whatever you want) + - Keyword: `whoogle` + + 2. Go to `Default Results` and click the `Setup fallback results` button. Click `+` and add Whoogle, then drag it to the top. 
- Others (TODO) ### Customizing and Configuration From 31237895841f699234e9d19716d028b93dd584af Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Fri, 15 May 2020 16:10:31 -0600 Subject: [PATCH 23/58] Added config option for opening links in new tab (#49) --- app/filter.py | 3 +++ app/models/config.py | 1 + app/static/js/controller.js | 2 ++ app/templates/index.html | 4 ++++ 4 files changed, 10 insertions(+) diff --git a/app/filter.py b/app/filter.py index 006cbac..0cda5d3 100644 --- a/app/filter.py +++ b/app/filter.py @@ -22,6 +22,7 @@ class Filter: self.near = config['near'] if 'near' in config else '' self.dark = config['dark'] if 'dark' in config else False self.nojs = config['nojs'] if 'nojs' in config else False + self.new_tab = config['new_tab'] if 'new_tab' in config else False self.mobile = mobile self.secret_key = secret_key @@ -131,6 +132,8 @@ class Filter: if '/advanced_search' in href: a.decompose() continue + elif self.new_tab: + a['target'] = '_blank' result_link = urlparse.urlparse(href) query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else '' diff --git a/app/models/config.py b/app/models/config.py index 1c866d7..1a53049 100644 --- a/app/models/config.py +++ b/app/models/config.py @@ -57,6 +57,7 @@ class Config: self.dark = False self.nojs = False self.near = '' + self.new_tab = False self.get_only = False for key, value in kwargs.items(): diff --git a/app/static/js/controller.js b/app/static/js/controller.js index e950eb5..02ecdb5 100644 --- a/app/static/js/controller.js +++ b/app/static/js/controller.js @@ -21,6 +21,7 @@ const fillConfigValues = () => { const noJS = document.getElementById("config-nojs"); const dark = document.getElementById("config-dark"); const url = document.getElementById("config-url"); + const newTab = document.getElementById("config-new-tab"); const getOnly = document.getElementById("config-get-only"); // Request existing config info @@ -39,6 +40,7 @@ const fillConfigValues = () => { noJS.checked = 
!!configSettings["nojs"]; dark.checked = !!configSettings["dark"]; getOnly.checked = !!configSettings["get_only"]; + newTab.checked = !!configSettings["new_tab"]; // Addresses the issue of incorrect URL being used behind reverse proxy url.value = configSettings["url"] ? configSettings["url"] : ""; diff --git a/app/templates/index.html b/app/templates/index.html index 222fa17..33f2153 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -66,6 +66,10 @@
+
+ + +
From b4165f99576eace15bb8698a59368e027c762ff4 Mon Sep 17 00:00:00 2001 From: Ben Busby <33362396+benbusby@users.noreply.github.com> Date: Fri, 15 May 2020 16:29:22 -0600 Subject: [PATCH 24/58] Minor improvement to https enforcement --- app/routes.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/app/routes.py b/app/routes.py index 94a10c3..747b847 100644 --- a/app/routes.py +++ b/app/routes.py @@ -20,8 +20,10 @@ CONFIG_PATH = os.getenv('CONFIG_VOLUME', app.config['STATIC_FOLDER']) + '/config @app.before_request def before_request_func(): - # Always redirect to https if HTTPS_ONLY is set - if os.getenv('HTTPS_ONLY', False) and request.url.startswith('http://'): + # Always redirect to https if HTTPS_ONLY is set (otherwise default to false) + https_only = os.getenv('HTTPS_ONLY', False) + + if https_only and request.url.startswith('http://'): url = request.url.replace('http://', 'https://', 1) code = 301 return redirect(url, code=code) @@ -30,7 +32,7 @@ def before_request_func(): g.user_config = Config(**json_config) if not g.user_config.url: - g.user_config.url = request.url_root + g.user_config.url = request.url_root.replace('http://', 'https://') if https_only else request.url_root g.user_request = Request(request.headers.get('User-Agent'), language=g.user_config.lang) g.app_location = g.user_config.url @@ -100,7 +102,7 @@ def config(): else: config_data = request.form.to_dict() if 'url' not in config_data or not config_data['url']: - config_data['url'] = request.url_root + config_data['url'] = g.user_config.url with open(CONFIG_PATH, 'w') as config_file: config_file.write(json.dumps(config_data, indent=4)) From 7ec20ecf46f140b5bae1f598a541db8a9249e5ae Mon Sep 17 00:00:00 2001 From: Christopher Patton Date: Fri, 15 May 2020 15:36:01 -0700 Subject: [PATCH 25/58] Allow bind address override (#53) --- whoogle-search | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/whoogle-search b/whoogle-search 
index f87c61a..72f272d 100755 --- a/whoogle-search +++ b/whoogle-search @@ -3,23 +3,22 @@ # ./whoogle-search # Runs the full web app # ./whoogle-search test # Runs the testing suite -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd -P)" +set -euo pipefail -# Set default port if unavailable -if [[ -z "${PORT}" ]]; then - PORT="${EXPOSE_PORT:-5000}" -fi +SCRIPT_DIR="$(builtin cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" # Set directory to serve static content from -[[ ! -z $1 ]] && SUBDIR="$1" || SUBDIR="app" -export APP_ROOT=$SCRIPT_DIR/$SUBDIR -export STATIC_FOLDER=$APP_ROOT/static +SUBDIR="${1:-app}" +export APP_ROOT="$SCRIPT_DIR/$SUBDIR" +export STATIC_FOLDER="$APP_ROOT/static" -mkdir -p $STATIC_FOLDER +mkdir -p "$STATIC_FOLDER" # Check for regular vs test run -if [[ $SUBDIR == "test" ]]; then +if [[ "$SUBDIR" == "test" ]]; then pytest -sv else - python3 -um app --host 0.0.0.0 --port $PORT + python3 -um app \ + --host "${ADDRESS:-0.0.0.0}" \ + --port "${PORT:-"${EXPOSE_PORT:-5000}"}" fi From 11fa44eec13ee11155db67933466dde4901ac588 Mon Sep 17 00:00:00 2001 From: Ben Busby <33362396+benbusby@users.noreply.github.com> Date: Fri, 15 May 2020 16:47:39 -0600 Subject: [PATCH 26/58] Minor refactoring for clarification Executable renamed to "run" to avoid confusion with pip installed script Updated heroku deploy button to use the heroku-app branch, which by default enforces HTTPS Added instructions for enforcing HTTPS on various deployment options, with note about how this isn't a required task. 
Updated setup.py description to use improved app description --- .travis.yml | 2 +- Dockerfile | 2 +- README.md | 12 ++++++++++-- whoogle-search => run | 4 ++-- setup.py | 2 +- 5 files changed, 15 insertions(+), 7 deletions(-) rename whoogle-search => run (83%) diff --git a/.travis.yml b/.travis.yml index 8695c53..3775cce 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,4 +5,4 @@ before_install: install: - pip install -r requirements.txt script: - - ./whoogle-search test + - ./run test diff --git a/Dockerfile b/Dockerfile index 18551cc..30ab242 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,4 +20,4 @@ COPY . . EXPOSE $EXPOSE_PORT -CMD ["./whoogle-search"] +CMD ["./run"] diff --git a/README.md b/README.md index 67219fd..d63450f 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ If using Heroku Quick Deploy, **you can skip this section**. There are a few different ways to begin using the app, depending on your preferences: ### A) [Heroku Quick Deploy](https://heroku.com/about) -[![Deploy](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy?template=https://github.com/benbusby/whoogle-search) +[![Deploy](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy?template=https://github.com/benbusby/whoogle-search/tree/heroku-app) *Note: Requires a (free) Heroku account* @@ -93,7 +93,7 @@ cd whoogle-search python3 -m venv venv source venv/bin/activate pip install -r requirements.txt -./whoogle-search +./run ``` ### E) Manual (Docker) @@ -200,6 +200,14 @@ For instance, adding `*/20 7-23 * * * curl https://.heroku Since the instance is destroyed and rebuilt after inactivity, config settings will be reset once the app enters downtime. 
If you have configuration settings active that you'd like to keep between periods of downtime (like dark mode for example), you could instead add `*/20 7-23 * * * curl -d "dark=1" -X POST https://.herokuapp.com/config > /home//whoogle-refresh` to keep these settings more or less permanent, and still keep the app from entering downtime when you're using it. +### HTTPS Enforcement +Only needed if your setup requires Flask to redirect to HTTPS on its own -- generally this is something that doesn't need to be handled by Whoogle Search. + +- Heroku: Enforced by default +- Docker: Add `--build-arg use_https=1` to your run command +- Pip/Pipx: Add the `--https-only` flag to the end of the `whoogle-search` command +- Default `run` script: Modify the script locally to include the `--https-only` flag at the end of the python run command + Available config values are `near`, `nojs`, `dark` and `url`. ## FAQ **What's the difference between this and [Searx](https://github.com/asciimoo/searx)?** diff --git a/whoogle-search b/run similarity index 83% rename from whoogle-search rename to run index 72f272d..b2eedbd 100755 --- a/whoogle-search +++ b/run @@ -1,7 +1,7 @@ #!/bin/bash # Usage: -# ./whoogle-search # Runs the full web app -# ./whoogle-search test # Runs the testing suite +# ./run # Runs the full web app +# ./run test # Runs the testing suite set -euo pipefail diff --git a/setup.py b/setup.py index 29d6290..1723f6b 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ setuptools.setup( scripts=['whoogle-search'], include_package_data=True, install_requires=requirements, - description='Self-hosted, ad-free, privacy-respecting alternative to Google search', + description='Self-hosted, ad-free, privacy-respecting Google metasearch engine', long_description=long_description, long_description_content_type='text/markdown', url='https://github.com/benbusby/whoogle-search', From 46b6c66312ced2e3867ea0c70a998dd8dd580800 Mon Sep 17 00:00:00 2001 From: thomcatdotrocks 
<37344783+thomcatdotrocks@users.noreply.github.com> Date: Fri, 15 May 2020 18:04:01 -0500 Subject: [PATCH 27/58] Added instructions for running with systemd (#45) Co-authored-by: ThomCat --- README.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/README.md b/README.md index d63450f..b64df24 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,33 @@ pip install -r requirements.txt ./run ``` +#### systemd Configuration +After building the virtual environment, add the following to `/lib/systemd/system/whoogle.service`: + +``` +[Unit] +Description=Whoogle + +[Service] +Type=simple +User=root +WorkingDirectory= +ExecStart=/venv/bin/python3 -um app --host 0.0.0.0 --port 5000 +ExecReload=/bin/kill -HUP $MAINPID +Restart=always +RestartSec=3 +SyslogIdentifier=whoogle + +[Install] +WantedBy=multi-user.target +``` +Then, +``` +sudo systemctl daemon-reload +sudo systemctl enable whoogle +sudo systemctl start whoogle +``` + ### E) Manual (Docker) 1. Ensure the Docker daemon is running, and is accessible by your user account - To add user permissions, you can execute `sudo usermod -aG docker yourusername` From ea37044d67ee185f72dc60ca9a531e5b97f52543 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Fri, 15 May 2020 17:05:46 -0600 Subject: [PATCH 28/58] Minor change to systemd setup I thought this was addressed during the pull request, but I guess not? 
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b64df24..88d38a1 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ pip install -r requirements.txt ``` #### systemd Configuration -After building the virtual environment, add the following to `/lib/systemd/system/whoogle.service`: +After building the virtual environment, you can add the following to `/lib/systemd/system/whoogle.service` to set up a Whoogle Search systemd service: ``` [Unit] From feae52ac1dabd98e26d3f6a50bb11c14aea05600 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Fri, 15 May 2020 18:25:08 -0600 Subject: [PATCH 29/58] Update issue templates --- .github/ISSUE_TEMPLATE/bug_report.md | 13 +++++++------ .github/ISSUE_TEMPLATE/feature_request.md | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 5cfddc1..1678127 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,7 +1,7 @@ --- name: Bug report about: Create a bug report to help fix an issue with Whoogle -title: "[BUG] " +title: "[BUG] " labels: bug assignees: '' @@ -17,11 +17,12 @@ Steps to reproduce the behavior: 3. Scroll down to '....' 4. See error -**Expected behavior** -A clear and concise description of what you expected to happen. - -**Screenshots** -If applicable, add screenshots to help explain your problem. +**Deployment Method** +- [ ] Heroku (one-click deploy) +- [ ] Docker +- [ ] `run` executable +- [ ] pip/pipx +- [ ] Other: **Desktop (please complete the following information):** - OS: [e.g. 
iOS] diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index d89c925..24bf2f6 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -1,7 +1,7 @@ --- name: Feature request about: Suggest a feature that would improve Whoogle -title: '' +title: "[FEATURE] " labels: enhancement assignees: '' From c341848a74ff3f82b6010e381b675efabe3dbb22 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Fri, 15 May 2020 18:29:21 -0600 Subject: [PATCH 30/58] Update issue templates --- .github/ISSUE_TEMPLATE/bug_report.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 1678127..a174b78 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -22,7 +22,13 @@ Steps to reproduce the behavior: - [ ] Docker - [ ] `run` executable - [ ] pip/pipx -- [ ] Other: +- [ ] Other: [describe setup] + +**Version of Whoogle Search** +- [ ] Latest build from [source] (i.e. GitHub, Docker Hub, pip, etc) +- [ ] Version [version number] +- [ ] Not sure + **Desktop (please complete the following information):** - OS: [e.g. iOS] From 601923e07449befa12c7e527213062370817ae3c Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Sat, 16 May 2020 09:11:00 -0600 Subject: [PATCH 31/58] Update Dockerfile Fixed mkdir call to not error if config dir already exists --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 30ab242..fd8c746 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ COPY requirements.txt . 
RUN pip install --no-cache-dir -r requirements.txt ARG config_dir=/config -RUN mkdir $config_dir +RUN mkdir -p $config_dir VOLUME $config_dir ENV CONFIG_VOLUME=$config_dir From a4382d59f67a3f354c0bbb3fdffaf7792aba4f3f Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Sat, 16 May 2020 09:31:07 -0600 Subject: [PATCH 32/58] Updated redirect code used in https redirects See https://developer.mozilla.org/en-US/docs/Web/HTTP/Redirections 301 redirections do not keep the request method intact, and can occasionally be changed from POST to GET 308 redirections always keep the request method, which is necessary for all POST search requests --- app/routes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/routes.py b/app/routes.py index 747b847..a6016b8 100644 --- a/app/routes.py +++ b/app/routes.py @@ -25,7 +25,7 @@ def before_request_func(): if https_only and request.url.startswith('http://'): url = request.url.replace('http://', 'https://', 1) - code = 301 + code = 308 return redirect(url, code=code) json_config = json.load(open(CONFIG_PATH)) if os.path.exists(CONFIG_PATH) else {'url': request.url_root} From 56bf976ecd4fdac4d8e275736c98a89d621956af Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Mon, 18 May 2020 10:17:21 -0600 Subject: [PATCH 33/58] Added question template --- .github/ISSUE_TEMPLATE/question.md | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/question.md diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md new file mode 100644 index 0000000..a1d9b21 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/question.md @@ -0,0 +1,10 @@ +--- +name: Question +about: Ask a (simple) question about Whoogle +title: "[QUESTION] " +labels: question +assignees: '' + +--- + +Type out your question here. Please make sure that this is a topic that isn't already covered in the README. 
From 0e39b8f97b8be72990e66b8db197045343c0db0d Mon Sep 17 00:00:00 2001 From: Paul Rothrock Date: Mon, 18 May 2020 12:28:23 -0400 Subject: [PATCH 34/58] Added "I'm feeling lucky" function (#46) * Putting '! ' at the beginning of the query now redirects to the first search result Signed-off-by: Paul Rothrock * Moved get_first_url outside of filter class Signed-off-by: Paul Rothrock --- app/filter.py | 58 ++++++++++++++++++++++++++++----------------- app/routes.py | 19 ++++++++++++--- test/test_routes.py | 4 ++++ 3 files changed, 56 insertions(+), 25 deletions(-) diff --git a/app/filter.py b/app/filter.py index 0cda5d3..e2e8168 100644 --- a/app/filter.py +++ b/app/filter.py @@ -13,6 +13,40 @@ BLANK_B64 = '''  ''' +def get_first_link(soup): + # Replace hrefs with only the intended destination (no "utm" type tags) + for a in soup.find_all('a', href=True): + href = a['href'].replace('https://www.google.com', '') + + result_link = urlparse.urlparse(href) + query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else '' + + # Return the first search result URL + if 'url?q=' in href: + return filter_link_args(href) + +def filter_link_args(query_link): + parsed_link = urlparse.urlparse(query_link) + link_args = parse_qs(parsed_link.query) + safe_args = {} + + if len(link_args) == 0 and len(parsed_link) > 0: + return query_link + + for arg in link_args.keys(): + if arg in SKIP_ARGS: + continue + + safe_args[arg] = link_args[arg] + + # Remove original link query and replace with filtered args + query_link = query_link.replace(parsed_link.query, '') + if len(safe_args) > 0: + query_link = query_link + urlparse.urlencode(safe_args, doseq=True) + else: + query_link = query_link.replace('?', '') + + return query_link class Filter: def __init__(self, mobile=False, config=None, secret_key=''): @@ -149,27 +183,7 @@ class Filter: a['href'] = new_search elif 'url?q=' in href: # Strip unneeded arguments - parsed_link = urlparse.urlparse(query_link) - link_args = 
parse_qs(parsed_link.query) - safe_args = {} - - if len(link_args) == 0 and len(parsed_link) > 0: - a['href'] = query_link - continue - - for arg in link_args.keys(): - if arg in SKIP_ARGS: - continue - - safe_args[arg] = link_args[arg] - - # Remove original link query and replace with filtered args - query_link = query_link.replace(parsed_link.query, '') - if len(safe_args) > 0: - query_link = query_link + urlparse.urlencode(safe_args, doseq=True) - else: - query_link = query_link.replace('?', '') - + query_link = filter_link_args(query_link) a['href'] = query_link # Add no-js option @@ -185,4 +199,4 @@ def gen_nojs(soup, link, sibling): nojs_link['style'] = 'display:block;width:100%;' nojs_link.string = 'NoJS Link: ' + nojs_link['href'] sibling.append(BeautifulSoup('


', 'html.parser')) - sibling.append(nojs_link) + sibling.append(nojs_link) \ No newline at end of file diff --git a/app/routes.py b/app/routes.py index a6016b8..81791a0 100644 --- a/app/routes.py +++ b/app/routes.py @@ -1,5 +1,5 @@ from app import app -from app.filter import Filter +from app.filter import Filter, get_first_link from app.models.config import Config from app.request import Request, gen_query import argparse @@ -72,7 +72,7 @@ def opensearch(): def search(): request_params = request.args if request.method == 'GET' else request.form q = request_params.get('q') - + if q is None or len(q) == 0: return redirect('/') else: @@ -82,6 +82,11 @@ def search(): except InvalidToken: pass + feeling_lucky = q.startswith('! ') + + if feeling_lucky: # Well do you, punk? + q = q[2:] + user_agent = request.headers.get('User-Agent') mobile = 'Android' in user_agent or 'iPhone' in user_agent @@ -90,7 +95,15 @@ def search(): get_body = g.user_request.send(query=full_query) results = content_filter.reskin(get_body) - formatted_results = content_filter.clean(BeautifulSoup(results, 'html.parser')) + dirty_soup = BeautifulSoup(results, 'html.parser') + + if feeling_lucky: + redirect_url = get_first_link(dirty_soup) + return redirect(redirect_url, 303) # Using 303 so the browser performs a GET request for the URL + else: + formatted_results = content_filter.clean(dirty_soup) + + return render_template('display.html', query=urlparse.unquote(q), response=formatted_results) diff --git a/test/test_routes.py b/test/test_routes.py index 1124f5f..91e17be 100644 --- a/test/test_routes.py +++ b/test/test_routes.py @@ -17,6 +17,10 @@ def test_search(client): rv = client.get('/search?q=test') assert rv._status_code == 200 +def test_feeling_lucky(client): + rv = client.get('/search?q=!%20test') + assert rv._status_code == 303 + def test_config(client): rv = client.post('/config', data=demo_config) From 38b7b19e2adb66f50e665e04da472ed93b032ff9 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: 
Mon, 18 May 2020 10:30:32 -0600 Subject: [PATCH 35/58] Added basic authentication (#51) Username/password can be set either as Dockerfile build arguments or passed into the run script as "--userpass <username:password>" --- Dockerfile | 5 +++++ app/routes.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/Dockerfile b/Dockerfile index fd8c746..61f77b2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,6 +10,11 @@ RUN mkdir -p $config_dir VOLUME $config_dir ENV CONFIG_VOLUME=$config_dir +ARG username='' +ENV WHOOGLE_USER=$username +ARG password='' +ENV WHOOGLE_PASS=$password + ARG use_https='' ENV HTTPS_ONLY=$use_https diff --git a/app/routes.py b/app/routes.py index 81791a0..7931a14 100644 --- a/app/routes.py +++ b/app/routes.py @@ -6,6 +6,7 @@ import argparse from bs4 import BeautifulSoup from cryptography.fernet import Fernet, InvalidToken from flask import g, make_response, request, redirect, render_template, send_file +from functools import wraps import io import json import os @@ -18,6 +19,21 @@ app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config CONFIG_PATH = os.getenv('CONFIG_VOLUME', app.config['STATIC_FOLDER']) + '/config.json' +def auth_required(f): + @wraps(f) + def decorated(*args, **kwargs): + auth = request.authorization + + # Skip if username/password not set + whoogle_user = os.getenv('WHOOGLE_USER', '') + whoogle_pass = os.getenv('WHOOGLE_PASS', '') + if (not whoogle_user or not whoogle_pass) or (auth and whoogle_user == auth.username and whoogle_pass == auth.password): + return f(*args, **kwargs) + else: + return make_response('Not logged in', 401, {'WWW-Authenticate': 'Basic realm="Login Required"'}) + return decorated + + @app.before_request def before_request_func(): # Always redirect to https if HTTPS_ONLY is set (otherwise default to false) @@ -44,6 +60,7 @@ def unknown_page(e): @app.route('/', methods=['GET']) +@auth_required def index(): bg = '#000' if g.user_config.dark else '#fff' return
render_template('index.html', @@ -55,6 +72,7 @@ def index(): @app.route('/opensearch.xml', methods=['GET']) +@auth_required def opensearch(): opensearch_url = g.app_location if opensearch_url.endswith('/'): @@ -69,6 +87,7 @@ def opensearch(): @app.route('/search', methods=['GET', 'POST']) +@auth_required def search(): request_params = request.args if request.method == 'GET' else request.form q = request_params.get('q') @@ -109,6 +128,7 @@ def search(): @app.route('/config', methods=['GET', 'POST']) +@auth_required def config(): if request.method == 'GET': return json.dumps(g.user_config.__dict__) @@ -125,6 +145,7 @@ def config(): @app.route('/url', methods=['GET']) +@auth_required def url(): if 'url' in request.args: return redirect(request.args.get('url')) @@ -137,11 +158,13 @@ def url(): @app.route('/imgres') +@auth_required def imgres(): return redirect(request.args.get('imgurl')) @app.route('/tmp') +@auth_required def tmp(): cipher_suite = Fernet(app.secret_key) img_url = cipher_suite.decrypt(request.args.get('image_url').encode()).decode() @@ -159,6 +182,7 @@ def tmp(): @app.route('/window') +@auth_required def window(): get_body = g.user_request.send(base_url=request.args.get('location')) get_body = get_body.replace('src="/', 'src="' + request.args.get('location') + '"') @@ -185,7 +209,15 @@ def run_app(): help='Activates debug mode for the server (default False)') parser.add_argument('--https-only', default=False, action='store_true', help='Enforces HTTPS redirects for all requests') + parser.add_argument('--userpass', default='', metavar='', + help='Sets a username/password basic auth combo (default None)') args = parser.parse_args() + + if args.userpass: + user_pass = args.userpass.split(':') + os.environ['WHOOGLE_USER'] = user_pass[0] + os.environ['WHOOGLE_PASS'] = user_pass[1] + os.environ['HTTPS_ONLY'] = '1' if args.https_only else '' if args.debug: From c51f1864194abc928135caaacccee0861bad57e4 Mon Sep 17 00:00:00 2001 From: Ben Busby 
<33362396+benbusby@users.noreply.github.com> Date: Wed, 20 May 2020 11:02:30 -0600 Subject: [PATCH 36/58] Added version footer, minor PEP 8 refactoring --- app/__init__.py | 4 ++++ app/filter.py | 21 +++++++++++---------- app/routes.py | 31 ++++++++++++------------------- app/static/css/main.css | 12 ++++++++++++ app/templates/index.html | 4 +++- 5 files changed, 42 insertions(+), 30 deletions(-) diff --git a/app/__init__.py b/app/__init__.py index 82b63a3..3e2309c 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -4,5 +4,9 @@ import os app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static') app.secret_key = Fernet.generate_key() +app.config['VERSION_NUMBER'] = '0.1.1' +app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__))) +app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static')) +app.config['CONFIG_PATH'] = os.getenv('CONFIG_VOLUME', app.config['STATIC_FOLDER']) + '/config.json' from app import routes diff --git a/app/filter.py b/app/filter.py index e2e8168..5e5ec01 100644 --- a/app/filter.py +++ b/app/filter.py @@ -13,6 +13,7 @@ BLANK_B64 = '''  ''' + def get_first_link(soup): # Replace hrefs with only the intended destination (no "utm" type tags) for a in soup.find_all('a', href=True): @@ -25,6 +26,7 @@ def get_first_link(soup): if 'url?q=' in href: return filter_link_args(href) + def filter_link_args(query_link): parsed_link = urlparse.urlparse(query_link) link_args = parse_qs(parsed_link.query) @@ -48,6 +50,7 @@ def filter_link_args(query_link): return query_link + class Filter: def __init__(self, mobile=False, config=None, secret_key=''): if config is None: @@ -109,14 +112,13 @@ class Filter: img_src = img['src'] if img_src.startswith('//'): img_src = 'https:' + img_src + elif img_src.startswith(LOGO_URL): + # Re-brand with Whoogle logo + img['src'] = '/static/img/logo.png' + img['style'] = 'height:40px;width:162px' + continue elif 
img_src.startswith(GOOG_IMG): - # Special rebranding for image search results - if img_src.startswith(LOGO_URL): - img['src'] = '/static/img/logo.png' - img['style'] = 'height:40px;width:162px' - else: - img['src'] = BLANK_B64 - + img['src'] = BLANK_B64 continue enc_src = Fernet(self.secret_key).encrypt(img_src.encode()) @@ -183,12 +185,11 @@ class Filter: a['href'] = new_search elif 'url?q=' in href: # Strip unneeded arguments - query_link = filter_link_args(query_link) - a['href'] = query_link + a['href'] = filter_link_args(query_link) # Add no-js option if self.nojs: - gen_nojs(soup, query_link, a) + gen_nojs(soup, a['href'], a) else: a['href'] = href diff --git a/app/routes.py b/app/routes.py index 7931a14..a667bb8 100644 --- a/app/routes.py +++ b/app/routes.py @@ -13,11 +13,6 @@ import os import urllib.parse as urlparse import waitress -app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__))) -app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static')) - -CONFIG_PATH = os.getenv('CONFIG_VOLUME', app.config['STATIC_FOLDER']) + '/config.json' - def auth_required(f): @wraps(f) @@ -27,7 +22,8 @@ def auth_required(f): # Skip if username/password not set whoogle_user = os.getenv('WHOOGLE_USER', '') whoogle_pass = os.getenv('WHOOGLE_PASS', '') - if (not whoogle_user or not whoogle_pass) or (auth and whoogle_user == auth.username and whoogle_pass == auth.password): + if (not whoogle_user or not whoogle_pass) or \ + (auth and whoogle_user == auth.username and whoogle_pass == auth.password): return f(*args, **kwargs) else: return make_response('Not logged in', 401, {'WWW-Authenticate': 'Basic realm="Login Required"'}) @@ -38,13 +34,14 @@ def auth_required(f): def before_request_func(): # Always redirect to https if HTTPS_ONLY is set (otherwise default to false) https_only = os.getenv('HTTPS_ONLY', False) + config_path = app.config['CONFIG_PATH'] if https_only and 
request.url.startswith('http://'): - url = request.url.replace('http://', 'https://', 1) + https_url = request.url.replace('http://', 'https://', 1) code = 308 - return redirect(url, code=code) + return redirect(https_url, code=code) - json_config = json.load(open(CONFIG_PATH)) if os.path.exists(CONFIG_PATH) else {'url': request.url_root} + json_config = json.load(open(config_path)) if os.path.exists(config_path) else {'url': request.url_root} g.user_config = Config(**json_config) if not g.user_config.url: @@ -68,6 +65,7 @@ def index(): ua=g.user_request.modified_user_agent, languages=Config.LANGUAGES, current_lang=g.user_config.lang, + version_number=app.config['VERSION_NUMBER'], request_type='get' if g.user_config.get_only else 'post') @@ -91,7 +89,7 @@ def opensearch(): def search(): request_params = request.args if request.method == 'GET' else request.form q = request_params.get('q') - + if q is None or len(q) == 0: return redirect('/') else: @@ -103,7 +101,7 @@ def search(): feeling_lucky = q.startswith('! ') - if feeling_lucky: # Well do you, punk? + if feeling_lucky: # Well do you, punk? 
q = q[2:] user_agent = request.headers.get('User-Agent') @@ -112,18 +110,13 @@ def search(): content_filter = Filter(mobile, g.user_config, secret_key=app.secret_key) full_query = gen_query(q, request_params, content_filter.near, language=g.user_config.lang) get_body = g.user_request.send(query=full_query) - - results = content_filter.reskin(get_body) - dirty_soup = BeautifulSoup(results, 'html.parser') + dirty_soup = BeautifulSoup(content_filter.reskin(get_body), 'html.parser') if feeling_lucky: - redirect_url = get_first_link(dirty_soup) - return redirect(redirect_url, 303) # Using 303 so the browser performs a GET request for the URL + return redirect(get_first_link(dirty_soup), 303) # Using 303 so the browser performs a GET request for the URL else: formatted_results = content_filter.clean(dirty_soup) - - return render_template('display.html', query=urlparse.unquote(q), response=formatted_results) @@ -137,7 +130,7 @@ def config(): if 'url' not in config_data or not config_data['url']: config_data['url'] = g.user_config.url - with open(CONFIG_PATH, 'w') as config_file: + with open(app.config['CONFIG_PATH'], 'w') as config_file: config_file.write(json.dumps(config_data, indent=4)) config_file.close() diff --git a/app/static/css/main.css b/app/static/css/main.css index f482373..ef4b557 100644 --- a/app/static/css/main.css +++ b/app/static/css/main.css @@ -1,3 +1,7 @@ +body { + font-family: Avenir, Helvetica, Arial, sans-serif; +} + .logo { width: 80%; display: block; @@ -117,3 +121,11 @@ button::-moz-focus-inner { .hidden { display: none; } + +footer { + position: fixed; + bottom: 0%; + text-align: center; + width: 100%; + z-index: -1; +} diff --git a/app/templates/index.html b/app/templates/index.html index 33f2153..4747dba 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -85,6 +85,8 @@
+ - From b15368ac283114247d1264de295e7469e068a47d Mon Sep 17 00:00:00 2001 From: Ben Busby <33362396+benbusby@users.noreply.github.com> Date: Wed, 20 May 2020 11:07:01 -0600 Subject: [PATCH 37/58] Updated recent results test w/ +5 day tolerance --- test/test_results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_results.py b/test/test_results.py index 6340f32..abf3dcd 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -62,6 +62,6 @@ def test_recent_results(client): try: date = parse(date_span) - assert (current_date - date).days <= num_days + assert (current_date - date).days <= (num_days + 5) # Date can have a little bit of wiggle room except ParserError: assert ' ago' in date_span From d2b60544c5edd30052d000dcfb1ba2d458e886d6 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Wed, 20 May 2020 11:11:07 -0600 Subject: [PATCH 38/58] Update README.md Added instructions for setting default search engine while using a reverse proxy --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 88d38a1..2e09dfa 100644 --- a/README.md +++ b/README.md @@ -185,6 +185,8 @@ To filter by a range of time, append ":past