From 6600d8580c54de50730539655cff5792f9cd0697 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Sat, 23 Jan 2021 17:43:53 -0500 Subject: [PATCH 01/45] Add ability to redirect reddit.com to libredd.it (#180) * Adds the ability to redirect reddit.com to libredd.it using the existing "site alts" config setting. This adds the WHOOGLE_ALT_RD environment variable for optionally redirecting reddit links to libreddit (https://github.com/spikecodes/libreddit). * Include libreddit in home page site alt note --- Dockerfile | 2 ++ README.md | 1 + app.json | 11 ++++++++--- app/templates/index.html | 4 ++-- app/utils/filter_utils.py | 9 +++++++-- docker-compose.yml | 1 + 6 files changed, 21 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7a3f4b6..a62bcee 100644 --- a/Dockerfile +++ b/Dockerfile @@ -45,6 +45,8 @@ ARG youtube_alt='invidious.snopyta.org' ENV WHOOGLE_ALT_YT=$youtube_alt ARG instagram_alt='bibliogram.art/u' ENV WHOOGLE_ALT_YT=$instagram_alt +ARG reddit_alt='libredd.it' +ENV WHOOGLE_ALT_RD=$reddit_alt COPY . . 
diff --git a/README.md b/README.md index e5c084a..b989c8e 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,7 @@ Description=Whoogle #Environment=WHOOGLE_ALT_TW=nitter.net #Environment=WHOOGLE_ALT_YT=invidious.snopyta.org #Environment=WHOOGLE_ALT_IG=bibliogram.art/u +#Environment=WHOOGLE_ALT_RD=libredd.it Type=simple User=root WorkingDirectory= diff --git a/app.json b/app.json index 2e70f01..2a54632 100644 --- a/app.json +++ b/app.json @@ -47,17 +47,22 @@ }, "WHOOGLE_ALT_TW": { "description": "The site to use as a replacement for twitter.com when site alternatives are enabled in the config.", - "value": "", + "value": "nitter.net", "required": false }, "WHOOGLE_ALT_YT": { "description": "The site to use as a replacement for youtube.com when site alternatives are enabled in the config.", - "value": "", + "value": "invidious.snopyta.org", "required": false }, "WHOOGLE_ALT_IG": { "description": "The site to use as a replacement for instagram.com when site alternatives are enabled in the config.", - "value": "", + "value": "bibliogram.art/u", + "required": false + }, + "WHOOGLE_ALT_RD": { + "description": "The site to use as a replacement for reddit.com when site alternatives are enabled in the config.", + "value": "libredd.it", "required": false } } diff --git a/app/templates/index.html b/app/templates/index.html index 766fab1..af00e75 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -116,8 +116,8 @@
-
— Replaces Twitter/YouTube/Instagram links - with Nitter/Invidious/Bibliogram links.
+
— Replaces Twitter/YouTube/Instagram/Reddit links + with Nitter/Invidious/Bibliogram/Libreddit links.
diff --git a/app/utils/filter_utils.py b/app/utils/filter_utils.py index d1a2604..c32f6d1 100644 --- a/app/utils/filter_utils.py +++ b/app/utils/filter_utils.py @@ -4,6 +4,7 @@ import urllib.parse as urlparse from urllib.parse import parse_qs SKIP_ARGS = ['ref_src', 'utm'] +SKIP_PREFIX = ['//www.', '//mobile.', '//m.'] FULL_RES_IMG = '
Full Image' GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo' LOGO_URL = GOOG_IMG + '_desk' @@ -22,7 +23,8 @@ BLACKLIST = [ SITE_ALTS = { 'twitter.com': os.getenv('WHOOGLE_ALT_TW', 'nitter.net'), 'youtube.com': os.getenv('WHOOGLE_ALT_YT', 'invidious.snopyta.org'), - 'instagram.com': os.getenv('WHOOGLE_ALT_IG', 'bibliogram.art/u') + 'instagram.com': os.getenv('WHOOGLE_ALT_IG', 'bibliogram.art/u'), + 'reddit.com': os.getenv('WHOOGLE_ALT_RD', 'libredd.it') } @@ -47,7 +49,10 @@ def get_site_alt(link: str): link = link.replace(site_key, SITE_ALTS[site_key]) break - return link.replace('www.', '').replace('//m.', '//') + for prefix in SKIP_PREFIX: + link = link.replace(prefix, '//') + + return link def filter_link_args(query_link): diff --git a/docker-compose.yml b/docker-compose.yml index 75ad171..33a65b2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,6 +19,7 @@ services: #- WHOOGLE_ALT_TW=nitter.net #- WHOOGLE_ALT_YT=invidious.snopyta.org #- WHOOGLE_ALT_IG=bibliogram.art/u + #- WHOOGLE_ALT_RD=libredd.it ports: - 5000:5000 restart: unless-stopped From 4f4e7ef03516f3a2aaa7155f4fa0b8e0f9c3fbd9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Feb 2021 10:28:34 -0500 Subject: [PATCH 02/45] Bump cryptography from 3.2 to 3.3.2 (#193) Bumps [cryptography](https://github.com/pyca/cryptography) from 3.2 to 3.3.2. 
- [Release notes](https://github.com/pyca/cryptography/releases) - [Changelog](https://github.com/pyca/cryptography/blob/master/CHANGELOG.rst) - [Commits](https://github.com/pyca/cryptography/compare/3.2...3.3.2) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c015b64..04ff631 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ certifi==2020.4.5.1 cffi==1.13.2 chardet==3.0.4 Click==7.0 -cryptography==3.2 +cryptography==3.3.2 Flask==1.1.1 Flask-Session==0.3.2 idna==2.9 From 3ed0cf02bf0221c90161b9b4739a3f1c05fc27b5 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Fri, 12 Feb 2021 10:37:39 -0500 Subject: [PATCH 03/45] List public instances in readme Note: future public instances should be added with a PR --- README.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b989c8e..f8a9954 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,8 @@ Contents 5. [Usage](#usage) 6. [Extra Steps](#extra-steps) 7. [FAQ](#faq) -8. [Screenshots](#screenshots) +8. [Public Instances](#public-instances) +9. [Screenshots](#screenshots) ## Features - No ads or sponsored content @@ -330,6 +331,14 @@ I'm a huge fan of Searx though and encourage anyone to use that instead if they A lot of the app currently piggybacks on Google's existing support for fetching results pages with Javascript disabled. To their credit, they've done an excellent job with styling pages, but it seems that the image results page - particularly on mobile - is a little rough. Moving forward, with enough interest, I'd like to transition to fetching the results and parsing them into a unique Whoogle-fied interface that I can style myself. +## Public Instances + +*Note: Use public instances at your own discretion. 
Maintainers of Whoogle do not personally validate the integrity of these instances, and popular public instances are more likely to be rate-limited or blocked.* + +- https://whoogle.sdf.org +- https://whoogle.tormentasolar.win/ +- https://whoogle.himiko.cloud + ## Screenshots #### Desktop ![Whoogle Desktop](docs/screenshot_desktop.jpg) From 5538ac862ec4dc613adc571b9df3d7abc2565dee Mon Sep 17 00:00:00 2001 From: Tomasz Borychowski Date: Sun, 14 Feb 2021 15:50:53 +0000 Subject: [PATCH 04/45] add basic keyboard support --- app/static/css/header.css | 6 ++++++ app/static/js/keyboard.js | 43 ++++++++++++++++++++++++++++++++++++++ app/templates/display.html | 5 +++-- 3 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 app/static/js/keyboard.js diff --git a/app/static/css/header.css b/app/static/css/header.css index 3b24fdc..a7c8461 100644 --- a/app/static/css/header.css +++ b/app/static/css/header.css @@ -60,3 +60,9 @@ header { margin: 15px; display: block; } + + +#main>div:focus-within { + border-radius: 8px; + box-shadow: 0 0 6px 1px #2375e8; +} diff --git a/app/static/js/keyboard.js b/app/static/js/keyboard.js new file mode 100644 index 0000000..98a4a17 --- /dev/null +++ b/app/static/js/keyboard.js @@ -0,0 +1,43 @@ +(function () { + let searchBar, results; + const keymap = { + ArrowUp: goUp, + ArrowDown: goDown, + k: goUp, + j: goDown, + '/': focusSearch, + }; + let activeIdx = -1; + + document.addEventListener('DOMContentLoaded', () => { + searchBar = document.querySelector('#search-bar'); + results = document.querySelectorAll('#main>div>div>div>a'); + }); + + document.addEventListener('keydown', (e) => { + if (typeof keymap[e.key] === 'function') { + e.preventDefault(); + keymap[e.key](); + } + }); + + function goUp () { + if (activeIdx > 0) focusResult(activeIdx - 1); + else focusSearch(); + } + + function goDown () { + if (activeIdx < results.length - 1) focusResult(activeIdx + 1); + } + + function focusResult (idx) { + activeIdx = idx; + 
results[activeIdx].scrollIntoView({ behavior: 'smooth', block: 'center', inline: 'nearest' }); + results[activeIdx].focus(); + } + + function focusSearch () { + activeIdx = -1; + searchBar.focus(); + } +}()); diff --git a/app/templates/display.html b/app/templates/display.html index f1dc572..30eba0a 100644 --- a/app/templates/display.html +++ b/app/templates/display.html @@ -5,8 +5,6 @@ - - {% if dark_mode %} @@ -24,4 +22,7 @@ View on GitHub

+ + + From 7f3a284e04b76a8668559b2cfb526141e0a25e6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roman=20=C5=A0tefko?= Date: Fri, 19 Feb 2021 18:30:05 +0100 Subject: [PATCH 05/45] Do not autocapitalize on index page search bar (#200) --- app/templates/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/templates/index.html b/app/templates/index.html index af00e75..a308e0d 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -47,7 +47,7 @@
- +
From 03bd4b68710863b0eadcbee0f5eb2c5a7d1a8c4d Mon Sep 17 00:00:00 2001 From: Tomasz Borychowski Date: Fri, 19 Feb 2021 22:49:32 +0000 Subject: [PATCH 06/45] fix 'j' and 'k' inside search input --- app/static/js/keyboard.js | 1 + 1 file changed, 1 insertion(+) diff --git a/app/static/js/keyboard.js b/app/static/js/keyboard.js index 98a4a17..7fd05ef 100644 --- a/app/static/js/keyboard.js +++ b/app/static/js/keyboard.js @@ -15,6 +15,7 @@ }); document.addEventListener('keydown', (e) => { + if (e.target.tagName === 'INPUT') return true; if (typeof keymap[e.key] === 'function') { e.preventDefault(); keymap[e.key](); From 64567a63ead86a8f231407e5a3a8778fc56c1bdd Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Sat, 20 Feb 2021 15:04:32 -0500 Subject: [PATCH 07/45] Ensure G logo doesn't appear in mobile img results Adds a separate check to remove all images sourced from www.gstatic.com, which is where the mobile logo in particular is coming from. --- app/filter.py | 20 ++++++++------------ app/utils/filter_utils.py | 2 +- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/app/filter.py b/app/filter.py index ccd6af3..4c953d5 100644 --- a/app/filter.py +++ b/app/filter.py @@ -127,28 +127,24 @@ class Filter: question['style'] = 'padding: 10px; font-style: italic;' def update_element_src(self, element, mime): - element_src = element['src'] - if element_src.startswith('//'): - element_src = 'https:' + element_src - elif element_src.startswith(LOGO_URL): + src = element['src'] + + if src.startswith('//'): + src = 'https:' + src + + if src.startswith(LOGO_URL): # Re-brand with Whoogle logo element['src'] = 'static/img/logo.png' element['style'] = 'height:40px;width:162px' return - elif element_src.startswith(GOOG_IMG): + elif src.startswith(GOOG_IMG) or GOOG_STATIC in src: element['src'] = BLANK_B64 return element['src'] = 'element?url=' + self.encrypt_path( - element_src, + src, is_element=True) + '&type=' + urlparse.quote(mime) - # FIXME: Non-mobile image results link 
to website instead of image - # if not self.mobile: - # img.append( - # BeautifulSoup(FULL_RES_IMG.format(element_src), - # 'html.parser')) - def update_styling(self, soup): # Remove unnecessary button(s) for button in soup.find_all('button'): diff --git a/app/utils/filter_utils.py b/app/utils/filter_utils.py index c32f6d1..76b99ba 100644 --- a/app/utils/filter_utils.py +++ b/app/utils/filter_utils.py @@ -5,7 +5,7 @@ from urllib.parse import parse_qs SKIP_ARGS = ['ref_src', 'utm'] SKIP_PREFIX = ['//www.', '//mobile.', '//m.'] -FULL_RES_IMG = '
Full Image' +GOOG_STATIC = 'www.gstatic.com' GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo' LOGO_URL = GOOG_IMG + '_desk' BLANK_B64 = ('data:image/png;base64,' From ecb7885a561dc323f919134bed86ee0f7a5100c1 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Sat, 20 Feb 2021 15:31:15 -0500 Subject: [PATCH 08/45] Allow bang operator anywhere in query Bang operator can now be placed anywhere in the query, to allow for peak efficiency in stream of consciousness querying (i.e. `big !reddit chungus` will search reddit for `big chungus`). Fixes #196 --- app/utils/routing_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/utils/routing_utils.py b/app/utils/routing_utils.py index 55a6253..4cbbb16 100644 --- a/app/utils/routing_utils.py +++ b/app/utils/routing_utils.py @@ -71,8 +71,9 @@ class RoutingUtils: return self.query def bang_operator(self, bangs_dict: dict) -> str: + split_query = self.query.split(' ') for operator in bangs_dict.keys(): - if self.query.split(' ')[0] != operator: + if operator not in split_query: continue return bangs_dict[operator]['url'].format( From 48c8e9d14b0e8d51d18c358daa872396bf4c79cd Mon Sep 17 00:00:00 2001 From: Basti Date: Fri, 26 Feb 2021 16:33:11 +0100 Subject: [PATCH 09/45] Update docker-compose security features (#208) Co-authored-by: Sebastian Forst --- docker-compose.yml | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 33a65b2..3f48604 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,9 +1,26 @@ -version: "3" +# cant use mem_limit in a 3.x docker-compose file in non swarm mode +# see https://github.com/docker/compose/issues/4513 +version: "2.4" services: whoogle-search: image: benbusby/whoogle-search container_name: whoogle-search + restart: on-failure:5 + pids_limit: 50 + mem_limit: 256mb + memswap_limit: 256mb + # user debian-tor from tor package + user: '102' + security_opt: + - no-new-privileges + 
cap_drop: + - ALL + read_only: true + tmpfs: + - /config/:size=10M,uid=102,gid=102,mode=1700 + - /var/lib/tor/:size=10M,uid=102,gid=102,mode=1700 + - /run/tor/:size=1M,uid=102,gid=102,mode=1700 #environment: # Uncomment to configure environment variables # Basic auth configuration, uncomment to enable #- WHOOGLE_USER= From 36b350e1cd5ba7ce377ed8a8dcd1510c98342c9d Mon Sep 17 00:00:00 2001 From: Basti Date: Fri, 26 Feb 2021 16:49:40 +0100 Subject: [PATCH 10/45] Use multi-stage container build (#210) This only adds necessary packages and files from the repo to reduce the image size. Co-authored-by: pred2k --- Dockerfile | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index a62bcee..ec3095c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,18 +1,23 @@ -FROM python:3.8-slim +FROM python:3.8-slim as builder -WORKDIR /usr/src/app RUN apt-get update && apt-get install -y \ build-essential \ - libcurl4-openssl-dev \ - libssl-dev \ libxml2-dev \ libxslt-dev \ - libffi-dev \ - tor + libffi-dev -COPY config/tor/torrc /etc/tor/torrc COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt + +RUN pip install --prefix /install --no-warn-script-location --no-cache-dir -r requirements.txt + + +FROM python:3.8-slim + +RUN apt-get update && apt-get install -y \ + libcurl4-openssl-dev \ + libssl-dev \ + tor \ + && rm -rf /var/lib/apt/lists/* ARG config_dir=/config RUN mkdir -p $config_dir @@ -48,7 +53,13 @@ ENV WHOOGLE_ALT_YT=$instagram_alt ARG reddit_alt='libredd.it' ENV WHOOGLE_ALT_RD=$reddit_alt -COPY . . +WORKDIR /whoogle + +COPY --from=builder /install /usr/local +COPY config/tor/torrc /etc/tor/torrc +COPY config/tor/start-tor.sh config/tor/start-tor.sh +COPY app/ app/ +COPY run . 
EXPOSE $EXPOSE_PORT From d1460168608bbdc2e8d31459b0ab225a1e8cef70 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Fri, 26 Feb 2021 11:42:42 -0500 Subject: [PATCH 11/45] Remove auth req for accessing opensearch Requiring authentication for accessing the opensearch template prevents the browser from accessing the file when adding as a default search engine. This removes the authentication requirement from the opensearch route, which should never provide any sensitive information anyways. --- app/routes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/routes.py b/app/routes.py index b175d77..f822b5e 100644 --- a/app/routes.py +++ b/app/routes.py @@ -136,7 +136,6 @@ def index(): @app.route('/opensearch.xml', methods=['GET']) -@auth_required def opensearch(): opensearch_url = g.app_location if opensearch_url.endswith('/'): From b7b6fb7c042c617a7f11f6ca4963bd8423eb2fc6 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Fri, 26 Feb 2021 12:04:06 -0500 Subject: [PATCH 12/45] Move ssl install to Dockerfile build stage --- Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index ec3095c..d901769 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,18 +4,17 @@ RUN apt-get update && apt-get install -y \ build-essential \ libxml2-dev \ libxslt-dev \ + libssl-dev \ libffi-dev COPY requirements.txt . RUN pip install --prefix /install --no-warn-script-location --no-cache-dir -r requirements.txt - FROM python:3.8-slim RUN apt-get update && apt-get install -y \ libcurl4-openssl-dev \ - libssl-dev \ tor \ && rm -rf /var/lib/apt/lists/* From dcb80ac25038ce810805c9e7436a9f2e43ba2a3a Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Sun, 7 Mar 2021 14:04:05 -0500 Subject: [PATCH 13/45] Send CSP header in all responses Introduces a new content security policy header for responses to all requests to reduce the possibility of ip leaks to outside connections. By default blocks all inline scripts, and only allows content loaded from Whoogle. 
Refactors a few small inline scripting cases in the project to their own individual scripts. --- app/__init__.py | 7 +++++++ app/routes.py | 22 +++++++++++++++------- app/static/js/header.js | 11 +++++++++++ app/static/js/utils.js | 8 ++++---- app/templates/error.html | 2 +- app/templates/header.html | 12 +----------- app/templates/index.html | 6 ------ 7 files changed, 39 insertions(+), 29 deletions(-) create mode 100644 app/static/js/header.js diff --git a/app/__init__.py b/app/__init__.py index 13350a1..d5da4f5 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -40,6 +40,13 @@ app.config['BANG_PATH'] = os.getenv( app.config['BANG_FILE'] = os.path.join( app.config['BANG_PATH'], 'bangs.json') +app.config['CSP'] = 'default-src \'none\';' \ + 'img-src \'self\';' \ + 'style-src \'self\' \'unsafe-inline\';' \ + 'script-src \'self\';' \ + 'media-src \'self\';' \ + 'connect-src \'self\';' \ + 'form-action \'self\';' if not os.path.exists(app.config['CONFIG_PATH']): os.makedirs(app.config['CONFIG_PATH']) diff --git a/app/routes.py b/app/routes.py index f822b5e..acb38a6 100644 --- a/app/routes.py +++ b/app/routes.py @@ -87,7 +87,7 @@ def before_request_func(): @app.after_request -def after_request_func(response): +def after_request_func(resp): if app.user_elements[session['uuid']] <= 0 and '/element' in request.url: # Regenerate element key if all elements have been served to user session['fernet_keys'][ @@ -108,7 +108,11 @@ def after_request_func(response): for key in session_list: session.pop(key) - return response + resp.headers['Content-Security-Policy'] = app.config['CSP'] + if os.environ.get('HTTPS_ONLY', False): + resp.headers['Content-Security-Policy'] += 'upgrade-insecure-requests' + + return resp @app.errorhandler(404) @@ -122,15 +126,17 @@ def unknown_page(e): def index(): # Reset keys session['fernet_keys'] = generate_user_keys(g.cookies_disabled) - error_message = session[ - 'error_message'] if 'error_message' in session else '' - session['error_message'] 
= '' + + # Redirect if an error was raised + if 'error_message' in session and session['error_message']: + error_message = session['error_message'] + session['error_message'] = '' + return render_template('error.html', error_message=error_message) return render_template('index.html', languages=app.config['LANGUAGES'], countries=app.config['COUNTRIES'], config=g.user_config, - error_message=error_message, tor_available=int(os.environ.get('TOR_AVAILABLE')), version_number=app.config['VERSION_NUMBER']) @@ -286,7 +292,9 @@ def url(): if len(q) > 0 and 'http' in q: return redirect(q) else: - return render_template('error.html', query=q) + return render_template( + 'error.html', + error_message='Unable to resolve query: ' + q) @app.route('/imgres') diff --git a/app/static/js/header.js b/app/static/js/header.js new file mode 100644 index 0000000..02d8581 --- /dev/null +++ b/app/static/js/header.js @@ -0,0 +1,11 @@ +document.addEventListener("DOMContentLoaded", () => { + const searchBar = document.getElementById("search-bar"); + + searchBar.addEventListener("keyup", function (event) { + if (event.keyCode !== 13) { + handleUserInput(searchBar); + } else { + document.getElementById("search-form").submit(); + } + }); +}); diff --git a/app/static/js/utils.js b/app/static/js/utils.js index 775e072..56e052f 100644 --- a/app/static/js/utils.js +++ b/app/static/js/utils.js @@ -28,7 +28,7 @@ const checkForTracking = () => { /^[0-9]{15}$/ ] } - } + }; // Creates a link to a UPS/USPS/FedEx tracking page const createTrackingLink = href => { @@ -37,7 +37,7 @@ const checkForTracking = () => { link.innerHTML = "View Tracking Info"; link.href = href; mainDiv.prepend(link); - } + }; // Compares the query against a set of regex patterns // for tracking numbers @@ -48,12 +48,12 @@ const checkForTracking = () => { return true; } }); - } + }; for (const key of Object.keys(matchTracking)) { compareQuery(matchTracking[key]); } -} +}; document.addEventListener("DOMContentLoaded", function() { 
checkForTracking(); diff --git a/app/templates/error.html b/app/templates/error.html index 9546e23..efa3f79 100644 --- a/app/templates/error.html +++ b/app/templates/error.html @@ -1,6 +1,6 @@

Error


- Error parsing "{{ query }}" + Error: "{{ error_message|safe }}"

Return Home diff --git a/app/templates/header.html b/app/templates/header.html index 7ae980f..5c8da25 100644 --- a/app/templates/header.html +++ b/app/templates/header.html @@ -48,14 +48,4 @@ {% endif %} - + diff --git a/app/templates/index.html b/app/templates/index.html index a308e0d..de306f9 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -36,12 +36,6 @@ Whoogle Search -
From 1d612c68a4d5162d5861812c63308a6ac5b60bcc Mon Sep 17 00:00:00 2001 From: Nico Date: Sun, 7 Mar 2021 21:50:23 +0000 Subject: [PATCH 14/45] Add Arch Linux installation instructions to readme (#217) --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index f8a9954..fd9060b 100644 --- a/README.md +++ b/README.md @@ -220,6 +220,9 @@ heroku open This series of commands can take a while, but once you run it once, you shouldn't have to run it again. The final command, `heroku open` will launch a tab in your web browser, where you can test out Whoogle and even [set it as your primary search engine](https://github.com/benbusby/whoogle#set-whoogle-as-your-primary-search-engine). You may also edit environment variables from your app’s Settings tab in the Heroku Dashboard. +#### Arch Linux & Arch-based Distributions +There is an [AUR package available](https://aur.archlinux.org/packages/whoogle-git/), as well as a pre-built and daily updated package available at [Chaotic-AUR](https://chaotic.cx). + #### Using your own server, or alternative container deployment There are other methods for deploying docker containers that are well outlined in [this article](https://rollout.io/blog/the-shortlist-of-docker-hosting/), but there are too many to describe set up for each here. Generally it should be about the same amount of effort as the Heroku deployment. From f8dfc7853971d3e1bd63e7df93e8745007d9f6d2 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Mon, 8 Mar 2021 12:22:04 -0500 Subject: [PATCH 15/45] Improve naming of *_utils files, update fn/class doc The app/utils/*_utils weren't named very well, and all have been updated to have more accurate names. Function and class documentation for the utils have been updated as well, as part of the effort to improve overall documentation for the project. 
--- app/__init__.py | 4 +- app/filter.py | 4 +- app/request.py | 10 ++-- app/routes.py | 15 ++--- app/utils/bangs.py | 61 +++++++++++++++++++ app/utils/gen_ddg_bangs.py | 26 -------- app/utils/{filter_utils.py => results.py} | 73 ++++++++++++++++++----- app/utils/{routing_utils.py => search.py} | 64 ++++++++++++++------ app/utils/session.py | 45 ++++++++++++++ app/utils/session_utils.py | 24 -------- test/conftest.py | 2 +- test/test_misc.py | 2 +- test/test_results.py | 2 +- 13 files changed, 232 insertions(+), 100 deletions(-) create mode 100644 app/utils/bangs.py delete mode 100644 app/utils/gen_ddg_bangs.py rename app/utils/{filter_utils.py => results.py} (59%) rename app/utils/{routing_utils.py => search.py} (69%) create mode 100644 app/utils/session.py delete mode 100644 app/utils/session_utils.py diff --git a/app/__init__.py b/app/__init__.py index d5da4f5..5edb9b5 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -1,6 +1,6 @@ from app.request import send_tor_signal -from app.utils.session_utils import generate_user_keys -from app.utils.gen_ddg_bangs import gen_bangs_json +from app.utils.session import generate_user_keys +from app.utils.bangs import gen_bangs_json from flask import Flask from flask_session import Session import json diff --git a/app/filter.py b/app/filter.py index 4c953d5..66e9c6e 100644 --- a/app/filter.py +++ b/app/filter.py @@ -1,5 +1,5 @@ from app.request import VALID_PARAMS -from app.utils.filter_utils import * +from app.utils.results import * from bs4.element import ResultSet from cryptography.fernet import Fernet import re @@ -208,7 +208,7 @@ class Filter: # Add no-js option if self.nojs: - gen_nojs(link) + append_nojs(link) else: link['href'] = href diff --git a/app/request.py b/app/request.py index 446d844..71eeb45 100644 --- a/app/request.py +++ b/app/request.py @@ -23,8 +23,8 @@ class TorError(Exception): """Exception raised for errors in Tor requests. 
Attributes: - message -- a message describing the error that occurred - disable -- optionally disables Tor in the user config (note: + message: a message describing the error that occurred + disable: optionally disables Tor in the user config (note: this should only happen if the connection has been dropped altogether). """ @@ -133,9 +133,9 @@ class Request: search suggestions, and loading of external content (images, audio, etc). Attributes: - normal_ua -- the user's current user agent - root_path -- the root path of the whoogle instance - config -- the user's current whoogle configuration + normal_ua: the user's current user agent + root_path: the root path of the whoogle instance + config: the user's current whoogle configuration """ def __init__(self, normal_ua, root_path, config: Config): diff --git a/app/routes.py b/app/routes.py index acb38a6..b084f71 100644 --- a/app/routes.py +++ b/app/routes.py @@ -16,8 +16,9 @@ from requests import exceptions from app import app from app.models.config import Config from app.request import Request, TorError -from app.utils.session_utils import valid_user_session -from app.utils.routing_utils import * +from app.utils.bangs import resolve_bang +from app.utils.session import valid_user_session +from app.utils.search import * # Load DDG bang json files only on init bang_json = json.load(open(app.config['BANG_FILE'])) @@ -199,13 +200,13 @@ def search(): # Update user config if specified in search args g.user_config = g.user_config.from_params(g.request_params) - search_util = RoutingUtils(request, g.user_config, session, - cookies_disabled=g.cookies_disabled) + search_util = Search(request, g.user_config, session, + cookies_disabled=g.cookies_disabled) query = search_util.new_search_query() - resolved_bangs = search_util.bang_operator(bang_json) - if resolved_bangs != '': - return redirect(resolved_bangs) + bang = resolve_bang(query=query, bangs_dict=bang_json) + if bang != '': + return redirect(bang) # Redirect to home if 
invalid/blank search if not query: diff --git a/app/utils/bangs.py b/app/utils/bangs.py new file mode 100644 index 0000000..56daf4f --- /dev/null +++ b/app/utils/bangs.py @@ -0,0 +1,61 @@ +import json +import requests + +DDG_BANGS = 'https://duckduckgo.com/bang.v255.js' + + +def gen_bangs_json(bangs_file: str) -> None: + """Generates a json file from the DDG bangs list + + Args: + bangs_file: The str path to the new DDG bangs json file + + Returns: + None + + """ + try: + # Request full list from DDG + r = requests.get(DDG_BANGS) + r.raise_for_status() + except requests.exceptions.HTTPError as err: + raise SystemExit(err) + + # Convert to json + data = json.loads(r.text) + + # Set up a json object (with better formatting) for all available bangs + bangs_data = {} + + for row in data: + bang_command = '!' + row['t'] + bangs_data[bang_command] = { + 'url': row['u'].replace('{{{s}}}', '{}'), + 'suggestion': bang_command + ' (' + row['s'] + ')' + } + + json.dump(bangs_data, open(bangs_file, 'w')) + + +def resolve_bang(query: str, bangs_dict: dict) -> str: + """Transform's a user's query to a bang search, if an operator is found + + Args: + query: The search query + bangs_dict: The dict of available bang operators, with corresponding + format string search URLs + (i.e. 
"!w": "https://en.wikipedia.org...?search={}") + + Returns: + str: A formatted redirect for a bang search, or an empty str if there + wasn't a match or didn't contain a bang operator + + """ + split_query = query.split(' ') + for operator in bangs_dict.keys(): + if operator not in split_query: + continue + + return bangs_dict[operator]['url'].format( + query.replace(operator, '').strip()) + return '' diff --git a/app/utils/gen_ddg_bangs.py b/app/utils/gen_ddg_bangs.py deleted file mode 100644 index 0ed3953..0000000 --- a/app/utils/gen_ddg_bangs.py +++ /dev/null @@ -1,26 +0,0 @@ -import json -import requests - - -def gen_bangs_json(bangs_file): - # Request list - try: - r = requests.get('https://duckduckgo.com/bang.v255.js') - r.raise_for_status() - except requests.exceptions.HTTPError as err: - raise SystemExit(err) - - # Convert to json - data = json.loads(r.text) - - # Set up a json object (with better formatting) for all available bangs - bangs_data = {} - - for row in data: - bang_command = '!' 
+ row['t'] - bangs_data[bang_command] = { - 'url': row['u'].replace('{{{s}}}', '{}'), - 'suggestion': bang_command + ' (' + row['s'] + ')' - } - - json.dump(bangs_data, open(bangs_file, 'w')) diff --git a/app/utils/filter_utils.py b/app/utils/results.py similarity index 59% rename from app/utils/filter_utils.py rename to app/utils/results.py index 76b99ba..a1d20dc 100644 --- a/app/utils/filter_utils.py +++ b/app/utils/results.py @@ -28,12 +28,30 @@ SITE_ALTS = { } -def has_ad_content(element: str): +def has_ad_content(element: str) -> bool: + """Inspects an HTML element for ad related content + + Args: + element: The HTML element to inspect + + Returns: + bool: True/False for the element containing an ad + + """ return element.upper() in (value.upper() for value in BLACKLIST) \ or 'ⓘ' in element -def get_first_link(soup): +def get_first_link(soup: BeautifulSoup) -> str: + """Retrieves the first result link from the query response + + Args: + soup: The BeautifulSoup response body + + Returns: + str: A str link to the first result + + """ # Replace hrefs with only the intended destination (no "utm" type tags) for a in soup.find_all('a', href=True): # Return the first search result URL @@ -41,7 +59,16 @@ def get_first_link(soup): return filter_link_args(a['href']) -def get_site_alt(link: str): +def get_site_alt(link: str) -> str: + """Returns an alternative to a particular site, if one is configured + + Args: + link: A string result URL to check against the SITE_ALTS map + + Returns: + str: An updated (or ignored) result link + + """ for site_key in SITE_ALTS.keys(): if site_key not in link: continue @@ -55,13 +82,22 @@ def get_site_alt(link: str): return link -def filter_link_args(query_link): - parsed_link = urlparse.urlparse(query_link) +def filter_link_args(link: str) -> str: + """Filters out unnecessary URL args from a result link + + Args: + link: The string result link to check for extraneous URL params + + Returns: + str: An updated (or ignored) result link + 
+ """ + parsed_link = urlparse.urlparse(link) link_args = parse_qs(parsed_link.query) safe_args = {} if len(link_args) == 0 and len(parsed_link) > 0: - return query_link + return link for arg in link_args.keys(): if arg in SKIP_ARGS: @@ -70,19 +106,28 @@ def filter_link_args(query_link): safe_args[arg] = link_args[arg] # Remove original link query and replace with filtered args - query_link = query_link.replace(parsed_link.query, '') + link = link.replace(parsed_link.query, '') if len(safe_args) > 0: - query_link = query_link + urlparse.urlencode(safe_args, doseq=True) + link = link + urlparse.urlencode(safe_args, doseq=True) else: - query_link = query_link.replace('?', '') + link = link.replace('?', '') - return query_link + return link -def gen_nojs(sibling): +def append_nojs(result: BeautifulSoup) -> None: + """Appends a no-Javascript alternative for a search result + + Args: + result: The search result to append a no-JS link to + + Returns: + None + + """ nojs_link = BeautifulSoup(features='html.parser').new_tag('a') - nojs_link['href'] = '/window?location=' + sibling['href'] + nojs_link['href'] = '/window?location=' + result['href'] nojs_link['style'] = 'display:block;width:100%;' nojs_link.string = 'NoJS Link: ' + nojs_link['href'] - sibling.append(BeautifulSoup('


', 'html.parser')) - sibling.append(nojs_link) + result.append(BeautifulSoup('


', 'html.parser')) + result.append(nojs_link) diff --git a/app/utils/routing_utils.py b/app/utils/search.py similarity index 69% rename from app/utils/routing_utils.py rename to app/utils/search.py index 4cbbb16..84a457e 100644 --- a/app/utils/routing_utils.py +++ b/app/utils/search.py @@ -1,5 +1,5 @@ from app.filter import Filter, get_first_link -from app.utils.session_utils import generate_user_keys +from app.utils.session import generate_user_keys from app.request import gen_query from bs4 import BeautifulSoup as bsoup from cryptography.fernet import Fernet, InvalidToken @@ -11,6 +11,18 @@ TOR_BANNER = '

You are using Tor


' def needs_https(url: str) -> bool: + """Checks if the current instance needs to be upgraded to HTTPS + + Note that all Heroku instances are available by default over HTTPS, but + do not automatically set up a redirect when visited over HTTP. + + Args: + url: The instance url + + Returns: + bool: True/False representing the need to upgrade + + """ https_only = os.getenv('HTTPS_ONLY', False) is_heroku = url.endswith('.herokuapp.com') is_http = url.startswith('http://') @@ -18,7 +30,15 @@ def needs_https(url: str) -> bool: return (is_heroku and is_http) or (https_only and is_http) -class RoutingUtils: +class Search: + """Search query preprocessor - used before submitting the query or + redirecting to another site + + Attributes: + request: the incoming flask request + config: the current user config settings + session: the flask user session + """ def __init__(self, request, config, session, cookies_disabled=False): method = request.method self.request_params = request.args if method == 'GET' else request.form @@ -31,19 +51,28 @@ class RoutingUtils: self.search_type = self.request_params.get( 'tbm') if 'tbm' in self.request_params else '' - def __getitem__(self, name): + def __getitem__(self, name) -> Any: return getattr(self, name) - def __setitem__(self, name, value): + def __setitem__(self, name, value) -> None: return setattr(self, name, value) - def __delitem__(self, name): + def __delitem__(self, name) -> None: return delattr(self, name) - def __contains__(self, name): + def __contains__(self, name) -> bool: return hasattr(self, name) def new_search_query(self) -> str: + """Parses a plaintext query into a valid string for submission + + Also decrypts the query string, if encrypted (in the case of + paginated results). 
+ + Returns: + str: A valid query string + + """ # Generate a new element key each time a new search is performed self.session['fernet_keys']['element_key'] = generate_user_keys( cookies_disabled=self.cookies_disabled)['element_key'] @@ -70,17 +99,18 @@ class RoutingUtils: self.query = q[2:] if self.feeling_lucky else q return self.query - def bang_operator(self, bangs_dict: dict) -> str: - split_query = self.query.split(' ') - for operator in bangs_dict.keys(): - if operator not in split_query: - continue - - return bangs_dict[operator]['url'].format( - self.query.replace(operator, '').strip()) - return '' - def generate_response(self) -> Tuple[Any, int]: + """Generates a response for the user's query + + Returns: + Tuple[Any, int]: A tuple in the format (response, # of elements) + For example, in the case of a "feeling lucky" + search, the response is a result URL, with no + encrypted elements to account for. Otherwise, the + response is a BeautifulSoup response body, with + N encrypted elements to track before key regen. + + """ mobile = 'Android' in self.user_agent or 'iPhone' in self.user_agent content_filter = Filter( @@ -102,7 +132,7 @@ class RoutingUtils: if g.user_request.tor_valid else bsoup('', 'html.parser')) if self.feeling_lucky: - return get_first_link(html_soup), 1 + return get_first_link(html_soup), 0 else: formatted_results = content_filter.clean(html_soup) diff --git a/app/utils/session.py b/app/utils/session.py new file mode 100644 index 0000000..f34d725 --- /dev/null +++ b/app/utils/session.py @@ -0,0 +1,45 @@ +from cryptography.fernet import Fernet +from flask import current_app as app + +REQUIRED_SESSION_VALUES = ['uuid', 'config', 'fernet_keys'] + + +def generate_user_keys(cookies_disabled=False) -> dict: + """Generates a set of user keys + + Args: + cookies_disabled: Flag for whether or not cookies are disabled by the + user. If so, the user can only use the default key + set generated on app init for queries. 
+ + Returns: + dict: A new Fernet key set + + """ + if cookies_disabled: + return app.default_key_set + + # Generate/regenerate unique key per user + return { + 'element_key': Fernet.generate_key(), + 'text_key': Fernet.generate_key() + } + + +def valid_user_session(session: dict) -> bool: + """Validates the current user session + + Args: + session: The current Flask user session + + Returns: + bool: True/False indicating that all required session values are + available + + """ + # Generate secret key for user if unavailable + for value in REQUIRED_SESSION_VALUES: + if value not in session: + return False + + return True diff --git a/app/utils/session_utils.py b/app/utils/session_utils.py deleted file mode 100644 index f959abe..0000000 --- a/app/utils/session_utils.py +++ /dev/null @@ -1,24 +0,0 @@ -from cryptography.fernet import Fernet -from flask import current_app as app - -REQUIRED_SESSION_VALUES = ['uuid', 'config', 'fernet_keys'] - - -def generate_user_keys(cookies_disabled=False) -> dict: - if cookies_disabled: - return app.default_key_set - - # Generate/regenerate unique key per user - return { - 'element_key': Fernet.generate_key(), - 'text_key': Fernet.generate_key() - } - - -def valid_user_session(session): - # Generate secret key for user if unavailable - for value in REQUIRED_SESSION_VALUES: - if value not in session: - return False - - return True diff --git a/test/conftest.py b/test/conftest.py index 4b19636..f0912de 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,5 +1,5 @@ from app import app -from app.utils.session_utils import generate_user_keys +from app.utils.session import generate_user_keys import pytest import random diff --git a/test/test_misc.py b/test/test_misc.py index 92fcadb..e399b4a 100644 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -1,4 +1,4 @@ -from app.utils.session_utils import generate_user_keys, valid_user_session +from app.utils.session import generate_user_keys, valid_user_session def 
test_generate_user_keys(): diff --git a/test/test_results.py b/test/test_results.py index 74af29c..c0f7fd1 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -1,6 +1,6 @@ from bs4 import BeautifulSoup from app.filter import Filter -from app.utils.session_utils import generate_user_keys +from app.utils.session import generate_user_keys from datetime import datetime from dateutil.parser import * From e5d1f6a2927a9e8d6754356747646cac929ea06b Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Mon, 8 Mar 2021 12:38:40 -0500 Subject: [PATCH 16/45] Add healthcheck to Dockerfile See #184 --- Dockerfile | 3 +++ app/utils/results.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index d901769..981512b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -62,4 +62,7 @@ COPY run . EXPOSE $EXPOSE_PORT +HEALTHCHECK --interval=5m --timeout=5s \ + CMD wget --no-verbose --tries=1 http://localhost:${EXPOSE_PORT}/ || exit 1 + CMD config/tor/start-tor.sh & ./run diff --git a/app/utils/results.py b/app/utils/results.py index a1d20dc..58c450f 100644 --- a/app/utils/results.py +++ b/app/utils/results.py @@ -38,8 +38,8 @@ def has_ad_content(element: str) -> bool: bool: True/False for the element containing an ad """ - return element.upper() in (value.upper() for value in BLACKLIST) \ - or 'ⓘ' in element + return (element.upper() in (value.upper() for value in BLACKLIST) + or 'ⓘ' in element) def get_first_link(soup: BeautifulSoup) -> str: From 0e2757fc0756b6e7192c92965c6e3a2630477e7b Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Mon, 8 Mar 2021 17:08:55 -0500 Subject: [PATCH 17/45] Update heroku quick deploy branch, env vars in readme The Heroku quick deploy branch has been changed from heroku-app to heroku-app-beta, since a lot of users want to use features from develop in their heroku instances. The environment variables in the readme were updated to include the reddit redirect var. 
--- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fd9060b..536fa90 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ If using Heroku Quick Deploy, **you can skip this section**. There are a few different ways to begin using the app, depending on your preferences: ### A) [Heroku Quick Deploy](https://heroku.com/about) -[![Deploy](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy?template=https://github.com/benbusby/whoogle-search/tree/heroku-app) +[![Deploy](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy?template=https://github.com/benbusby/whoogle-search/tree/heroku-app-beta) *Note: Requires a (free) Heroku account* @@ -248,6 +248,7 @@ There are a few optional environment variables available for customizing a Whoog | WHOOGLE_ALT_TW | The twitter.com alternative to use when site alternatives are enabled in the config. | | WHOOGLE_ALT_YT | The youtube.com alternative to use when site alternatives are enabled in the config. | | WHOOGLE_ALT_IG | The instagram.com alternative to use when site alternatives are enabled in the config. | +| WHOOGLE_ALT_RD | The reddit.com alternative to use when site alternatives are enabled in the config. | ## Usage Same as most search engines, with the exception of filtering by time range. From fea10828cc043ae025c13dd5950d837926fcc198 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Tue, 9 Mar 2021 10:12:35 -0500 Subject: [PATCH 18/45] Add UI requests note to feature template The feature request template should not be used for requesting updates to the UI. There's already a pinned issue for UI customization, and all requests should go there. 
--- .github/ISSUE_TEMPLATE/feature_request.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 9da6d04..f91a033 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -7,6 +7,12 @@ assignees: '' --- + + **Describe the feature you'd like to see added** A short description of the feature, and what it would accomplish. From b87619a13352ac2fe031945907c27119e306991d Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Wed, 17 Mar 2021 12:27:08 -0400 Subject: [PATCH 19/45] Add heroku-regen script, rename config/ -> misc/ Introduces a new script for quickly regenerating a Heroku instance (typically with a new IP) to handle the rare circumstances where an instance is flagged by Google and prompted for a captcha. The config/ dir was renamed to misc/ to avoid confusion with the Docker mounted config volume, and to more closely match its intended purpose (which is to contain all miscellaneous features/scripts/etc that add functionality to Whoogle, but are not critical for general use). --- Dockerfile | 6 +++--- misc/heroku-regen.sh | 29 +++++++++++++++++++++++++++++ {config => misc}/tor/start-tor.sh | 0 {config => misc}/tor/torrc | 0 4 files changed, 32 insertions(+), 3 deletions(-) create mode 100755 misc/heroku-regen.sh rename {config => misc}/tor/start-tor.sh (100%) rename {config => misc}/tor/torrc (100%) diff --git a/Dockerfile b/Dockerfile index 981512b..4a2e884 100644 --- a/Dockerfile +++ b/Dockerfile @@ -55,8 +55,8 @@ ENV WHOOGLE_ALT_RD=$reddit_alt WORKDIR /whoogle COPY --from=builder /install /usr/local -COPY config/tor/torrc /etc/tor/torrc -COPY config/tor/start-tor.sh config/tor/start-tor.sh +COPY misc/tor/torrc /etc/tor/torrc +COPY misc/tor/start-tor.sh misc/tor/start-tor.sh COPY app/ app/ COPY run . 
@@ -65,4 +65,4 @@ EXPOSE $EXPOSE_PORT HEALTHCHECK --interval=5m --timeout=5s \ CMD wget --no-verbose --tries=1 http://localhost:${EXPOSE_PORT}/ || exit 1 -CMD config/tor/start-tor.sh & ./run +CMD misc/tor/start-tor.sh & ./run diff --git a/misc/heroku-regen.sh b/misc/heroku-regen.sh new file mode 100755 index 0000000..45af8f0 --- /dev/null +++ b/misc/heroku-regen.sh @@ -0,0 +1,29 @@ +#!/bin/sh +# Assumes this is being executed from a session that has already logged +# into Heroku with "heroku login -i" beforehand. +# +# You can set this up to run every night when you aren't using the +# instance with a cronjob. For example: +# 0 3 * * * /home/pi/whoogle-search/config/heroku-regen.sh + +HEROKU_CLI_SITE="https://devcenter.heroku.com/articles/heroku-cli" + +if ! type "heroku" > /dev/null; then + echo "Must have heroku cli installed: $HEROKU_CLI_SITE" + exit 1 +fi + +cd "$(builtin cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)/../" + +if [ $# -ne 1 ]; then + echo "Must provide the name of the Whoogle instance to regenerate" + exit 1 +fi + +APP_NAME="$1" + +heroku apps:destroy $APP_NAME --confirm $APP_NAME +heroku apps:create $APP_NAME +heroku container:login +heroku container:push web +heroku container:release web diff --git a/config/tor/start-tor.sh b/misc/tor/start-tor.sh similarity index 100% rename from config/tor/start-tor.sh rename to misc/tor/start-tor.sh diff --git a/config/tor/torrc b/misc/tor/torrc similarity index 100% rename from config/tor/torrc rename to misc/tor/torrc From 5884001f05a403c4a304f4ede704d1df541ed094 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Wed, 17 Mar 2021 12:44:30 -0400 Subject: [PATCH 20/45] Use bash for heroku-regen --- misc/heroku-regen.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/misc/heroku-regen.sh b/misc/heroku-regen.sh index 45af8f0..198edcf 100755 --- a/misc/heroku-regen.sh +++ b/misc/heroku-regen.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Assumes this is being executed from a session 
that has already logged # into Heroku with "heroku login -i" beforehand. # @@ -8,22 +8,22 @@ HEROKU_CLI_SITE="https://devcenter.heroku.com/articles/heroku-cli" -if ! type "heroku" > /dev/null; then +if ! [[ -x "$(command -v heroku)" ]]; then echo "Must have heroku cli installed: $HEROKU_CLI_SITE" exit 1 fi cd "$(builtin cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)/../" -if [ $# -ne 1 ]; then - echo "Must provide the name of the Whoogle instance to regenerate" +if [[ $# -ne 1 ]]; then + echo -e "Must provide the name of the Whoogle instance to regenerate" exit 1 fi APP_NAME="$1" -heroku apps:destroy $APP_NAME --confirm $APP_NAME -heroku apps:create $APP_NAME +heroku apps:destroy "$APP_NAME" --confirm "$APP_NAME" +heroku apps:create "$APP_NAME" heroku container:login heroku container:push web heroku container:release web From 6f46facf9eb2a328a56454f0dbf660c9cdae1b0c Mon Sep 17 00:00:00 2001 From: FireMasterK <20838718+FireMasterK@users.noreply.github.com> Date: Thu, 18 Mar 2021 13:59:59 +0000 Subject: [PATCH 21/45] Add new public instances (#223) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 536fa90..162c126 100644 --- a/README.md +++ b/README.md @@ -342,6 +342,7 @@ A lot of the app currently piggybacks on Google's existing support for fetching - https://whoogle.sdf.org - https://whoogle.tormentasolar.win/ - https://whoogle.himiko.cloud +- https://whoogle.kavin.rocks or http://whoogledq5f5wly5p4i2ohnvjwlihnlg4oajjum2oeddfwqdwupbuhqd.onion ## Screenshots #### Desktop From 2114cb87c060a6244fbd3d5aad4634413de34c4e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 19 Mar 2021 23:47:29 -0400 Subject: [PATCH 22/45] Bump jinja2 from 2.10.3 to 2.11.3 (#229) Bumps [jinja2](https://github.com/pallets/jinja) from 2.10.3 to 2.11.3. 
- [Release notes](https://github.com/pallets/jinja/releases) - [Changelog](https://github.com/pallets/jinja/blob/master/CHANGES.rst) - [Commits](https://github.com/pallets/jinja/compare/2.10.3...2.11.3) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 04ff631..aeeaad1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,7 +11,7 @@ Flask==1.1.1 Flask-Session==0.3.2 idna==2.9 itsdangerous==1.1.0 -Jinja2==2.10.3 +Jinja2==2.11.3 MarkupSafe==1.1.1 more-itertools==8.3.0 packaging==20.4 From 337d0ebe376bc1f2e592b2cecfff9f9d6d9da246 Mon Sep 17 00:00:00 2001 From: Shimul Date: Sun, 21 Mar 2021 05:22:06 +0530 Subject: [PATCH 23/45] Handle manifest-src in CSP (#231) --- app/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/app/__init__.py b/app/__init__.py index 5edb9b5..e0cf88e 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -41,6 +41,7 @@ app.config['BANG_FILE'] = os.path.join( app.config['BANG_PATH'], 'bangs.json') app.config['CSP'] = 'default-src \'none\';' \ + 'manifest-src \'self\';' \ 'img-src \'self\';' \ 'style-src \'self\' \'unsafe-inline\';' \ 'script-src \'self\';' \ From 62a9b9e9492c45c7288c1be22f6d7ddb332ab80f Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Sat, 20 Mar 2021 21:21:41 -0400 Subject: [PATCH 24/45] Allow user-defined CSS/theming (#227) * Add custom CSS field to config This allows users to set/customize an instance's theme and appearance to their liking. The config CSS field is prepopulated with all default CSS variable values to allow quick editing. Note that this can be somewhat of a "footgun" if someone updates the CSS to hide all fields/search/etc. Should probably add some sort of bandaid "admin" feature for public instances to employ until the whole cookie/session issue is investigated further. 
* Symlink all app static files to test dir * Refactor app/misc/*.json -> app/static/settings/*.json The country/language json files are used for user config settings, so the "misc" name didn't really make sense. Also moved these to the static folder to make testing easier. * Fix light theme variables in dark theme css * Minor style tweaking --- app/__init__.py | 8 +- app/models/config.py | 7 + app/routes.py | 4 +- app/static/css/dark-theme.css | 111 +++++++++++++--- app/static/css/light-theme.css | 130 +++++++++++++++++++ app/static/css/main.css | 73 +++++++---- app/static/css/search-dark.css | 40 ------ app/static/css/search.css | 39 ------ app/static/css/variables.css | 24 ++++ app/static/img/whoogle.svg | 1 + app/static/js/controller.js | 40 ------ app/{misc => static/settings}/countries.json | 0 app/{misc => static/settings}/languages.json | 0 app/templates/display.html | 9 +- app/templates/header.html | 16 +-- app/templates/index.html | 85 +++++++----- run | 6 +- test/misc | 1 - 18 files changed, 381 insertions(+), 213 deletions(-) create mode 100644 app/static/css/light-theme.css delete mode 100644 app/static/css/search-dark.css delete mode 100644 app/static/css/search.css create mode 100644 app/static/css/variables.css create mode 100644 app/static/img/whoogle.svg rename app/{misc => static/settings}/countries.json (100%) rename app/{misc => static/settings}/languages.json (100%) delete mode 120000 test/misc diff --git a/app/__init__.py b/app/__init__.py index e0cf88e..6ce52b0 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -18,13 +18,13 @@ app.config['VERSION_NUMBER'] = '0.3.2' app.config['APP_ROOT'] = os.getenv( 'APP_ROOT', os.path.dirname(os.path.abspath(__file__))) -app.config['LANGUAGES'] = json.load(open( - os.path.join(app.config['APP_ROOT'], 'misc/languages.json'))) -app.config['COUNTRIES'] = json.load(open( - os.path.join(app.config['APP_ROOT'], 'misc/countries.json'))) app.config['STATIC_FOLDER'] = os.getenv( 'STATIC_FOLDER', 
os.path.join(app.config['APP_ROOT'], 'static')) +app.config['LANGUAGES'] = json.load(open( + os.path.join(app.config['STATIC_FOLDER'], 'settings/languages.json'))) +app.config['COUNTRIES'] = json.load(open( + os.path.join(app.config['STATIC_FOLDER'], 'settings/countries.json'))) app.config['CONFIG_PATH'] = os.getenv( 'CONFIG_VOLUME', os.path.join(app.config['STATIC_FOLDER'], 'config')) diff --git a/app/models/config.py b/app/models/config.py index 3916be2..756af29 100644 --- a/app/models/config.py +++ b/app/models/config.py @@ -1,8 +1,15 @@ +from flask import current_app +import os + + class Config: def __init__(self, **kwargs): + app_config = current_app.config self.url = '' self.lang_search = '' self.lang_interface = '' + self.style = open(os.path.join(app_config['STATIC_FOLDER'], + 'css/variables.css')).read() self.ctry = '' self.safe = False self.dark = False diff --git a/app/routes.py b/app/routes.py index b084f71..10336ed 100644 --- a/app/routes.py +++ b/app/routes.py @@ -233,12 +233,12 @@ def search(): 'display.html', query=urlparse.unquote(query), search_type=search_util.search_type, - dark_mode=g.user_config.dark, + config=g.user_config, response=response, version_number=app.config['VERSION_NUMBER'], search_header=(render_template( 'header.html', - dark_mode=g.user_config.dark, + config=g.user_config, query=urlparse.unquote(query), search_type=search_util.search_type, mobile=g.user_request.mobile) diff --git a/app/static/css/dark-theme.css b/app/static/css/dark-theme.css index 4bd6af1..940f646 100644 --- a/app/static/css/dark-theme.css +++ b/app/static/css/dark-theme.css @@ -1,13 +1,17 @@ html { - background-color: #222 !important; + background: var(--whoogle-dark-background) !important; } body { - background-color: #222 !important; + background: var(--whoogle-dark-background) !important; } div { - color: #fff !important; + color: var(--whoogle-dark-text) !important; +} + +label { + color: var(--whoogle-dark-contrast-text) !important; } li a { @@ -15,43 
+19,114 @@ li a { } li { - color: #fff !important; + color: var(--whoogle-dark-text) !important; +} + +textarea { + background: var(--whoogle-dark-background) !important; + color: var(--whoogle-dark-text) !important; } a:visited h3 div { - color: #bbbbff !important; + color: var(--whoogle-dark-result-visited) !important; } a:link h3 div { - color: #4b8eea !important; + color: var(--whoogle-dark-result-title) !important; } a:link div { - color: #aaffaa !important; + color: var(--whoogle-dark-result-url) !important; } div span { - color: #bbb !important; + color: var(--whoogle-dark-secondary-text) !important; } input { - background-color: #111 !important; - color: #fff !important; + background-color: var(--whoogle-dark-background) !important; + color: var(--whoogle-dark-text) !important; } -#search-bar { - color: #fff !important; - background-color: #222 !important; +select { + background: var(--whoogle-dark-background) !important; + color: var(--whoogle-dark-text) !important; } .search-container { - background-color: #222 !important; + background-color: var(--whoogle-dark-background) !important; } -.ZINbbc{ - background-color: #1a1a1a !important; +.ZINbbc { + background-color: var(--whoogle-dark-result-bg) !important; } -.bRsWnc{ - background-color: #1a1a1a !important; +.bRsWnc { + background-color: var(--whoogle-dark-result-bg) !important; +} + +#search-bar { + border: 2px solid var(--whoogle-dark-accent) !important; + border-radius: 8px; + color: var(--whoogle-dark-text) !important; +} + +#search-bar:focus { + color: var(--whoogle-dark-text) !important; +} + +#search-submit { + border: 1px solid var(--whoogle-dark-accent) !important; + background: var(--whoogle-dark-accent) !important; + color: var(--whoogle-dark-background) !important; +} + +.info-text { + color: var(--whoogle-dark-contrast-text) !important; + opacity: 75%; +} + +.collapsible { + color: var(--whoogle-dark-accent) !important; +} + +.collapsible:after { + color: var(--whoogle-dark-accent) 
!important; +} + +.active { + background-color: var(--whoogle-dark-accent) !important; + color: var(--whoogle-dark-contrast-text) !important; +} + +.content { + background-color: var(--whoogle-dark-accent) !important; + color: var(--whoogle-contrast-text) !important; +} + +.active:after { + color: var(--whoogle-dark-contrast-text); +} + +#gh-link { + color: var(--whoogle-dark-accent); +} + +.autocomplete-items { + border: 1px solid #685e79; +} + +.autocomplete-items div { + color: #fff; + background-color: #222; + border-bottom: 1px solid #242424; +} + +.autocomplete-items div:hover { + background-color: #404040; +} + +.autocomplete-active { + background-color: var(--whoogle-dark-accent) !important; + color: var(--whoogle-dark-background) !important; } diff --git a/app/static/css/light-theme.css b/app/static/css/light-theme.css new file mode 100644 index 0000000..a878c5e --- /dev/null +++ b/app/static/css/light-theme.css @@ -0,0 +1,130 @@ +html { + background: var(--whoogle-background) !important; +} + +body { + background: var(--whoogle-background) !important; +} + +div { + color: var(--whoogle-text) !important; +} + +label { + color: var(--whoogle-contrast-text) !important; +} + +li a { + color: #4b8eaa !important; +} + +li { + color: var(--whoogle-text) !important; +} + +textarea { + background: var(--whoogle-background) !important; + color: var(--whoogle-text) !important; +} + +select { + background: var(--whoogle-background) !important; + color: var(--whoogle-text) !important; +} + +.ZINbbc { + background-color: var(--whoogle-result-bg) !important; +} + +.bRsWnc { + background-color: var(--whoogle-result-bg) !important; +} + +a:visited h3 div { + color: var(--whoogle-result-visited) !important; +} + +a:link h3 div { + color: var(--whoogle-result-title) !important; +} + +a:link div { + color: var(--whoogle-result-url) !important; +} + +div span { + color: var(--whoogle-secondary-text) !important; +} + +input { + background-color: var(--whoogle-background) 
!important; + color: var(--whoogle-text) !important; +} + +#search-bar { + color: var(--whoogle-text) !important; + background-color: var(--whoogle-background); +} + +.home-search { + border: 3px solid var(--whoogle-accent) !important; +} + +.search-container { + background-color: var(--whoogle-background) !important; +} + +#search-submit { + border: 1px solid var(--whoogle-accent) !important; + background: var(--whoogle-accent) !important; + color: var(--whoogle-background) !important; +} + +.info-text { + color: var(--whoogle-contrast-text) !important; + opacity: 75%; +} + +.collapsible { + color: var(--whoogle-accent) !important; +} + +.collapsible:after { + color: var(--whoogle-accent) !important; +} + +.active { + background-color: var(--whoogle-accent) !important; + color: var(--whoogle-contrast-text) !important; +} + +.content { + background-color: var(--whoogle-accent) !important; + color: var(--whoogle-contrast-text) !important; +} + +.active:after { + color: var(--whoogle-contrast-text); +} + +#gh-link { + color: var(--whoogle-accent); +} + +.autocomplete-items { + border: 1px solid #d4d4d4; +} + +.autocomplete-items div { + background-color: #fff; + border-bottom: 1px solid #d4d4d4; +} + +.autocomplete-items div:hover { + background-color: #e9e9e9; +} + +.autocomplete-active { + background-color: var(--whoogle-accent) !important; + color: var(--whoogle-background) !important; +} diff --git a/app/static/css/main.css b/app/static/css/main.css index 5b35bf6..937812a 100644 --- a/app/static/css/main.css +++ b/app/static/css/main.css @@ -10,6 +10,7 @@ body { } .search-container { + background: transparent !important; width: 80%; position: absolute; top: 50%; @@ -26,29 +27,21 @@ body { } #search-bar { + background: transparent !important; width: 100%; - border: 3px solid #685e79; padding: 5px; height: 40px; outline: none; font-size: 24px; - color: #685e79; border-radius: 10px 10px 0 0; max-width: 600px; background: rgba(0, 0, 0, 0); } -#search-bar:focus { - 
color: #685e79; -} - #search-submit { width: 100%; height: 40px; - border: 1px solid #685e79; - background: #685e79 !important; text-align: center; - color: #fff; cursor: pointer; font-size: 20px; align-content: center; @@ -70,7 +63,6 @@ button::-moz-focus-inner { .collapsible { outline: 0; background-color: rgba(0, 0, 0, 0); - color: #685e79; cursor: pointer; padding: 18px; width: 100%; @@ -81,14 +73,8 @@ button::-moz-focus-inner { border-radius: 10px 10px 0 0; } -.active { - background-color: #685e79; - color: white; -} - .collapsible:after { content: '\002B'; - color: #685e79; font-weight: bold; float: right; margin-left: 5px; @@ -96,7 +82,6 @@ button::-moz-focus-inner { .active:after { content: "\2212"; - color: white; } .content { @@ -104,8 +89,6 @@ button::-moz-focus-inner { max-height: 0; overflow: hidden; transition: max-height 0.2s ease-out; - background-color: #685e79; - color: white; border-radius: 0 0 10px 10px; } @@ -113,12 +96,6 @@ button::-moz-focus-inner { padding-bottom: 20px; } -.ua-span { - color: white; - -webkit-box-decoration-break: clone; - box-decoration-break: clone; -} - .hidden { display: none; } @@ -135,3 +112,49 @@ footer { font-style: italic; font-size: 12px; } + +#config-style { + resize: none; + overflow-y: scroll; + width: 100%; + height: 100px; +} + +.whoogle-logo { + display: none; +} + +.whoogle-svg { + width: 80%; + display: block; + margin: auto; + padding-bottom: 10px; +} + +.autocomplete { + position: relative; + display: inline-block; + width: 100%; +} + +.autocomplete-items { + position: absolute; + border-bottom: none; + border-top: none; + z-index: 99; + + /*position the autocomplete items to be the same width as the container:*/ + top: 100%; + left: 0; + right: 0; +} + +.autocomplete-items div { + padding: 10px; + cursor: pointer; +} + +details summary { + padding: 10px; + font-weight: bold; +} diff --git a/app/static/css/search-dark.css b/app/static/css/search-dark.css deleted file mode 100644 index efd923e..0000000 --- 
a/app/static/css/search-dark.css +++ /dev/null @@ -1,40 +0,0 @@ -.autocomplete { - position: relative; - display: inline-block; - width: 100%; -} - -.autocomplete-items { - position: absolute; - border: 1px solid #685e79; - border-bottom: none; - border-top: none; - z-index: 99; - - /*position the autocomplete items to be the same width as the container:*/ - top: 100%; - left: 0; - right: 0; -} - -.autocomplete-items div { - padding: 10px; - cursor: pointer; - color: #fff; - background-color: #222; - border-bottom: 1px solid #242424; -} - -.autocomplete-items div:hover { - background-color: #404040; -} - -.autocomplete-active { - background-color: #685e79 !important; - color: #ffffff; -} - -details summary { - padding: 10px; - font-weight: bold; -} diff --git a/app/static/css/search.css b/app/static/css/search.css deleted file mode 100644 index 155cfcf..0000000 --- a/app/static/css/search.css +++ /dev/null @@ -1,39 +0,0 @@ -.autocomplete { - position: relative; - display: inline-block; - width: 100%; -} - -.autocomplete-items { - position: absolute; - border: 1px solid #d4d4d4; - border-bottom: none; - border-top: none; - z-index: 99; - - /*position the autocomplete items to be the same width as the container:*/ - top: 100%; - left: 0; - right: 0; -} - -.autocomplete-items div { - padding: 10px; - cursor: pointer; - background-color: #fff; - border-bottom: 1px solid #d4d4d4; -} - -.autocomplete-items div:hover { - background-color: #e9e9e9; -} - -.autocomplete-active { - background-color: #685e79 !important; - color: #ffffff; -} - -details summary { - padding: 10px; - font-weight: bold; -} diff --git a/app/static/css/variables.css b/app/static/css/variables.css new file mode 100644 index 0000000..efb6d3a --- /dev/null +++ b/app/static/css/variables.css @@ -0,0 +1,24 @@ +/* Colors */ +:root { + /* LIGHT THEME COLORS */ + --whoogle-background: #fff; + --whoogle-accent: #685e79; + --whoogle-text: #000; + --whoogle-contrast-text: #fff; + --whoogle-secondary-text: 
#70757a; + --whoogle-result-bg: #fff; + --whoogle-result-title: #1967d2; + --whoogle-result-url: #0d652d; + --whoogle-result-visited: #4b11a8; + + /* DARK THEME COLORS */ + --whoogle-dark-background: #222; + --whoogle-dark-accent: #685e79; + --whoogle-dark-text: #fff; + --whoogle-dark-contrast-text: #000; + --whoogle-dark-secondary-text: #bbb; + --whoogle-dark-result-bg: #000; + --whoogle-dark-result-title: #1967d2; + --whoogle-dark-result-url: #4b11a8; + --whoogle-dark-result-visited: #bbbbff; +} diff --git a/app/static/img/whoogle.svg b/app/static/img/whoogle.svg new file mode 100644 index 0000000..41a783e --- /dev/null +++ b/app/static/img/whoogle.svg @@ -0,0 +1 @@ + diff --git a/app/static/js/controller.js b/app/static/js/controller.js index 3ab8ca7..8399220 100644 --- a/app/static/js/controller.js +++ b/app/static/js/controller.js @@ -1,14 +1,3 @@ -// Whoogle configurations that use boolean values and checkboxes -CONFIG_BOOLS = [ - "nojs", "dark", "safe", "alts", "new_tab", "get_only", "tor" -]; - -// Whoogle configurations that use string values and input fields -CONFIG_STRS = [ - "near", "url" -]; - - const setupSearchLayout = () => { // Setup search field const searchBar = document.getElementById("search-bar"); @@ -28,33 +17,6 @@ const setupSearchLayout = () => { }); }; -const fillConfigValues = () => { - // Request existing config info - let xhrGET = new XMLHttpRequest(); - xhrGET.open("GET", "config"); - xhrGET.onload = function() { - if (xhrGET.readyState === 4 && xhrGET.status !== 200) { - alert("Error loading Whoogle config"); - return; - } - - // Allow for updating/saving config values - let configSettings = JSON.parse(xhrGET.responseText); - - CONFIG_STRS.forEach(function(item) { - let configElement = document.getElementById("config-" + item.replace("_", "-")); - configElement.value = configSettings[item] ? 
configSettings[item] : ""; - }); - - CONFIG_BOOLS.forEach(function(item) { - let configElement = document.getElementById("config-" + item.replace("_", "-")); - configElement.checked = !!configSettings[item]; - }); - }; - - xhrGET.send(); -}; - const setupConfigLayout = () => { // Setup whoogle config const collapsible = document.getElementById("config-collapsible"); @@ -69,8 +31,6 @@ const setupConfigLayout = () => { content.classList.toggle("open"); }); - - fillConfigValues(); }; const loadConfig = event => { diff --git a/app/misc/countries.json b/app/static/settings/countries.json similarity index 100% rename from app/misc/countries.json rename to app/static/settings/countries.json diff --git a/app/misc/languages.json b/app/static/settings/languages.json similarity index 100% rename from app/misc/languages.json rename to app/static/settings/languages.json diff --git a/app/templates/display.html b/app/templates/display.html index 30eba0a..9913f55 100644 --- a/app/templates/display.html +++ b/app/templates/display.html @@ -5,11 +5,10 @@ - + - {% if dark_mode %} - - {% endif %} + + {{ query }} - Whoogle Search @@ -17,7 +16,7 @@ {{ response|safe }}