From a8914e4e78745883a0046e7510f3f51b4279d39e Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Sun, 4 Oct 2020 19:39:04 -0400 Subject: [PATCH 1/2] Release v0.2.1 (#116) - Updated dark theme (#121) - Dark theme is no longer the previous high contrast "white on black" color scheme - New configuration settings - Split interface and result language config (#89) - Added option for using privacy respecting result alternatives (#106) - `youtube.com` -> `invidiou.site` - `twitter.com` -> `nitter.net` - `instagram.com` -> `bibliogram.art` - Improved search suggestion arrow key navigation behavior (#115) - Added repl.it deployment (#114) - Improved ad filtering for non-English results (f7380ae15dbfc8f1d3a71d42c330e7827870f7a3) - Split interface and result language config (#89) - New config option: privacy respecting result alternatives (#106) - Updated search suggestion behavior (#115) - Minor project improvements and refactoring: - Added footer to results UI - Updated opensearch template - Various bug fixes, including: - Fixed pipx run command (#118) - Fixed browser autocomplete (#128) - Fixed missing autofocus on search field in Firefox (dfb1e81fa12e2cc9ccc75a193fd4afdcb79d01e9) --- .github/ISSUE_TEMPLATE/feature_request.md | 3 - .replit | 2 + README.md | 26 ++++++-- app/__init__.py | 4 +- app/filter.py | 80 +++++------------------ app/models/config.py | 7 +- app/request.py | 10 ++- app/routes.py | 17 +++-- app/static/css/dark-theme.css | 42 ++++++++++++ app/static/css/main.css | 16 +++-- app/static/js/autocomplete.js | 34 ++++++++-- app/static/js/controller.js | 40 +++++++----- app/templates/display.html | 3 + app/templates/index.html | 33 ++++++++-- app/templates/opensearch.xml | 7 +- app/utils/filter_utils.py | 79 ++++++++++++++++++++++ app/utils/routing_utils.py | 2 +- app/utils/{misc.py => session_utils.py} | 5 -- setup.py | 2 +- test/conftest.py | 2 +- test/test_misc.py | 2 +- test/test_results.py | 4 +- test/test_routes.py | 3 +- 23 files changed, 284 insertions(+), 139 deletions(-) create mode 100644 .replit create mode 100644 app/static/css/dark-theme.css create mode 100644 app/utils/filter_utils.py rename app/utils/{misc.py => session_utils.py} (62%) diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 24bf2f6..9da6d04 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -10,8 +10,5 @@ assignees: '' **Describe the feature you'd like to see added** A short description of the feature, and what it would accomplish. -**Describe which parts of the project this would modify (front end/back end/configuration/etc)** -A short description of which aspects of Whoogle Search would need modification - **Additional context** Add any other context or screenshots about the feature request here. diff --git a/.replit b/.replit new file mode 100644 index 0000000..909eee8 --- /dev/null +++ b/.replit @@ -0,0 +1,2 @@ +language = "python3" +run = "pip install -r requirements.txt && ./run" diff --git a/README.md b/README.md index 0475393..3e9c823 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ [![Build Status](https://travis-ci.com/benbusby/whoogle-search.svg?branch=master)](https://travis-ci.com/benbusby/whoogle-search) [![codebeat badge](https://codebeat.co/badges/e96cada2-fb6f-4528-8285-7d72abd74e8d)](https://codebeat.co/projects/github-com-benbusby-shoogle-master) [![Docker Pulls](https://img.shields.io/docker/pulls/benbusby/whoogle-search)](https://hub.docker.com/r/benbusby/whoogle-search) +[![Gitter](https://img.shields.io/gitter/room/benbusby/whoogle-search)](https://gitter.im/whoogle-search/community) Get Google search results, but without any ads, javascript, AMP links, cookies, or IP address tracking. Easily deployable in one click as a Docker app, and customizable with a single config file. Quick and simple to implement as a primary search engine replacement on both desktop and mobile. @@ -21,7 +22,7 @@ Contents - No ads or sponsored content - No javascript - No cookies -- No tracking/linking of your personal IP address +- No tracking/linking of your personal IP address\* - No AMP links - No URL tracking tags (i.e. utm=%s) - No referrer header @@ -34,6 +35,8 @@ Contents - Optional location-based searching (i.e. results near \) - Optional NoJS mode to disable all Javascript in results +*If deployed to a remote server + ## Dependencies If using Heroku Quick Deploy, **you can skip this section**. @@ -55,19 +58,28 @@ There are a few different ways to begin using the app, depending on your prefere Provides: - Free deployment of app -- Free https url (https://\.herokuapp.com) +- Free HTTPS url (https://\.herokuapp.com) - Downtime after periods of inactivity \([solution](https://github.com/benbusby/whoogle-search#prevent-downtime-heroku-only)\) -### B) [pipx](https://github.com/pipxproject/pipx#install-pipx) +### B) [Repl.it](https://repl.it) +[![Run on Repl.it](https://repl.it/badge/github/benbusby/whoogle-search)](https://repl.it/github/benbusby/whoogle-search) + +Provides: +- Free deployment of app (can be ran without account) +- Free HTTPS url (https://\.\\.repl\.co) + - Supports custom domains +- Downtime after periods of inactivity \([solution 1](https://repl.it/talk/ask/use-this-pingmat1replco-just-enter/28821/101298), [solution 2](https://repl.it/talk/learn/How-to-use-and-setup-UptimeRobot/9003)\) + +### C) [pipx](https://github.com/pipxproject/pipx#install-pipx) Persistent install: `pipx install git+https://github.com/benbusby/whoogle-search.git` Sandboxed temporary instance: -`pipx run git+https://github.com/benbusby/whoogle-search.git whoogle-search` +`pipx run --spec git+https://github.com/benbusby/whoogle-search.git whoogle-search` -### C) pip +### D) pip `pip install whoogle-search` ```bash @@ -85,7 +97,7 @@ optional arguments: --https-only Enforces HTTPS redirects for all requests (default False) ``` -### D) Manual +### E) Manual Clone the repo and run the following commands to start the app in a local-only environment: ```bash @@ -124,7 +136,7 @@ sudo systemctl enable whoogle sudo systemctl start whoogle ``` -### E) Manual (Docker) +### F) Manual (Docker) 1. Ensure the Docker daemon is running, and is accessible by your user account - To add user permissions, you can execute `sudo usermod -aG docker yourusername` - Running `docker ps` should return something besides an error. If you encounter an error saying the daemon isn't running, try `sudo systemctl start docker` (Linux) or ensure the docker tool is running (Windows/macOS). diff --git a/app/__init__.py b/app/__init__.py index 22e436d..8293c44 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -1,4 +1,4 @@ -from app.utils.misc import generate_user_keys +from app.utils.session_utils import generate_user_keys from flask import Flask from flask_session import Session import os @@ -9,7 +9,7 @@ app.default_key_set = generate_user_keys() app.no_cookie_ips = [] app.config['SECRET_KEY'] = os.urandom(32) app.config['SESSION_TYPE'] = 'filesystem' -app.config['VERSION_NUMBER'] = '0.2.0' +app.config['VERSION_NUMBER'] = '0.2.1' app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__))) app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static')) app.config['CONFIG_PATH'] = os.getenv('CONFIG_VOLUME', os.path.join(app.config['STATIC_FOLDER'], 'config')) diff --git a/app/filter.py b/app/filter.py index 1cc9f87..e56dc67 100644 --- a/app/filter.py +++ b/app/filter.py @@ -1,56 +1,11 @@ from app.request import VALID_PARAMS -from app.utils.misc import BLACKLIST -from bs4 import BeautifulSoup +from app.utils.filter_utils import * from bs4.element import ResultSet from cryptography.fernet import Fernet import re import urllib.parse as urlparse from urllib.parse import parse_qs -SKIP_ARGS = ['ref_src', 'utm'] -FULL_RES_IMG = '
Full Image' -GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo' -LOGO_URL = GOOG_IMG + '_desk' -BLANK_B64 = ''' - -''' - - -def get_first_link(soup): - # Replace hrefs with only the intended destination (no "utm" type tags) - for a in soup.find_all('a', href=True): - # Return the first search result URL - if 'url?q=' in a['href']: - return filter_link_args(a['href']) - - -def filter_link_args(query_link): - parsed_link = urlparse.urlparse(query_link) - link_args = parse_qs(parsed_link.query) - safe_args = {} - - if len(link_args) == 0 and len(parsed_link) > 0: - return query_link - - for arg in link_args.keys(): - if arg in SKIP_ARGS: - continue - - safe_args[arg] = link_args[arg] - - # Remove original link query and replace with filtered args - query_link = query_link.replace(parsed_link.query, '') - if len(safe_args) > 0: - query_link = query_link + urlparse.urlencode(safe_args, doseq=True) - else: - query_link = query_link.replace('?', '') - - return query_link - - -def has_ad_content(element: str): - return element.upper() in (value.upper() for value in BLACKLIST) or 'ⓘ' in element - class Filter: def __init__(self, user_keys: dict, mobile=False, config=None): @@ -61,6 +16,7 @@ class Filter: self.dark = config['dark'] if 'dark' in config else False self.nojs = config['nojs'] if 'nojs' in config else False self.new_tab = config['new_tab'] if 'new_tab' in config else False + self.alt_redirect = config['alts'] if 'alts' in config else False self.mobile = mobile self.user_keys = user_keys self.main_divs = ResultSet('') @@ -188,18 +144,6 @@ class Filter: except AttributeError: pass - # Set up dark mode if active - if self.dark: - soup.find('html')['style'] = 'scrollbar-color: #333 #111;color:#fff !important;background:#000 !important' - for input_element in soup.findAll('input'): - input_element['style'] = 'color:#fff;background:#000;' - - for span_element in soup.findAll('span'): - span_element['style'] = 'color: white;' - - for href_element in soup.findAll('a'): - href_element['style'] = 'color: white' if href_element['href'].startswith('/search') else '' - def update_link(self, link): # Replace href with only the intended destination (no "utm" type tags) href = link['href'].replace('https://www.google.com', '') @@ -213,8 +157,12 @@ class Filter: query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else '' if query_link.startswith('/'): + # Internal google links (i.e. mail, maps, etc) should still be forwarded to Google link['href'] = 'https://google.com' + query_link elif '/search?q=' in href: + # "li:1" implies the query should be interpreted verbatim, so we wrap it in double quotes + if 'li:1' in href: + query_link = '"' + query_link + '"' new_search = '/search?q=' + self.encrypt_path(query_link) query_params = parse_qs(urlparse.urlparse(href).query) @@ -232,11 +180,13 @@ class Filter: else: link['href'] = href + # Replace link location if "alts" config is enabled + if self.alt_redirect: + # Search and replace all link descriptions with alternative location + link['href'] = get_site_alt(link['href']) + link_desc = link.find_all(text=re.compile('|'.join(SITE_ALTS.keys()))) + if len(link_desc) == 0: + return -def gen_nojs(sibling): - nojs_link = BeautifulSoup().new_tag('a') - nojs_link['href'] = '/window?location=' + sibling['href'] - nojs_link['style'] = 'display:block;width:100%;' - nojs_link.string = 'NoJS Link: ' + nojs_link['href'] - sibling.append(BeautifulSoup('


', 'html.parser')) - sibling.append(nojs_link) + # Replace link destination + link_desc[0].replace_with(get_site_alt(link_desc[0])) diff --git a/app/models/config.py b/app/models/config.py index 544d2d1..2fb4088 100644 --- a/app/models/config.py +++ b/app/models/config.py @@ -2,6 +2,7 @@ class Config: # Derived from here: # https://sites.google.com/site/tomihasa/google-language-codes#searchlanguage LANGUAGES = [ + {'name': 'Default (none specified)', 'value': ''}, {'name': 'English', 'value': 'lang_en'}, {'name': 'Afrikaans', 'value': 'lang_af'}, {'name': 'Arabic', 'value': 'lang_ar'}, @@ -51,7 +52,7 @@ class Config: ] COUNTRIES = [ - {'name': 'Default (use server location)', 'value': ''}, + {'name': 'Default (none)', 'value': ''}, {'name': 'Afghanistan', 'value': 'countryAF'}, {'name': 'Albania', 'value': 'countryAL'}, {'name': 'Algeria', 'value': 'countryDZ'}, @@ -298,12 +299,14 @@ class Config: def __init__(self, **kwargs): self.url = '' - self.lang = 'lang_en' + self.lang_search = '' + self.lang_interface = '' self.ctry = '' self.safe = False self.dark = False self.nojs = False self.near = '' + self.alts = False self.new_tab = False self.get_only = False diff --git a/app/request.py b/app/request.py index fe7d3fb..4abb9b3 100644 --- a/app/request.py +++ b/app/request.py @@ -12,7 +12,7 @@ MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0' DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0' # Valid query params -VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source'] +VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source', 'nfpr'] def gen_user_agent(is_mobile): @@ -66,10 +66,14 @@ def gen_query(query, args, config, near_city=None): param_dict['source'] = '&source=' + args.get('source') param_dict['lr'] = ('&lr=' + ''.join([_ for _ in sub_lang if not _.isdigit()])) if sub_lang else '' else: - param_dict['lr'] = '&lr=' + config.lang + param_dict['lr'] = ('&lr=' + config.lang_search) if config.lang_search else '' + + # Set autocorrected search ignore + if 'nfpr' in args: + param_dict['nfpr'] = '&nfpr=' + args.get('nfpr') param_dict['cr'] = ('&cr=' + config.ctry) if config.ctry else '' - param_dict['hl'] = '&hl=' + config.lang.replace('lang_', '') + param_dict['hl'] = ('&hl=' + config.lang_interface.replace('lang_', '')) if config.lang_interface else '' param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off') for val in param_dict.values(): diff --git a/app/routes.py b/app/routes.py index ed288c0..56bc6de 100644 --- a/app/routes.py +++ b/app/routes.py @@ -15,7 +15,7 @@ from requests import exceptions from app import app from app.models.config import Config from app.request import Request -from app.utils.misc import valid_user_session +from app.utils.session_utils import valid_user_session from app.utils.routing_utils import * @@ -59,13 +59,13 @@ def before_request_func(): if https_only and request.url.startswith('http://'): return redirect(request.url.replace('http://', 'https://', 1), code=308) - + g.user_config = Config(**session['config']) if not g.user_config.url: g.user_config.url = request.url_root.replace('http://', 'https://') if https_only else request.url_root - g.user_request = Request(request.headers.get('User-Agent'), language=g.user_config.lang) + g.user_request = Request(request.headers.get('User-Agent'), language=g.user_config.lang_search) g.app_location = g.user_config.url @@ -115,12 +115,11 @@ def opensearch(): if opensearch_url.endswith('/'): opensearch_url = opensearch_url[:-1] - template = render_template('opensearch.xml', - main_url=opensearch_url, - request_type='get' if g.user_config.get_only else 'post') - response = make_response(template) - response.headers['Content-Type'] = 'application/xml' - return response + return render_template( + 'opensearch.xml', + main_url=opensearch_url, + request_type='' if g.user_config.get_only else 'method="post"' + ), 200, {'Content-Disposition': 'attachment; filename="opensearch.xml"'} @app.route('/autocomplete', methods=['GET', 'POST']) diff --git a/app/static/css/dark-theme.css b/app/static/css/dark-theme.css new file mode 100644 index 0000000..36cfada --- /dev/null +++ b/app/static/css/dark-theme.css @@ -0,0 +1,42 @@ +html { + background-color: #000 !important; +} + +body { + background-color: #222 !important; +} + +div { + /*background-color: #111 !important;*/ + color: #fff !important; +} + +a:visited h3 div { + color: #bbbbff !important; +} + +a:link h3 div { + color: #4b8eea !important; +} + +a:link div { + color: #aaffaa !important; +} + +div span { + color: #bbb !important; +} + +input { + background-color: #111 !important; + color: #fff !important; +} + +#search-bar { + color: #fff !important; + background-color: #000 !important; +} + +.search-container { + background-color: #000 !important; +} diff --git a/app/static/css/main.css b/app/static/css/main.css index ef4b557..5b35bf6 100644 --- a/app/static/css/main.css +++ b/app/static/css/main.css @@ -16,6 +16,7 @@ body { left: 50%; transform: translate(-50%, -50%); max-width: 600px; + z-index: 15; } .search-items { @@ -34,10 +35,10 @@ body { color: #685e79; border-radius: 10px 10px 0 0; max-width: 600px; - background: rgba(0,0,0,0); + background: rgba(0, 0, 0, 0); } -#search-bar:focus{ +#search-bar:focus { color: #685e79; } @@ -45,7 +46,7 @@ body { width: 100%; height: 40px; border: 1px solid #685e79; - background: #685e79; + background: #685e79 !important; text-align: center; color: #fff; cursor: pointer; @@ -68,7 +69,7 @@ button::-moz-focus-inner { .collapsible { outline: 0; - background-color: rgba(0,0,0,0); + background-color: rgba(0, 0, 0, 0); color: #685e79; cursor: pointer; padding: 18px; @@ -127,5 +128,10 @@ footer { bottom: 0%; text-align: center; width: 100%; - z-index: -1; + z-index: 10; +} + +.info-text { + font-style: italic; + font-size: 12px; } diff --git a/app/static/js/autocomplete.js b/app/static/js/autocomplete.js index 84e9b23..3d179ca 100644 --- a/app/static/js/autocomplete.js +++ b/app/static/js/autocomplete.js @@ -2,7 +2,7 @@ const handleUserInput = searchBar => { let xhrRequest = new XMLHttpRequest(); xhrRequest.open("POST", "/autocomplete"); xhrRequest.setRequestHeader("Content-type", "application/x-www-form-urlencoded"); - xhrRequest.onload = function() { + xhrRequest.onload = function () { if (xhrRequest.readyState === 4 && xhrRequest.status !== 200) { // Do nothing if failed to fetch autocomplete results return; @@ -18,6 +18,7 @@ const handleUserInput = searchBar => { const autocomplete = (searchInput, autocompleteResults) => { let currentFocus; + let originalSearch; searchInput.addEventListener("input", function () { let autocompleteList, autocompleteItem, i, val = this.value; @@ -53,9 +54,11 @@ const autocomplete = (searchInput, autocompleteResults) => { let suggestion = document.getElementById(this.id + "-autocomplete-list"); if (suggestion) suggestion = suggestion.getElementsByTagName("div"); if (e.keyCode === 40) { // down + e.preventDefault(); currentFocus++; addActive(suggestion); } else if (e.keyCode === 38) { //up + e.preventDefault(); currentFocus--; addActive(suggestion); } else if (e.keyCode === 13) { // enter @@ -63,17 +66,36 @@ const autocomplete = (searchInput, autocompleteResults) => { if (currentFocus > -1) { if (suggestion) suggestion[currentFocus].click(); } + } else { + originalSearch = document.getElementById("search-bar").value; } }); const addActive = suggestion => { - if (!suggestion || !suggestion[currentFocus]) return false; + let searchBar = document.getElementById("search-bar"); + + // Handle navigation outside of suggestion list + if (!suggestion || !suggestion[currentFocus]) { + if (currentFocus >= suggestion.length) { + // Move selection back to the beginning + currentFocus = 0; + } else if (currentFocus < 0) { + // Retrieve original search and remove active suggestion selection + currentFocus = -1; + searchBar.value = originalSearch; + removeActive(suggestion); + return; + } else { + return; + } + } + removeActive(suggestion); - - if (currentFocus >= suggestion.length) currentFocus = 0; - if (currentFocus < 0) currentFocus = (suggestion.length - 1); - suggestion[currentFocus].classList.add("autocomplete-active"); + + // Autofill search bar with suggestion content + searchBar.value = suggestion[currentFocus].textContent; + searchBar.focus(); }; const removeActive = suggestion => { diff --git a/app/static/js/controller.js b/app/static/js/controller.js index 95d917b..156a84d 100644 --- a/app/static/js/controller.js +++ b/app/static/js/controller.js @@ -1,3 +1,14 @@ +// Whoogle configurations that use boolean values and checkboxes +CONFIG_BOOLS = [ + "nojs", "dark", "safe", "alts", "new_tab", "get_only" +]; + +// Whoogle configurations that use string values and input fields +CONFIG_STRS = [ + "near", "url" +]; + + const setupSearchLayout = () => { // Setup search field const searchBar = document.getElementById("search-bar"); @@ -18,15 +29,6 @@ const setupSearchLayout = () => { }; const fillConfigValues = () => { - // Establish all config value elements - const near = document.getElementById("config-near"); - const noJS = document.getElementById("config-nojs"); - const dark = document.getElementById("config-dark"); - const safe = document.getElementById("config-safe"); - const url = document.getElementById("config-url"); - const newTab = document.getElementById("config-new-tab"); - const getOnly = document.getElementById("config-get-only"); - // Request existing config info let xhrGET = new XMLHttpRequest(); xhrGET.open("GET", "/config"); @@ -39,15 +41,15 @@ const fillConfigValues = () => { // Allow for updating/saving config values let configSettings = JSON.parse(xhrGET.responseText); - near.value = configSettings["near"] ? configSettings["near"] : ""; - noJS.checked = !!configSettings["nojs"]; - dark.checked = !!configSettings["dark"]; - safe.checked = !!configSettings["safe"]; - getOnly.checked = !!configSettings["get_only"]; - newTab.checked = !!configSettings["new_tab"]; + CONFIG_STRS.forEach(function(item) { + let configElement = document.getElementById("config-" + item.replace("_", "-")); + configElement.value = configSettings[item] ? configSettings[item] : ""; + }); - // Addresses the issue of incorrect URL being used behind reverse proxy - url.value = configSettings["url"] ? configSettings["url"] : ""; + CONFIG_BOOLS.forEach(function(item) { + let configElement = document.getElementById("config-" + item.replace("_", "-")); + configElement.checked = !!configSettings[item]; + }); }; xhrGET.send(); @@ -113,4 +115,8 @@ document.addEventListener("DOMContentLoaded", function() { setupSearchLayout(); setupConfigLayout(); + + // Focusing on the search input field requires a delay for elements to finish + // loading (seemingly only on FF) + setTimeout(function() { document.getElementById("search-bar").focus(); }, 250); }); diff --git a/app/templates/display.html b/app/templates/display.html index bd18838..6a8a609 100644 --- a/app/templates/display.html +++ b/app/templates/display.html @@ -8,6 +8,9 @@ + {% if dark_mode %} + + {% endif %} {{ query }} - Whoogle Search diff --git a/app/templates/index.html b/app/templates/index.html index 23e3fbd..4980316 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -23,6 +23,9 @@ + {% if config.dark %} + + {% endif %} Whoogle Search @@ -31,7 +34,7 @@
- +
@@ -42,7 +45,7 @@
- + +
— Note: If enabled, a website will only appear in the results if it is *hosted* in the selected country.
- - {% for lang in languages %} + {% endfor %} + +
+
+ +
+
+ + +
— Replaces Twitter/YouTube/Instagram links + with Nitter/Invidious/Bibliogram links.
+
diff --git a/app/templates/opensearch.xml b/app/templates/opensearch.xml index b737be7..8e2e7b2 100644 --- a/app/templates/opensearch.xml +++ b/app/templates/opensearch.xml @@ -1,13 +1,14 @@ + Whoogle Whoogle: A lightweight, deployable Google search proxy for desktop/mobile that removes Javascript, AMP links, and ads UTF-8 - /static/img/favicon/favicon-32x32.png - +  + - + {{ main_url }}/search diff --git a/app/utils/filter_utils.py b/app/utils/filter_utils.py new file mode 100644 index 0000000..7f9e9a5 --- /dev/null +++ b/app/utils/filter_utils.py @@ -0,0 +1,79 @@ +from bs4 import BeautifulSoup +import urllib.parse as urlparse +from urllib.parse import parse_qs + +SKIP_ARGS = ['ref_src', 'utm'] +FULL_RES_IMG = '
Full Image' +GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo' +LOGO_URL = GOOG_IMG + '_desk' +BLANK_B64 = ''' + +''' + +BLACKLIST = [ + 'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', 'Реклама', 'Anunț', '광고', + 'annons', 'Annonse', 'Iklan', '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam', + 'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés' +] + +SITE_ALTS = { + 'twitter.com': 'nitter.net', + 'youtube.com': 'invidiou.site', + 'instagram.com': 'bibliogram.art/u' +} + + +def has_ad_content(element: str): + return element.upper() in (value.upper() for value in BLACKLIST) or 'ⓘ' in element + + +def get_first_link(soup): + # Replace hrefs with only the intended destination (no "utm" type tags) + for a in soup.find_all('a', href=True): + # Return the first search result URL + if 'url?q=' in a['href']: + return filter_link_args(a['href']) + + +def get_site_alt(link: str): + for site_key in SITE_ALTS.keys(): + if site_key not in link: + continue + + link = link.replace(site_key, SITE_ALTS[site_key]) + break + + return link + + +def filter_link_args(query_link): + parsed_link = urlparse.urlparse(query_link) + link_args = parse_qs(parsed_link.query) + safe_args = {} + + if len(link_args) == 0 and len(parsed_link) > 0: + return query_link + + for arg in link_args.keys(): + if arg in SKIP_ARGS: + continue + + safe_args[arg] = link_args[arg] + + # Remove original link query and replace with filtered args + query_link = query_link.replace(parsed_link.query, '') + if len(safe_args) > 0: + query_link = query_link + urlparse.urlencode(safe_args, doseq=True) + else: + query_link = query_link.replace('?', '') + + return query_link + + +def gen_nojs(sibling): + nojs_link = BeautifulSoup().new_tag('a') + nojs_link['href'] = '/window?location=' + sibling['href'] + nojs_link['style'] = 'display:block;width:100%;' + nojs_link.string = 'NoJS Link: ' + nojs_link['href'] + sibling.append(BeautifulSoup('


', 'html.parser')) + sibling.append(nojs_link) \ No newline at end of file diff --git a/app/utils/routing_utils.py b/app/utils/routing_utils.py index 40f8a90..2a649b4 100644 --- a/app/utils/routing_utils.py +++ b/app/utils/routing_utils.py @@ -1,5 +1,5 @@ from app.filter import Filter, get_first_link -from app.utils.misc import generate_user_keys +from app.utils.session_utils import generate_user_keys from app.request import gen_query from bs4 import BeautifulSoup from cryptography.fernet import Fernet, InvalidToken diff --git a/app/utils/misc.py b/app/utils/session_utils.py similarity index 62% rename from app/utils/misc.py rename to app/utils/session_utils.py index b87941d..f959abe 100644 --- a/app/utils/misc.py +++ b/app/utils/session_utils.py @@ -2,11 +2,6 @@ from cryptography.fernet import Fernet from flask import current_app as app REQUIRED_SESSION_VALUES = ['uuid', 'config', 'fernet_keys'] -BLACKLIST = [ - 'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', 'Реклама', 'Anunț', '광고', - 'annons', 'Annonse', 'Iklan', '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam', - 'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés' -] def generate_user_keys(cookies_disabled=False) -> dict: diff --git a/setup.py b/setup.py index 08652bc..b2cddd1 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setuptools.setup( author='Ben Busby', author_email='benbusby@protonmail.com', name='whoogle-search', - version='0.2.0', + version='0.2.1', include_package_data=True, install_requires=requirements, description='Self-hosted, ad-free, privacy-respecting Google metasearch engine', diff --git a/test/conftest.py b/test/conftest.py index 63aec3e..7a15f00 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,5 +1,5 @@ from app import app -from app.utils.misc import generate_user_keys +from app.utils.session_utils import generate_user_keys import pytest diff --git a/test/test_misc.py b/test/test_misc.py index 8eb1d78..92fcadb 100644 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -1,4 +1,4 @@ -from app.utils.misc import generate_user_keys, valid_user_session +from app.utils.session_utils import generate_user_keys, valid_user_session def test_generate_user_keys(): diff --git a/test/test_results.py b/test/test_results.py index a943de6..a7aa771 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -1,6 +1,6 @@ from bs4 import BeautifulSoup from app.filter import Filter -from app.utils.misc import generate_user_keys +from app.utils.session_utils import generate_user_keys from datetime import datetime from dateutil.parser import * @@ -55,7 +55,7 @@ def test_recent_results(client): result_divs = get_search_results(rv.data) current_date = datetime.now() - for div in result_divs: + for div in [_ for _ in result_divs if _.find('span')]: date_span = div.find('span').decode_contents() if not date_span or len(date_span) > 15 or len(date_span) < 7: continue diff --git a/test/test_routes.py b/test/test_routes.py index 56c9909..3d08f0a 100644 --- a/test/test_routes.py +++ b/test/test_routes.py @@ -6,7 +6,8 @@ demo_config = { 'near': random.choice(['Seattle', 'New York', 'San Francisco']), 'dark_mode': str(random.getrandbits(1)), 'nojs': str(random.getrandbits(1)), - 'lang': random.choice(Config.LANGUAGES)['value'], + 'lang_interface': random.choice(Config.LANGUAGES)['value'], + 'lang_search': random.choice(Config.LANGUAGES)['value'], 'ctry': random.choice(Config.COUNTRIES)['value'] } From 776d618dad4ce74870fe93b4186574ae169409ba Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Sun, 25 Oct 2020 13:52:30 -0400 Subject: [PATCH 2/2] Fix improper header styling, remove shopping tab links The header template was using Google's classes for the "Whoogle" logo, which meant keeping up with their list of colors used in the logo. The template was updated to only ever use the Whoogle logo color. Accordingly, the logo specific styling in filter.py was removed, since it is no longer needed. Also removes all links to the shopping tab, as it seems that the majority of the links to items are Google specific links (usually google.com/aclk links without any discernible param for determining the true location for the link). The shopping page should be addressed separately with unique filtering/formatting. Further tracking of this task will be followed in #136. (cherry picked from commit f3bb1e22b476e794b3dc320c37a18adfd724dfef) --- app/filter.py | 8 ++++---- app/templates/header.html | 8 ++------ 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/app/filter.py b/app/filter.py index e56dc67..71ac763 100644 --- a/app/filter.py +++ b/app/filter.py @@ -31,9 +31,6 @@ class Filter: def reskin(self, page): # Aesthetic only re-skinning - page = page.replace('>G<', '>Wh<') - pattern = re.compile('4285f4|ea4335|fbcc05|34a853|fbbc05', re.IGNORECASE) - page = pattern.sub('685e79', page) if self.dark: page = page.replace('fff', '000').replace('202124', 'ddd').replace('1967D2', '3b85ea') @@ -56,6 +53,7 @@ class Filter: self.fix_question_section() self.update_styling(soup) + for img in [_ for _ in soup.find_all('img') if 'src' in _.attrs]: self.update_element_src(img, 'image/png') @@ -147,7 +145,9 @@ class Filter: def update_link(self, link): # Replace href with only the intended destination (no "utm" type tags) href = link['href'].replace('https://www.google.com', '') - if '/advanced_search' in href: + if '/advanced_search' in href or 'tbm=shop' in href: + # TODO: The "Shopping" tab requires further filtering (see #136) + # Temporarily removing all links to that tab for now. link.decompose() return elif self.new_tab: diff --git a/app/templates/header.html b/app/templates/header.html index 5573b99..113cfa4 100644 --- a/app/templates/header.html +++ b/app/templates/header.html @@ -5,9 +5,7 @@
@@ -27,9 +25,7 @@