Block websites from search results via user config (#304)

* Block websites in search results via user config

Adds a new config field "Block" to specify a comma-separated list of
websites to block in search results. The block list is applied to all searches.

* Add test for blocking sites from search results

* Document WHOOGLE_CONFIG_BLOCK usage

* Strip '-site:' filters from query in header template

The 'behind the scenes' site filter applied for blocked sites was
appearing in the query field when navigating between search categories
(all -> images -> news, etc). This change prevents the filter from
appearing in all categories except "images", since the image category
uses a separate header. The image case should eventually be addressed
when the image page can begin using the standard whoogle header, but
until then, the filter will still appear for image searches.
This commit is contained in:
Ben Busby 2021-05-07 11:45:53 -04:00
parent f5fbd14a14
commit 3aeb53fe93
No known key found for this signature in database
GPG Key ID: 3B08611DF6E62ED2
10 changed files with 176 additions and 115 deletions

View File

@ -263,6 +263,7 @@ These environment variables allow setting default config values, but can be over
| WHOOGLE_CONFIG_COUNTRY | Filter results by hosting country |
| WHOOGLE_CONFIG_LANGUAGE | Set interface language |
| WHOOGLE_CONFIG_SEARCH_LANGUAGE | Set search result language |
| WHOOGLE_CONFIG_BLOCK | Block websites from search results (use comma-separated list) |
| WHOOGLE_CONFIG_DARK | Enable dark theme |
| WHOOGLE_CONFIG_SAFE | Enable safe searches |
| WHOOGLE_CONFIG_ALTS | Use social media site alternatives (nitter, invidious, etc) |

View File

@ -80,6 +80,16 @@
"value": "",
"required": false
},
"WHOOGLE_CONFIG_DISABLE": {
"description": "[CONFIG] Disable ability for client to change config (set to 1 or leave blank)",
"value": "",
"required": false
},
"WHOOGLE_CONFIG_BLOCK": {
"description": "[CONFIG] Block websites from search results (comma-separated list)",
"value": "",
"required": false
},
"WHOOGLE_CONFIG_DARK": {
"description": "[CONFIG] Enable dark mode (set to 1 or leave blank)",
"value": "",

View File

@ -18,6 +18,7 @@ class Config:
'WHOOGLE_CONFIG_STYLE',
open(os.path.join(app_config['STATIC_FOLDER'],
'css/variables.css')).read())
self.block = os.getenv('WHOOGLE_CONFIG_BLOCK', '')
self.ctry = os.getenv('WHOOGLE_CONFIG_COUNTRY', '')
self.safe = read_config_bool('WHOOGLE_CONFIG_SAFE')
self.dark = read_config_bool('WHOOGLE_CONFIG_DARK')

View File

@ -120,6 +120,10 @@ def gen_query(query, args, config, near_city=None) -> str:
) if config.lang_interface else ''
param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off')
# Block all sites specified in the user config
for blocked in config.block.split(','):
query += (' -site:' + blocked) if blocked else ''
for val in param_dict.values():
if not val:
continue

View File

@ -2,7 +2,6 @@ import argparse
import base64
import io
import json
import os
import pickle
import urllib.parse as urlparse
import uuid
@ -17,7 +16,7 @@ from app import app
from app.models.config import Config
from app.request import Request, TorError
from app.utils.bangs import resolve_bang
from app.utils.session import valid_user_session
from app.utils.session import generate_user_key, valid_user_session
from app.utils.search import *
# Load DDG bang json files only on init

View File

@ -22,7 +22,7 @@
style="background-color: {{ 'var(--whoogle-dark-result-bg)' if config.dark else 'var(--whoogle-result-bg)' }} !important;
color: {{ 'var(--whoogle-dark-text)' if config.dark else 'var(--whoogle-text)' }};
type="text"
value="{{ query }}">
value="{{ query[:query.find('-site:')] }}">
<input style="color: {{ 'var(--whoogle-dark-text)' if config.dark else 'var(--whoogle-text)' }}" id="search-reset" type="reset" value="x">
<input name="tbm" value="{{ search_type }}" style="display: none">
<input type="submit" style="display: none;">
@ -54,7 +54,7 @@
name="q"
spellcheck="false"
type="text"
value="{{ query }}"
value="{{ query[:query.find('-site:')] }}"
style="background-color: {{ 'var(--whoogle-dark-result-bg)' if config.dark else 'var(--whoogle-result-bg)' }} !important;
color: {{ 'var(--whoogle-dark-text)' if config.dark else 'var(--whoogle-text)' }};
border-bottom: {{ '2px solid var(--whoogle-dark-element-bg)' if config.dark else '0px' }};">

View File

@ -56,117 +56,123 @@
<input type="submit" id="search-submit" value="Search">
</div>
</form>
<br/>
<button id="config-collapsible" class="collapsible">Configuration</button>
<div class="content">
<div class="config-fields">
<form id="config-form" action="config" method="post">
<div class="config-div config-div-ctry">
<label for="config-ctry">Filter Results by Country: </label>
<select name="ctry" id="config-ctry">
{% for ctry in countries %}
<option value="{{ ctry.value }}"
{% if ctry.value in config.ctry %}
selected
{% endif %}>
{{ ctry.name }}
</option>
{% endfor %}
</select>
<div><span class="info-text"> — Note: If enabled, a website will only appear in the results if it is *hosted* in the selected country.</span></div>
</div>
<div class="config-div config-div-lang">
<label for="config-lang-interface">Interface Language: </label>
<select name="lang_interface" id="config-lang-interface">
{% for lang in languages %}
<option value="{{ lang.value }}"
{% if lang.value in config.lang_interface %}
selected
{% endif %}>
{{ lang.name }}
</option>
{% endfor %}
</select>
</div>
<div class="config-div config-div-search-lang">
<label for="config-lang-search">Search Language: </label>
<select name="lang_search" id="config-lang-search">
{% for lang in languages %}
<option value="{{ lang.value }}"
{% if lang.value in config.lang_search %}
selected
{% endif %}>
{{ lang.name }}
</option>
{% endfor %}
</select>
</div>
<div class="config-div config-div-near">
<label for="config-near">Near: </label>
<input type="text" name="near" id="config-near" placeholder="City Name" value="{{ config.near }}">
</div>
<div class="config-div config-div-nojs">
<label for="config-nojs">Show NoJS Links: </label>
<input type="checkbox" name="nojs" id="config-nojs" {{ 'checked' if config.nojs else '' }}>
</div>
<div class="config-div config-div-dark">
<label for="config-dark">Dark Mode: </label>
<input type="checkbox" name="dark" id="config-dark" {{ 'checked' if config.dark else '' }}>
</div>
<div class="config-div config-div-safe">
<label for="config-safe">Safe Search: </label>
<input type="checkbox" name="safe" id="config-safe" {{ 'checked' if config.safe else '' }}>
</div>
<div class="config-div config-div-alts">
<label class="tooltip" for="config-alts">Replace Social Media Links: </label>
<input type="checkbox" name="alts" id="config-alts" {{ 'checked' if config.alts else '' }}>
<div><span class="info-text"> — Replaces Twitter/YouTube/Instagram/Reddit links
with Nitter/Invidious/Bibliogram/Libreddit links.</span></div>
</div>
<div class="config-div config-div-new-tab">
<label for="config-new-tab">Open Links in New Tab: </label>
<input type="checkbox" name="new_tab" id="config-new-tab" {{ 'checked' if config.new_tab else '' }}>
</div>
<div class="config-div config-div-view-image">
<label for="config-view-image">View image: </label>
<input type="checkbox" name="view_image" id="config-view-image" {{ 'checked' if config.view_image else '' }}>
<div><span class="info-text"> — Adds the "View Image" option to image search results.
Note: This will cause image result thumbnails to be lower resolution.</span></div>
</div>
<div class="config-div config-div-tor">
<label for="config-tor">Use Tor: {{ '' if tor_available else 'Unavailable' }}</label>
<input type="checkbox" name="tor" id="config-tor" {{ '' if tor_available else 'hidden' }} {{ 'checked' if config.tor else '' }}>
</div>
<div class="config-div config-div-get-only">
<label for="config-get-only">GET Requests Only: </label>
<input type="checkbox" name="get_only" id="config-get-only" {{ 'checked' if config.get_only else '' }}>
</div>
<div class="config-div config-div-root-url">
<label for="config-url">Root URL: </label>
<input type="text" name="url" id="config-url" value="{{ config.url }}">
</div>
<div class="config-div config-div-custom-css">
<label for="config-style">Custom CSS:</label>
<textarea
name="style"
id="config-style"
autocapitalize="off"
autocomplete="off"
spellcheck="false"
autocorrect="off"
value="">
{{ config.style }}
</textarea>
</div>
<div class="config-div">
<input type="submit" id="config-load" value="Load">&nbsp;
<input type="submit" id="config-submit" value="Apply">&nbsp;
<input type="submit" id="config-save" value="Save As...">
</div>
</form>
{% if not config_disabled %}
<br/>
<button id="config-collapsible" class="collapsible">Configuration</button>
<div class="content">
<div class="config-fields">
<form id="config-form" action="config" method="post">
<div class="config-div config-div-ctry">
<label for="config-ctry">Filter Results by Country: </label>
<select name="ctry" id="config-ctry">
{% for ctry in countries %}
<option value="{{ ctry.value }}"
{% if ctry.value in config.ctry %}
selected
{% endif %}>
{{ ctry.name }}
</option>
{% endfor %}
</select>
<div><span class="info-text"> — Note: If enabled, a website will only appear in the results if it is *hosted* in the selected country.</span></div>
</div>
<div class="config-div config-div-lang">
<label for="config-lang-interface">Interface Language: </label>
<select name="lang_interface" id="config-lang-interface">
{% for lang in languages %}
<option value="{{ lang.value }}"
{% if lang.value in config.lang_interface %}
selected
{% endif %}>
{{ lang.name }}
</option>
{% endfor %}
</select>
</div>
<div class="config-div config-div-search-lang">
<label for="config-lang-search">Search Language: </label>
<select name="lang_search" id="config-lang-search">
{% for lang in languages %}
<option value="{{ lang.value }}"
{% if lang.value in config.lang_search %}
selected
{% endif %}>
{{ lang.name }}
</option>
{% endfor %}
</select>
</div>
<div class="config-div config-div-near">
<label for="config-near">Near: </label>
<input type="text" name="near" id="config-near" placeholder="City Name" value="{{ config.near }}">
</div>
<div class="config-div config-div-block">
<label for="config-block">Block: </label>
<input type="text" name="block" id="config-block" placeholder="Comma-separated site list" value="{{ config.block }}">
</div>
</div>
</div>
<div class="config-div config-div-nojs">
<label for="config-nojs">Show NoJS Links: </label>
<input type="checkbox" name="nojs" id="config-nojs" {{ 'checked' if config.nojs else '' }}>
</div>
<div class="config-div config-div-dark">
<label for="config-dark">Dark Mode: </label>
<input type="checkbox" name="dark" id="config-dark" {{ 'checked' if config.dark else '' }}>
</div>
<div class="config-div config-div-safe">
<label for="config-safe">Safe Search: </label>
<input type="checkbox" name="safe" id="config-safe" {{ 'checked' if config.safe else '' }}>
</div>
<div class="config-div config-div-alts">
<label class="tooltip" for="config-alts">Replace Social Media Links: </label>
<input type="checkbox" name="alts" id="config-alts" {{ 'checked' if config.alts else '' }}>
<div><span class="info-text"> — Replaces Twitter/YouTube/Instagram/Reddit links
with Nitter/Invidious/Bibliogram/Libreddit links.</span></div>
</div>
<div class="config-div config-div-new-tab">
<label for="config-new-tab">Open Links in New Tab: </label>
<input type="checkbox" name="new_tab" id="config-new-tab" {{ 'checked' if config.new_tab else '' }}>
</div>
<div class="config-div config-div-view-image">
<label for="config-view-image">View Image: </label>
<input type="checkbox" name="view_image" id="config-view-image" {{ 'checked' if config.view_image else '' }}>
<div><span class="info-text"> — Adds the "View Image" option to image search results.
Note: This will cause image result thumbnails to be lower resolution.</span></div>
</div>
<div class="config-div config-div-tor">
<label for="config-tor">Use Tor: {{ '' if tor_available else 'Unavailable' }}</label>
<input type="checkbox" name="tor" id="config-tor" {{ '' if tor_available else 'hidden' }} {{ 'checked' if config.tor else '' }}>
</div>
<div class="config-div config-div-get-only">
<label for="config-get-only">GET Requests Only: </label>
<input type="checkbox" name="get_only" id="config-get-only" {{ 'checked' if config.get_only else '' }}>
</div>
<div class="config-div config-div-root-url">
<label for="config-url">Root URL: </label>
<input type="text" name="url" id="config-url" value="{{ config.url }}">
</div>
<div class="config-div config-div-custom-css">
<label for="config-style">Custom CSS:</label>
<textarea
name="style"
id="config-style"
autocapitalize="off"
autocomplete="off"
spellcheck="false"
autocorrect="off"
value="">
{{ config.style }}
</textarea>
</div>
<div class="config-div">
<input type="submit" id="config-load" value="Load">&nbsp;
<input type="submit" id="config-submit" value="Apply">&nbsp;
<input type="submit" id="config-save" value="Save As...">
</div>
</form>
</div>
</div>
{% endif %}
</div>
<footer>
<p style="color: {{ 'var(--whoogle-dark-text)' if config.dark else 'var(--whoogle-text)' }};">
Whoogle Search v{{ version_number }} ||

View File

@ -1,5 +1,4 @@
from app.filter import Filter, get_first_link
from app.utils.session import generate_user_key
from app.request import gen_query
from bs4 import BeautifulSoup as bsoup
from cryptography.fernet import Fernet, InvalidToken

View File

@ -3,6 +3,9 @@ from app.filter import Filter
from app.utils.session import generate_user_key
from datetime import datetime
from dateutil.parser import *
from urllib.parse import urlparse
from test.conftest import demo_config
def get_search_results(data):
@ -46,6 +49,29 @@ def test_post_results(client):
assert len(get_search_results(rv.data)) <= 15
def test_block_results(client):
    """Verify that sites in the "block" config are removed from results.

    First confirms that a search for "pinterest" with the current config
    yields at least one link whose host contains pinterest.com. Then posts
    a config with pinterest.com blocked and checks that no link in a new
    search for the same query has that host.

    NOTE(review): the first assertion depends on the search backend
    returning a pinterest.com link for this query -- presumably live
    results; confirm this is stable enough for CI.
    """
    def result_hosts(response):
        # Lazily yield the network location of every anchor that has an
        # href, so any()/all() can stop at the first deciding link.
        soup = BeautifulSoup(response.data, 'html.parser')
        return (urlparse(anchor['href']).netloc
                for anchor in soup.find_all('a', href=True))

    rv = client.post('/search', data=dict(q='pinterest'))
    assert rv._status_code == 200
    assert any('pinterest.com' in host for host in result_hosts(rv))

    # Post a copy rather than writing into demo_config: it is imported from
    # test.conftest, so mutating it would leak the block list into every
    # later test that posts the same config.
    blocked_config = dict(demo_config, block='pinterest.com')
    rv = client.post('/config', data=blocked_config)
    assert rv._status_code == 302

    rv = client.post('/search', data=dict(q='pinterest'))
    assert rv._status_code == 200
    assert all('pinterest.com' not in host for host in result_hosts(rv))
# TODO: Unit test the site alt method instead -- the results returned
# are too unreliable for this test in particular.
# def test_site_alts(client):

View File

@ -18,27 +18,42 @@
# See app/static/settings/countries.json for values
#WHOOGLE_CONFIG_COUNTRY=countryUK
# See app/static/settings/languages.json for values
#WHOOGLE_CONFIG_LANGUAGE=lang_en
# See app/static/settings/languages.json for values
#WHOOGLE_CONFIG_SEARCH_LANGUAGE=lang_en
# Disable changing of config from client
#WHOOGLE_CONFIG_DISABLE=1
# Block websites from search results (comma-separated list)
#WHOOGLE_CONFIG_BLOCK=pinterest.com,whitehouse.gov
# Dark mode
#WHOOGLE_CONFIG_DARK=1
# Safe search mode
#WHOOGLE_CONFIG_SAFE=1
# Use social media site alternatives (nitter, bibliogram, etc)
#WHOOGLE_CONFIG_ALTS=1
# Enable "View Image" option
#WHOOGLE_CONFIG_VIEW_IMAGE=1
# Use Tor if available
#WHOOGLE_CONFIG_TOR=1
# Open results in new tab
#WHOOGLE_CONFIG_NEW_TAB=1
# Search using GET requests only (exposes query in logs)
#WHOOGLE_CONFIG_GET_ONLY=1
# Set instance URL
#WHOOGLE_CONFIG_URL=https://<whoogle url>/
# Set custom CSS styling/theming
#WHOOGLE_CONFIG_STYLE=":root { /* LIGHT THEME COLORS */ --whoogle-background: #d8dee9; --whoogle-accent: #2e3440; --whoogle-text: #3B4252; --whoogle-contrast-text: #eceff4; --whoogle-secondary-text: #70757a; --whoogle-result-bg: #fff; --whoogle-result-title: #4c566a; --whoogle-result-url: #81a1c1; --whoogle-result-visited: #a3be8c; /* DARK THEME COLORS */ --whoogle-dark-background: #222; --whoogle-dark-accent: #685e79; --whoogle-dark-text: #fff; --whoogle-dark-contrast-text: #000; --whoogle-dark-secondary-text: #bbb; --whoogle-dark-result-bg: #000; --whoogle-dark-result-title: #1967d2; --whoogle-dark-result-url: #4b11a8; --whoogle-dark-result-visited: #bbbbff; }"