Merge remote-tracking branch 'origin/master' into heroku-app

This commit is contained in:
Ben Busby 2020-05-22 16:16:28 -06:00
commit 18c194d500
13 changed files with 149 additions and 48 deletions

10
.github/ISSUE_TEMPLATE/question.md vendored Normal file
View File

@ -0,0 +1,10 @@
---
name: Question
about: Ask a (simple) question about Whoogle
title: "[QUESTION] <question here>"
labels: question
assignees: ''
---
Type out your question here. Please make sure that this is a topic that isn't already covered in the README.

View File

@ -5,4 +5,11 @@ before_install:
install: install:
- pip install -r requirements.txt - pip install -r requirements.txt
script: script:
- ./run test - "./run test"
deploy:
provider: pypi
user: __token__
password:
secure: WNEH2Gg84MZF/AZEberFDGPPWb4cYyHAeD/XV8En94QRSI9Aznz6qiDKOvV4eVgjMAIEW5uB3TL1LHf6KU+Hrg6SmhF7JquqP1gsBOCDNFPTljO+k2Hc53uDdSnhi/HLgY7cnFNX4lc2nNrbyxZxMHuSA2oNz/tosyNGBEeyU+JA5va7uX0albGsLiNjimO4aeau83fsI0Hn2eN6ag68pewUMXNxzpyTeO2bRcCd5d5iILs07jMVwFoC2j7W11oNqrVuSWAs8CPe4+kwvNvXWxljUGiBGppNZ7RAsKNLwi6U6kGGUTWjQm09rY/2JBpJ2WEGmIWGIrno75iiFRbjnRp3mnXPvtVTyWhh+hQIUd7bJOVKM34i9eHotYTrkMJObgW1gnRzvI9VYldtgL/iP/Isn2Pv2EeMX8V+C9/8pxv0jkQkZMnFhE6gGlzpz37zTl04B2J7xyV5znM35Lx2Pn3zxdcmdCvD3yT8I4MuBbKqq2/v4emYCfPfOmfwnS0BEVSqr9lbx4xfUZV76tcvLcj4n86DJbx77pA2Ch8FRprpOOBcf0WuqTbZp8c3mb8prFp2EupUknXu7+C2VQ6sqrnzNuDeTGm/nyjjRQ81rlvlD4tqkwsEGEDDO44FF2eUTc5D2MvoHs4cnz095FWjy63gn5IxUjhMi31b5tGRz2Q=
on:
tags: true

View File

@ -11,6 +11,12 @@ VOLUME $config_dir
ENV CONFIG_VOLUME=$config_dir ENV CONFIG_VOLUME=$config_dir
ARG use_https=1 ARG use_https=1
ARG username=''
ENV WHOOGLE_USER=$username
ARG password=''
ENV WHOOGLE_PASS=$password
ENV HTTPS_ONLY=$use_https ENV HTTPS_ONLY=$use_https
ARG whoogle_port=5000 ARG whoogle_port=5000

View File

@ -1,3 +1,4 @@
graft app/static graft app/static
graft app/templates graft app/templates
include requirements.txt
global-exclude *.pyc global-exclude *.pyc

View File

@ -4,7 +4,7 @@
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Build Status](https://travis-ci.com/benbusby/whoogle-search.svg?branch=master)](https://travis-ci.com/benbusby/whoogle-search) [![Build Status](https://travis-ci.com/benbusby/whoogle-search.svg?branch=master)](https://travis-ci.com/benbusby/whoogle-search)
[![codebeat badge](https://codebeat.co/badges/e96cada2-fb6f-4528-8285-7d72abd74e8d)](https://codebeat.co/projects/github-com-benbusby-shoogle-master) [![codebeat badge](https://codebeat.co/badges/e96cada2-fb6f-4528-8285-7d72abd74e8d)](https://codebeat.co/projects/github-com-benbusby-shoogle-master)
![Docker Pulls](https://img.shields.io/docker/pulls/benbusby/whoogle-search) [![Docker Pulls](https://img.shields.io/docker/pulls/benbusby/whoogle-search)](https://hub.docker.com/r/benbusby/whoogle-search)
Get Google search results, but without any ads, javascript, AMP links, cookies, or IP address tracking. Easily deployable in one click as a Docker app, and customizable with a single config file. Quick and simple to implement as a primary search engine replacement on both desktop and mobile. Get Google search results, but without any ads, javascript, AMP links, cookies, or IP address tracking. Easily deployable in one click as a Docker app, and customizable with a single config file. Quick and simple to implement as a primary search engine replacement on both desktop and mobile.
@ -185,6 +185,8 @@ To filter by a range of time, append ":past <time>" to the end of your search, w
## Extra Steps ## Extra Steps
### Set Whoogle as your primary search engine ### Set Whoogle as your primary search engine
*Note: If you're using a reverse proxy to run Whoogle Search, make sure the "Root URL" config option on the home page is set to your URL before going through these steps.*
Update browser settings: Update browser settings:
- Firefox (Desktop) - Firefox (Desktop)
- Navigate to your app's url, and click the 3 dot menu in the address bar. At the bottom, there should be an option to "Add Search Engine". Once you've clicked this, open your Firefox Preferences menu, click "Search" in the left menu, and use the available dropdown to select "Whoogle" from the list. - Navigate to your app's url, and click the 3 dot menu in the address bar. At the bottom, there should be an option to "Add Search Engine". Once you've clicked this, open your Firefox Preferences menu, click "Search" in the left menu, and use the available dropdown to select "Whoogle" from the list.
@ -236,6 +238,7 @@ Only needed if your setup requires Flask to redirect to HTTPS on its own -- gene
- Default `run` script: Modify the script locally to include the `--https-only` flag at the end of the python run command - Default `run` script: Modify the script locally to include the `--https-only` flag at the end of the python run command
Available config values are `near`, `nojs`, `dark` and `url`. Available config values are `near`, `nojs`, `dark` and `url`.
## FAQ ## FAQ
**What's the difference between this and [Searx](https://github.com/asciimoo/searx)?** **What's the difference between this and [Searx](https://github.com/asciimoo/searx)?**

View File

@ -4,5 +4,9 @@ import os
app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static') app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static')
app.secret_key = Fernet.generate_key() app.secret_key = Fernet.generate_key()
app.config['VERSION_NUMBER'] = '0.1.4'
app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__)))
app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static'))
app.config['CONFIG_PATH'] = os.getenv('CONFIG_VOLUME', app.config['STATIC_FOLDER']) + '/config.json'
from app import routes from app import routes

View File

@ -14,6 +14,43 @@ data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42m
''' '''
def get_first_link(soup):
    """Return the first search-result link found in ``soup``.

    Scans every ``<a>`` element that has an ``href`` attribute, strips the
    ``https://www.google.com`` prefix from it, and returns the first href
    containing ``url?q=`` after passing it through ``filter_link_args``.

    Args:
        soup: A parsed document exposing ``find_all('a', href=True)``
            (a BeautifulSoup object in this module).

    Returns:
        The filtered href of the first matching anchor, or ``None`` when no
        anchor contains ``url?q=``.
    """
    for a in soup.find_all('a', href=True):
        href = a['href'].replace('https://www.google.com', '')

        # Return the first search result URL. No query-string parsing is
        # done here: the previously computed (and unused) ``q`` lookup raised
        # KeyError for anchors such as "/search?q=" with a blank value.
        if 'url?q=' in href:
            return filter_link_args(href)

    return None
def filter_link_args(query_link):
    """Return ``query_link`` with every query argument in ``SKIP_ARGS`` removed.

    Args:
        query_link: The URL (absolute or relative) to clean up.

    Returns:
        The link unchanged when its query string yields no parsed arguments;
        otherwise the link rebuilt with only the arguments whose names are
        not in ``SKIP_ARGS``. If every argument is skipped, the result has no
        query string (and no ``?``).
    """
    parsed_link = urlparse.urlparse(query_link)
    link_args = parse_qs(parsed_link.query)

    # Nothing to filter: hand the link back exactly as received. (The parse
    # result is a fixed-size tuple, so no separate length check is needed.)
    if not link_args:
        return query_link

    safe_args = {
        arg: values
        for arg, values in link_args.items()
        if arg not in SKIP_ARGS
    }

    # Rebuild from the parsed components rather than patching the string:
    # this keeps a trailing "#fragment" after the query instead of before it
    # and never touches "?" characters outside the query separator.
    filtered_query = urlparse.urlencode(safe_args, doseq=True)
    return parsed_link._replace(query=filtered_query).geturl()
class Filter: class Filter:
def __init__(self, mobile=False, config=None, secret_key=''): def __init__(self, mobile=False, config=None, secret_key=''):
if config is None: if config is None:
@ -75,14 +112,13 @@ class Filter:
img_src = img['src'] img_src = img['src']
if img_src.startswith('//'): if img_src.startswith('//'):
img_src = 'https:' + img_src img_src = 'https:' + img_src
elif img_src.startswith(LOGO_URL):
# Re-brand with Whoogle logo
img['src'] = '/static/img/logo.png'
img['style'] = 'height:40px;width:162px'
continue
elif img_src.startswith(GOOG_IMG): elif img_src.startswith(GOOG_IMG):
# Special rebranding for image search results img['src'] = BLANK_B64
if img_src.startswith(LOGO_URL):
img['src'] = '/static/img/logo.png'
img['style'] = 'height:40px;width:162px'
else:
img['src'] = BLANK_B64
continue continue
enc_src = Fernet(self.secret_key).encrypt(img_src.encode()) enc_src = Fernet(self.secret_key).encrypt(img_src.encode())
@ -149,32 +185,11 @@ class Filter:
a['href'] = new_search a['href'] = new_search
elif 'url?q=' in href: elif 'url?q=' in href:
# Strip unneeded arguments # Strip unneeded arguments
parsed_link = urlparse.urlparse(query_link) a['href'] = filter_link_args(query_link)
link_args = parse_qs(parsed_link.query)
safe_args = {}
if len(link_args) == 0 and len(parsed_link) > 0:
a['href'] = query_link
continue
for arg in link_args.keys():
if arg in SKIP_ARGS:
continue
safe_args[arg] = link_args[arg]
# Remove original link query and replace with filtered args
query_link = query_link.replace(parsed_link.query, '')
if len(safe_args) > 0:
query_link = query_link + urlparse.urlencode(safe_args, doseq=True)
else:
query_link = query_link.replace('?', '')
a['href'] = query_link
# Add no-js option # Add no-js option
if self.nojs: if self.nojs:
gen_nojs(soup, query_link, a) gen_nojs(soup, a['href'], a)
else: else:
a['href'] = href a['href'] = href
@ -185,4 +200,4 @@ def gen_nojs(soup, link, sibling):
nojs_link['style'] = 'display:block;width:100%;' nojs_link['style'] = 'display:block;width:100%;'
nojs_link.string = 'NoJS Link: ' + nojs_link['href'] nojs_link.string = 'NoJS Link: ' + nojs_link['href']
sibling.append(BeautifulSoup('<br><hr><br>', 'html.parser')) sibling.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
sibling.append(nojs_link) sibling.append(nojs_link)

View File

@ -1,34 +1,47 @@
from app import app from app import app
from app.filter import Filter from app.filter import Filter, get_first_link
from app.models.config import Config from app.models.config import Config
from app.request import Request, gen_query from app.request import Request, gen_query
import argparse import argparse
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from cryptography.fernet import Fernet, InvalidToken from cryptography.fernet import Fernet, InvalidToken
from flask import g, make_response, request, redirect, render_template, send_file from flask import g, make_response, request, redirect, render_template, send_file
from functools import wraps
import io import io
import json import json
import os import os
import urllib.parse as urlparse import urllib.parse as urlparse
import waitress import waitress
app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__)))
app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static'))
CONFIG_PATH = os.getenv('CONFIG_VOLUME', app.config['STATIC_FOLDER']) + '/config.json' def auth_required(f):
@wraps(f)
def decorated(*args, **kwargs):
auth = request.authorization
# Skip if username/password not set
whoogle_user = os.getenv('WHOOGLE_USER', '')
whoogle_pass = os.getenv('WHOOGLE_PASS', '')
if (not whoogle_user or not whoogle_pass) or \
(auth and whoogle_user == auth.username and whoogle_pass == auth.password):
return f(*args, **kwargs)
else:
return make_response('Not logged in', 401, {'WWW-Authenticate': 'Basic realm="Login Required"'})
return decorated
@app.before_request @app.before_request
def before_request_func(): def before_request_func():
# Always redirect to https if HTTPS_ONLY is set (otherwise default to false) # Always redirect to https if HTTPS_ONLY is set (otherwise default to false)
https_only = os.getenv('HTTPS_ONLY', False) https_only = os.getenv('HTTPS_ONLY', False)
config_path = app.config['CONFIG_PATH']
if https_only and request.url.startswith('http://'): if https_only and request.url.startswith('http://'):
url = request.url.replace('http://', 'https://', 1) https_url = request.url.replace('http://', 'https://', 1)
code = 301 code = 308
return redirect(url, code=code) return redirect(https_url, code=code)
json_config = json.load(open(CONFIG_PATH)) if os.path.exists(CONFIG_PATH) else {'url': request.url_root} json_config = json.load(open(config_path)) if os.path.exists(config_path) else {'url': request.url_root}
g.user_config = Config(**json_config) g.user_config = Config(**json_config)
if not g.user_config.url: if not g.user_config.url:
@ -44,6 +57,7 @@ def unknown_page(e):
@app.route('/', methods=['GET']) @app.route('/', methods=['GET'])
@auth_required
def index(): def index():
bg = '#000' if g.user_config.dark else '#fff' bg = '#000' if g.user_config.dark else '#fff'
return render_template('index.html', return render_template('index.html',
@ -51,10 +65,12 @@ def index():
ua=g.user_request.modified_user_agent, ua=g.user_request.modified_user_agent,
languages=Config.LANGUAGES, languages=Config.LANGUAGES,
current_lang=g.user_config.lang, current_lang=g.user_config.lang,
version_number=app.config['VERSION_NUMBER'],
request_type='get' if g.user_config.get_only else 'post') request_type='get' if g.user_config.get_only else 'post')
@app.route('/opensearch.xml', methods=['GET']) @app.route('/opensearch.xml', methods=['GET'])
@auth_required
def opensearch(): def opensearch():
opensearch_url = g.app_location opensearch_url = g.app_location
if opensearch_url.endswith('/'): if opensearch_url.endswith('/'):
@ -72,6 +88,7 @@ def opensearch():
@app.route('/search', methods=['GET', 'POST']) @app.route('/search', methods=['GET', 'POST'])
@auth_required
def search(): def search():
request_params = request.args if request.method == 'GET' else request.form request_params = request.args if request.method == 'GET' else request.form
q = request_params.get('q') q = request_params.get('q')
@ -85,20 +102,29 @@ def search():
except InvalidToken: except InvalidToken:
pass pass
feeling_lucky = q.startswith('! ')
if feeling_lucky: # Well do you, punk?
q = q[2:]
user_agent = request.headers.get('User-Agent') user_agent = request.headers.get('User-Agent')
mobile = 'Android' in user_agent or 'iPhone' in user_agent mobile = 'Android' in user_agent or 'iPhone' in user_agent
content_filter = Filter(mobile, g.user_config, secret_key=app.secret_key) content_filter = Filter(mobile, g.user_config, secret_key=app.secret_key)
full_query = gen_query(q, request_params, content_filter.near, language=g.user_config.lang) full_query = gen_query(q, request_params, content_filter.near, language=g.user_config.lang)
get_body = g.user_request.send(query=full_query) get_body = g.user_request.send(query=full_query)
dirty_soup = BeautifulSoup(content_filter.reskin(get_body), 'html.parser')
results = content_filter.reskin(get_body) if feeling_lucky:
formatted_results = content_filter.clean(BeautifulSoup(results, 'html.parser')) return redirect(get_first_link(dirty_soup), 303) # Using 303 so the browser performs a GET request for the URL
else:
formatted_results = content_filter.clean(dirty_soup)
return render_template('display.html', query=urlparse.unquote(q), response=formatted_results) return render_template('display.html', query=urlparse.unquote(q), response=formatted_results)
@app.route('/config', methods=['GET', 'POST']) @app.route('/config', methods=['GET', 'POST'])
@auth_required
def config(): def config():
if request.method == 'GET': if request.method == 'GET':
return json.dumps(g.user_config.__dict__) return json.dumps(g.user_config.__dict__)
@ -107,7 +133,7 @@ def config():
if 'url' not in config_data or not config_data['url']: if 'url' not in config_data or not config_data['url']:
config_data['url'] = g.user_config.url config_data['url'] = g.user_config.url
with open(CONFIG_PATH, 'w') as config_file: with open(app.config['CONFIG_PATH'], 'w') as config_file:
config_file.write(json.dumps(config_data, indent=4)) config_file.write(json.dumps(config_data, indent=4))
config_file.close() config_file.close()
@ -115,6 +141,7 @@ def config():
@app.route('/url', methods=['GET']) @app.route('/url', methods=['GET'])
@auth_required
def url(): def url():
if 'url' in request.args: if 'url' in request.args:
return redirect(request.args.get('url')) return redirect(request.args.get('url'))
@ -127,11 +154,13 @@ def url():
@app.route('/imgres') @app.route('/imgres')
@auth_required
def imgres(): def imgres():
return redirect(request.args.get('imgurl')) return redirect(request.args.get('imgurl'))
@app.route('/tmp') @app.route('/tmp')
@auth_required
def tmp(): def tmp():
cipher_suite = Fernet(app.secret_key) cipher_suite = Fernet(app.secret_key)
img_url = cipher_suite.decrypt(request.args.get('image_url').encode()).decode() img_url = cipher_suite.decrypt(request.args.get('image_url').encode()).decode()
@ -149,6 +178,7 @@ def tmp():
@app.route('/window') @app.route('/window')
@auth_required
def window(): def window():
get_body = g.user_request.send(base_url=request.args.get('location')) get_body = g.user_request.send(base_url=request.args.get('location'))
get_body = get_body.replace('src="/', 'src="' + request.args.get('location') + '"') get_body = get_body.replace('src="/', 'src="' + request.args.get('location') + '"')
@ -175,7 +205,15 @@ def run_app():
help='Activates debug mode for the server (default False)') help='Activates debug mode for the server (default False)')
parser.add_argument('--https-only', default=False, action='store_true', parser.add_argument('--https-only', default=False, action='store_true',
help='Enforces HTTPS redirects for all requests') help='Enforces HTTPS redirects for all requests')
parser.add_argument('--userpass', default='', metavar='<username:password>',
help='Sets a username/password basic auth combo (default None)')
args = parser.parse_args() args = parser.parse_args()
if args.userpass:
user_pass = args.userpass.split(':')
os.environ['WHOOGLE_USER'] = user_pass[0]
os.environ['WHOOGLE_PASS'] = user_pass[1]
os.environ['HTTPS_ONLY'] = '1' if args.https_only else '' os.environ['HTTPS_ONLY'] = '1' if args.https_only else ''
if args.debug: if args.debug:

View File

@ -1,3 +1,7 @@
body {
font-family: Avenir, Helvetica, Arial, sans-serif;
}
.logo { .logo {
width: 80%; width: 80%;
display: block; display: block;
@ -117,3 +121,11 @@ button::-moz-focus-inner {
.hidden { .hidden {
display: none; display: none;
} }
footer {
position: fixed;
bottom: 0%;
text-align: center;
width: 100%;
z-index: -1;
}

View File

@ -85,6 +85,8 @@
</div> </div>
</div> </div>
</div> </div>
<footer>
<p>Whoogle Search v{{ version_number }} || <a href="https://github.com/benbusby/whoogle-search">View on GitHub</a></p>
</footer>
</body> </body>
</html> </html>

View File

@ -8,8 +8,7 @@ setuptools.setup(
author='Ben Busby', author='Ben Busby',
author_email='benbusby@protonmail.com', author_email='benbusby@protonmail.com',
name='whoogle-search', name='whoogle-search',
version='0.1.0', version='0.1.4',
scripts=['whoogle-search'],
include_package_data=True, include_package_data=True,
install_requires=requirements, install_requires=requirements,
description='Self-hosted, ad-free, privacy-respecting Google metasearch engine', description='Self-hosted, ad-free, privacy-respecting Google metasearch engine',

View File

@ -62,6 +62,6 @@ def test_recent_results(client):
try: try:
date = parse(date_span) date = parse(date_span)
assert (current_date - date).days <= num_days assert (current_date - date).days <= (num_days + 5) # Date can have a little bit of wiggle room
except ParserError: except ParserError:
assert ' ago' in date_span assert ' ago' in date_span

View File

@ -17,6 +17,10 @@ def test_search(client):
rv = client.get('/search?q=test') rv = client.get('/search?q=test')
assert rv._status_code == 200 assert rv._status_code == 200
def test_feeling_lucky(client):
    """A query prefixed with "! " must answer with a 303 redirect."""
    rv = client.get('/search?q=!%20test')
    # Read the public status_code attribute rather than the private
    # _status_code backing field.
    assert rv.status_code == 303
def test_config(client): def test_config(client):
rv = client.post('/config', data=demo_config) rv = client.post('/config', data=demo_config)