Merge remote-tracking branch 'origin/master' into heroku-app

This commit is contained in:
Ben Busby 2020-05-22 16:16:28 -06:00
commit 18c194d500
13 changed files with 149 additions and 48 deletions

10
.github/ISSUE_TEMPLATE/question.md vendored Normal file
View File

@ -0,0 +1,10 @@
---
name: Question
about: Ask a (simple) question about Whoogle
title: "[QUESTION] <question here>"
labels: question
assignees: ''
---
Type out your question here. Please make sure that this is a topic that isn't already covered in the README.

View File

@ -5,4 +5,11 @@ before_install:
install:
- pip install -r requirements.txt
script:
- ./run test
- "./run test"
deploy:
provider: pypi
user: __token__
password:
secure: WNEH2Gg84MZF/AZEberFDGPPWb4cYyHAeD/XV8En94QRSI9Aznz6qiDKOvV4eVgjMAIEW5uB3TL1LHf6KU+Hrg6SmhF7JquqP1gsBOCDNFPTljO+k2Hc53uDdSnhi/HLgY7cnFNX4lc2nNrbyxZxMHuSA2oNz/tosyNGBEeyU+JA5va7uX0albGsLiNjimO4aeau83fsI0Hn2eN6ag68pewUMXNxzpyTeO2bRcCd5d5iILs07jMVwFoC2j7W11oNqrVuSWAs8CPe4+kwvNvXWxljUGiBGppNZ7RAsKNLwi6U6kGGUTWjQm09rY/2JBpJ2WEGmIWGIrno75iiFRbjnRp3mnXPvtVTyWhh+hQIUd7bJOVKM34i9eHotYTrkMJObgW1gnRzvI9VYldtgL/iP/Isn2Pv2EeMX8V+C9/8pxv0jkQkZMnFhE6gGlzpz37zTl04B2J7xyV5znM35Lx2Pn3zxdcmdCvD3yT8I4MuBbKqq2/v4emYCfPfOmfwnS0BEVSqr9lbx4xfUZV76tcvLcj4n86DJbx77pA2Ch8FRprpOOBcf0WuqTbZp8c3mb8prFp2EupUknXu7+C2VQ6sqrnzNuDeTGm/nyjjRQ81rlvlD4tqkwsEGEDDO44FF2eUTc5D2MvoHs4cnz095FWjy63gn5IxUjhMi31b5tGRz2Q=
on:
tags: true

View File

@ -11,6 +11,12 @@ VOLUME $config_dir
ENV CONFIG_VOLUME=$config_dir
ARG use_https=1
ARG username=''
ENV WHOOGLE_USER=$username
ARG password=''
ENV WHOOGLE_PASS=$password
ENV HTTPS_ONLY=$use_https
ARG whoogle_port=5000

View File

@ -1,3 +1,4 @@
graft app/static
graft app/templates
include requirements.txt
global-exclude *.pyc

View File

@ -4,7 +4,7 @@
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Build Status](https://travis-ci.com/benbusby/whoogle-search.svg?branch=master)](https://travis-ci.com/benbusby/whoogle-search)
[![codebeat badge](https://codebeat.co/badges/e96cada2-fb6f-4528-8285-7d72abd74e8d)](https://codebeat.co/projects/github-com-benbusby-shoogle-master)
![Docker Pulls](https://img.shields.io/docker/pulls/benbusby/whoogle-search)
[![Docker Pulls](https://img.shields.io/docker/pulls/benbusby/whoogle-search)](https://hub.docker.com/r/benbusby/whoogle-search)
Get Google search results, but without any ads, JavaScript, AMP links, cookies, or IP address tracking. Easily deployable in one click as a Docker app, and customizable with a single config file. Quick and simple to implement as a primary search engine replacement on both desktop and mobile.
@ -185,6 +185,8 @@ To filter by a range of time, append ":past <time>" to the end of your search, w
## Extra Steps
### Set Whoogle as your primary search engine
*Note: If you're using a reverse proxy to run Whoogle Search, make sure the "Root URL" config option on the home page is set to your URL before going through these steps.*
Update browser settings:
- Firefox (Desktop)
- Navigate to your app's URL, and click the 3 dot menu in the address bar. At the bottom, there should be an option to "Add Search Engine". Once you've clicked this, open your Firefox Preferences menu, click "Search" in the left menu, and use the available dropdown to select "Whoogle" from the list.
@ -236,6 +238,7 @@ Only needed if your setup requires Flask to redirect to HTTPS on its own -- gene
- Default `run` script: Modify the script locally to include the `--https-only` flag at the end of the python run command
Available config values are `near`, `nojs`, `dark` and `url`.
## FAQ
**What's the difference between this and [Searx](https://github.com/asciimoo/searx)?**

View File

@ -4,5 +4,9 @@ import os
# Create the Flask app, serving static files from the "static" directory that
# sits next to this file.
app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static')
# A new Fernet key is generated each time this module is imported.
app.secret_key = Fernet.generate_key()
app.config['VERSION_NUMBER'] = '0.1.4'
# APP_ROOT: taken from the APP_ROOT environment variable, falling back to the
# directory containing this file.
app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__)))
# STATIC_FOLDER: taken from the STATIC_FOLDER environment variable, falling
# back to "<APP_ROOT>/static".
app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static'))
# CONFIG_PATH: "config.json" inside the directory named by the CONFIG_VOLUME
# environment variable, falling back to STATIC_FOLDER.
app.config['CONFIG_PATH'] = os.getenv('CONFIG_VOLUME', app.config['STATIC_FOLDER']) + '/config.json'
from app import routes

View File

@ -14,6 +14,43 @@ 
'''
def get_first_link(soup):
    """Return the first search result link found in a parsed results page.

    Args:
        soup: Parsed document exposing ``find_all`` (the routes module passes
            a BeautifulSoup object).

    Returns:
        The href of the first anchor containing ``url?q=``, with every
        occurrence of ``https://www.google.com`` removed and the remainder
        passed through ``filter_link_args``. ``None`` if no anchor matches.
    """
    for a in soup.find_all('a', href=True):
        href = a['href'].replace('https://www.google.com', '')

        # Return the first search result URL. The query string is deliberately
        # not parsed here: the parsed value was never used, and looking up
        # 'q' raised KeyError for anchors such as "/search?q=" because
        # parse_qs drops blank values.
        if 'url?q=' in href:
            return filter_link_args(href)

    return None
def filter_link_args(query_link):
    """Remove the query arguments listed in ``SKIP_ARGS`` from a URL.

    Args:
        query_link: URL string to filter.

    Returns:
        The URL with every argument named in ``SKIP_ARGS`` dropped and the
        remaining arguments re-encoded. A URL with no parseable query
        arguments is returned unchanged.
    """
    parsed_link = urlparse.urlparse(query_link)
    link_args = parse_qs(parsed_link.query)

    # Nothing to filter. parse_qs also drops blank-valued arguments, so a
    # query such as "?flag" ends up here and is left untouched.
    if not link_args:
        return query_link

    safe_args = {
        arg: values
        for arg, values in link_args.items()
        if arg not in SKIP_ARGS
    }

    # Rebuild the URL from its parsed components instead of editing the raw
    # string: this keeps the filtered query in front of any "#fragment" and
    # cannot disturb other parts of the URL that happen to contain the query
    # text or a "?" character. An empty query drops the "?" altogether.
    filtered_query = urlparse.urlencode(safe_args, doseq=True)
    return parsed_link._replace(query=filtered_query).geturl()
class Filter:
def __init__(self, mobile=False, config=None, secret_key=''):
if config is None:
@ -75,14 +112,13 @@ class Filter:
img_src = img['src']
if img_src.startswith('//'):
img_src = 'https:' + img_src
elif img_src.startswith(LOGO_URL):
# Re-brand with Whoogle logo
img['src'] = '/static/img/logo.png'
img['style'] = 'height:40px;width:162px'
continue
elif img_src.startswith(GOOG_IMG):
# Special rebranding for image search results
if img_src.startswith(LOGO_URL):
img['src'] = '/static/img/logo.png'
img['style'] = 'height:40px;width:162px'
else:
img['src'] = BLANK_B64
img['src'] = BLANK_B64
continue
enc_src = Fernet(self.secret_key).encrypt(img_src.encode())
@ -149,32 +185,11 @@ class Filter:
a['href'] = new_search
elif 'url?q=' in href:
# Strip unneeded arguments
parsed_link = urlparse.urlparse(query_link)
link_args = parse_qs(parsed_link.query)
safe_args = {}
if len(link_args) == 0 and len(parsed_link) > 0:
a['href'] = query_link
continue
for arg in link_args.keys():
if arg in SKIP_ARGS:
continue
safe_args[arg] = link_args[arg]
# Remove original link query and replace with filtered args
query_link = query_link.replace(parsed_link.query, '')
if len(safe_args) > 0:
query_link = query_link + urlparse.urlencode(safe_args, doseq=True)
else:
query_link = query_link.replace('?', '')
a['href'] = query_link
a['href'] = filter_link_args(query_link)
# Add no-js option
if self.nojs:
gen_nojs(soup, query_link, a)
gen_nojs(soup, a['href'], a)
else:
a['href'] = href
@ -185,4 +200,4 @@ def gen_nojs(soup, link, sibling):
nojs_link['style'] = 'display:block;width:100%;'
nojs_link.string = 'NoJS Link: ' + nojs_link['href']
sibling.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
sibling.append(nojs_link)
sibling.append(nojs_link)

View File

@ -1,34 +1,47 @@
from app import app
from app.filter import Filter
from app.filter import Filter, get_first_link
from app.models.config import Config
from app.request import Request, gen_query
import argparse
from bs4 import BeautifulSoup
from cryptography.fernet import Fernet, InvalidToken
from flask import g, make_response, request, redirect, render_template, send_file
from functools import wraps
import io
import json
import os
import urllib.parse as urlparse
import waitress
app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__)))
app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static'))
CONFIG_PATH = os.getenv('CONFIG_VOLUME', app.config['STATIC_FOLDER']) + '/config.json'
def auth_required(f):
    """Protect a view with HTTP Basic auth when credentials are configured.

    The expected credentials are read from the ``WHOOGLE_USER`` and
    ``WHOOGLE_PASS`` environment variables on every call. If either one is
    unset or empty, the wrapped view is called without any check.

    Args:
        f: View function to wrap.

    Returns:
        The wrapped view. It returns the result of ``f`` when the request is
        allowed, otherwise a 401 response carrying a ``WWW-Authenticate``
        Basic challenge.
    """
    import hmac

    def _matches(expected, supplied):
        # Constant-time comparison so response timing does not reveal how
        # much of a credential was correct. Compare bytes because
        # compare_digest rejects non-ASCII str, and treat a missing value as
        # an empty string.
        return hmac.compare_digest(expected.encode('utf-8'),
                                   (supplied or '').encode('utf-8'))

    @wraps(f)
    def decorated(*args, **kwargs):
        whoogle_user = os.getenv('WHOOGLE_USER', '')
        whoogle_pass = os.getenv('WHOOGLE_PASS', '')

        # Skip if username/password not set
        if not whoogle_user or not whoogle_pass:
            return f(*args, **kwargs)

        auth = request.authorization
        if auth:
            # Evaluate both comparisons before combining them so a wrong
            # username is not rejected faster than a wrong password.
            user_ok = _matches(whoogle_user, auth.username)
            pass_ok = _matches(whoogle_pass, auth.password)
            if user_ok and pass_ok:
                return f(*args, **kwargs)

        return make_response('Not logged in', 401, {'WWW-Authenticate': 'Basic realm="Login Required"'})

    return decorated
@app.before_request
def before_request_func():
# Always redirect to https if HTTPS_ONLY is set (otherwise default to false)
https_only = os.getenv('HTTPS_ONLY', False)
config_path = app.config['CONFIG_PATH']
if https_only and request.url.startswith('http://'):
url = request.url.replace('http://', 'https://', 1)
code = 301
return redirect(url, code=code)
https_url = request.url.replace('http://', 'https://', 1)
code = 308
return redirect(https_url, code=code)
json_config = json.load(open(CONFIG_PATH)) if os.path.exists(CONFIG_PATH) else {'url': request.url_root}
json_config = json.load(open(config_path)) if os.path.exists(config_path) else {'url': request.url_root}
g.user_config = Config(**json_config)
if not g.user_config.url:
@ -44,6 +57,7 @@ def unknown_page(e):
@app.route('/', methods=['GET'])
@auth_required
def index():
bg = '#000' if g.user_config.dark else '#fff'
return render_template('index.html',
@ -51,10 +65,12 @@ def index():
ua=g.user_request.modified_user_agent,
languages=Config.LANGUAGES,
current_lang=g.user_config.lang,
version_number=app.config['VERSION_NUMBER'],
request_type='get' if g.user_config.get_only else 'post')
@app.route('/opensearch.xml', methods=['GET'])
@auth_required
def opensearch():
opensearch_url = g.app_location
if opensearch_url.endswith('/'):
@ -72,6 +88,7 @@ def opensearch():
@app.route('/search', methods=['GET', 'POST'])
@auth_required
def search():
request_params = request.args if request.method == 'GET' else request.form
q = request_params.get('q')
@ -85,20 +102,29 @@ def search():
except InvalidToken:
pass
feeling_lucky = q.startswith('! ')
if feeling_lucky: # Well do you, punk?
q = q[2:]
user_agent = request.headers.get('User-Agent')
mobile = 'Android' in user_agent or 'iPhone' in user_agent
content_filter = Filter(mobile, g.user_config, secret_key=app.secret_key)
full_query = gen_query(q, request_params, content_filter.near, language=g.user_config.lang)
get_body = g.user_request.send(query=full_query)
dirty_soup = BeautifulSoup(content_filter.reskin(get_body), 'html.parser')
results = content_filter.reskin(get_body)
formatted_results = content_filter.clean(BeautifulSoup(results, 'html.parser'))
if feeling_lucky:
return redirect(get_first_link(dirty_soup), 303) # Using 303 so the browser performs a GET request for the URL
else:
formatted_results = content_filter.clean(dirty_soup)
return render_template('display.html', query=urlparse.unquote(q), response=formatted_results)
@app.route('/config', methods=['GET', 'POST'])
@auth_required
def config():
if request.method == 'GET':
return json.dumps(g.user_config.__dict__)
@ -107,7 +133,7 @@ def config():
if 'url' not in config_data or not config_data['url']:
config_data['url'] = g.user_config.url
with open(CONFIG_PATH, 'w') as config_file:
with open(app.config['CONFIG_PATH'], 'w') as config_file:
config_file.write(json.dumps(config_data, indent=4))
config_file.close()
@ -115,6 +141,7 @@ def config():
@app.route('/url', methods=['GET'])
@auth_required
def url():
if 'url' in request.args:
return redirect(request.args.get('url'))
@ -127,11 +154,13 @@ def url():
@app.route('/imgres')
@auth_required
def imgres():
return redirect(request.args.get('imgurl'))
@app.route('/tmp')
@auth_required
def tmp():
cipher_suite = Fernet(app.secret_key)
img_url = cipher_suite.decrypt(request.args.get('image_url').encode()).decode()
@ -149,6 +178,7 @@ def tmp():
@app.route('/window')
@auth_required
def window():
get_body = g.user_request.send(base_url=request.args.get('location'))
get_body = get_body.replace('src="/', 'src="' + request.args.get('location') + '"')
@ -175,7 +205,15 @@ def run_app():
help='Activates debug mode for the server (default False)')
parser.add_argument('--https-only', default=False, action='store_true',
help='Enforces HTTPS redirects for all requests')
parser.add_argument('--userpass', default='', metavar='<username:password>',
help='Sets a username/password basic auth combo (default None)')
args = parser.parse_args()
if args.userpass:
user_pass = args.userpass.split(':')
os.environ['WHOOGLE_USER'] = user_pass[0]
os.environ['WHOOGLE_PASS'] = user_pass[1]
os.environ['HTTPS_ONLY'] = '1' if args.https_only else ''
if args.debug:

View File

@ -1,3 +1,7 @@
body {
font-family: Avenir, Helvetica, Arial, sans-serif;
}
.logo {
width: 80%;
display: block;
@ -117,3 +121,11 @@ button::-moz-focus-inner {
.hidden {
display: none;
}
/* Page footer: fixed at the bottom edge, full width with centered text, and
   given a negative z-index. */
footer {
position: fixed;
bottom: 0%;
text-align: center;
width: 100%;
z-index: -1;
}

View File

@ -85,6 +85,8 @@
</div>
</div>
</div>
<footer>
<p>Whoogle Search v{{ version_number }} || <a href="https://github.com/benbusby/whoogle-search">View on GitHub</a></p>
</footer>
</body>
</html>

View File

@ -8,8 +8,7 @@ setuptools.setup(
author='Ben Busby',
author_email='benbusby@protonmail.com',
name='whoogle-search',
version='0.1.0',
scripts=['whoogle-search'],
version='0.1.4',
include_package_data=True,
install_requires=requirements,
description='Self-hosted, ad-free, privacy-respecting Google metasearch engine',

View File

@ -62,6 +62,6 @@ def test_recent_results(client):
try:
date = parse(date_span)
assert (current_date - date).days <= num_days
assert (current_date - date).days <= (num_days + 5) # Date can have a little bit of wiggle room
except ParserError:
assert ' ago' in date_span

View File

@ -17,6 +17,10 @@ def test_search(client):
rv = client.get('/search?q=test')
assert rv._status_code == 200
def test_feeling_lucky(client):
    """A search query prefixed with "! " is answered with a 303 response."""
    response = client.get('/search?q=!%20test')
    assert response._status_code == 303
def test_config(client):
rv = client.post('/config', data=demo_config)