Merge remote-tracking branch 'origin/master' into heroku-app
This commit is contained in:
commit
18c194d500
10
.github/ISSUE_TEMPLATE/question.md
vendored
Normal file
10
.github/ISSUE_TEMPLATE/question.md
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
---
|
||||
name: Question
|
||||
about: Ask a (simple) question about Whoogle
|
||||
title: "[QUESTION] <question here>"
|
||||
labels: question
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
Type out your question here. Please make sure that this is a topic that isn't already covered in the README.
|
|
@ -5,4 +5,11 @@ before_install:
|
|||
install:
|
||||
- pip install -r requirements.txt
|
||||
script:
|
||||
- ./run test
|
||||
- "./run test"
|
||||
deploy:
|
||||
provider: pypi
|
||||
user: __token__
|
||||
password:
|
||||
secure: WNEH2Gg84MZF/AZEberFDGPPWb4cYyHAeD/XV8En94QRSI9Aznz6qiDKOvV4eVgjMAIEW5uB3TL1LHf6KU+Hrg6SmhF7JquqP1gsBOCDNFPTljO+k2Hc53uDdSnhi/HLgY7cnFNX4lc2nNrbyxZxMHuSA2oNz/tosyNGBEeyU+JA5va7uX0albGsLiNjimO4aeau83fsI0Hn2eN6ag68pewUMXNxzpyTeO2bRcCd5d5iILs07jMVwFoC2j7W11oNqrVuSWAs8CPe4+kwvNvXWxljUGiBGppNZ7RAsKNLwi6U6kGGUTWjQm09rY/2JBpJ2WEGmIWGIrno75iiFRbjnRp3mnXPvtVTyWhh+hQIUd7bJOVKM34i9eHotYTrkMJObgW1gnRzvI9VYldtgL/iP/Isn2Pv2EeMX8V+C9/8pxv0jkQkZMnFhE6gGlzpz37zTl04B2J7xyV5znM35Lx2Pn3zxdcmdCvD3yT8I4MuBbKqq2/v4emYCfPfOmfwnS0BEVSqr9lbx4xfUZV76tcvLcj4n86DJbx77pA2Ch8FRprpOOBcf0WuqTbZp8c3mb8prFp2EupUknXu7+C2VQ6sqrnzNuDeTGm/nyjjRQ81rlvlD4tqkwsEGEDDO44FF2eUTc5D2MvoHs4cnz095FWjy63gn5IxUjhMi31b5tGRz2Q=
|
||||
on:
|
||||
tags: true
|
||||
|
|
|
@ -11,6 +11,12 @@ VOLUME $config_dir
|
|||
ENV CONFIG_VOLUME=$config_dir
|
||||
|
||||
ARG use_https=1
|
||||
|
||||
ARG username=''
|
||||
ENV WHOOGLE_USER=$username
|
||||
ARG password=''
|
||||
ENV WHOOGLE_PASS=$password
|
||||
|
||||
ENV HTTPS_ONLY=$use_https
|
||||
|
||||
ARG whoogle_port=5000
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
graft app/static
|
||||
graft app/templates
|
||||
include requirements.txt
|
||||
global-exclude *.pyc
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://travis-ci.com/benbusby/whoogle-search)
|
||||
[](https://codebeat.co/projects/github-com-benbusby-shoogle-master)
|
||||

|
||||
[](https://hub.docker.com/r/benbusby/whoogle-search)
|
||||
|
||||
Get Google search results, but without any ads, javascript, AMP links, cookies, or IP address tracking. Easily deployable in one click as a Docker app, and customizable with a single config file. Quick and simple to implement as a primary search engine replacement on both desktop and mobile.
|
||||
|
||||
|
@ -185,6 +185,8 @@ To filter by a range of time, append ":past <time>" to the end of your search, w
|
|||
|
||||
## Extra Steps
|
||||
### Set Whoogle as your primary search engine
|
||||
*Note: If you're using a reverse proxy to run Whoogle Search, make sure the "Root URL" config option on the home page is set to your URL before going through these steps.*
|
||||
|
||||
Update browser settings:
|
||||
- Firefox (Desktop)
|
||||
- Navigate to your app's url, and click the 3 dot menu in the address bar. At the bottom, there should be an option to "Add Search Engine". Once you've clicked this, open your Firefox Preferences menu, click "Search" in the left menu, and use the available dropdown to select "Whoogle" from the list.
|
||||
|
@ -236,6 +238,7 @@ Only needed if your setup requires Flask to redirect to HTTPS on its own -- gene
|
|||
- Default `run` script: Modify the script locally to include the `--https-only` flag at the end of the python run command
|
||||
|
||||
Available config values are `near`, `nojs`, `dark` and `url`.
|
||||
|
||||
## FAQ
|
||||
**What's the difference between this and [Searx](https://github.com/asciimoo/searx)?**
|
||||
|
||||
|
|
|
@ -4,5 +4,9 @@ import os
|
|||
|
||||
app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static')
|
||||
app.secret_key = Fernet.generate_key()
|
||||
app.config['VERSION_NUMBER'] = '0.1.4'
|
||||
app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__)))
|
||||
app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static'))
|
||||
app.config['CONFIG_PATH'] = os.getenv('CONFIG_VOLUME', app.config['STATIC_FOLDER']) + '/config.json'
|
||||
|
||||
from app import routes
|
||||
|
|
|
@ -14,6 +14,43 @@ 
|
|||
'''
|
||||
|
||||
|
||||
def get_first_link(soup):
    """Return the first search-result link found in ``soup``.

    Every anchor carrying an ``href`` is inspected in document order. The
    ``https://www.google.com`` prefix is removed from the href, and the
    first href containing ``url?q=`` is passed through
    ``filter_link_args`` and returned.

    Args:
        soup: Parsed document exposing ``find_all('a', href=True)``.

    Returns:
        The filtered href of the first matching anchor, or ``None`` when
        no anchor matches.
    """
    for a in soup.find_all('a', href=True):
        href = a['href'].replace('https://www.google.com', '')

        # NOTE: the query string is deliberately not parsed here. The value
        # was never used, and indexing parse_qs(...)['q'] raised KeyError for
        # anchors such as '/search?q=' (parse_qs drops blank values).

        # Return the first search result URL
        if 'url?q=' in href:
            return filter_link_args(href)

    # No result link present in the document
    return None
|
||||
|
||||
|
||||
def filter_link_args(query_link):
    """Remove query arguments listed in ``SKIP_ARGS`` from a link.

    Args:
        query_link: The URL (absolute or relative) to clean up.

    Returns:
        ``query_link`` unchanged when its query string yields no parseable
        arguments; otherwise the same URL with its query rebuilt from the
        arguments whose names are not in ``SKIP_ARGS``. When every argument
        is skipped the result carries no query string at all.
    """
    parsed_link = urlparse.urlparse(query_link)
    link_args = parse_qs(parsed_link.query)

    # Nothing to filter (no query, or only blank/valueless arguments)
    if not link_args:
        return query_link

    safe_args = {
        arg: values
        for arg, values in link_args.items()
        if arg not in SKIP_ARGS
    }

    # Swap only the query component and reassemble the URL. Editing the raw
    # string (replace + append) misplaced the new query after any
    # '#fragment' and mangled links whose query text also appeared in the
    # path.
    filtered_query = urlparse.urlencode(safe_args, doseq=True)
    return parsed_link._replace(query=filtered_query).geturl()
|
||||
|
||||
|
||||
class Filter:
|
||||
def __init__(self, mobile=False, config=None, secret_key=''):
|
||||
if config is None:
|
||||
|
@ -75,14 +112,13 @@ class Filter:
|
|||
img_src = img['src']
|
||||
if img_src.startswith('//'):
|
||||
img_src = 'https:' + img_src
|
||||
elif img_src.startswith(LOGO_URL):
|
||||
# Re-brand with Whoogle logo
|
||||
img['src'] = '/static/img/logo.png'
|
||||
img['style'] = 'height:40px;width:162px'
|
||||
continue
|
||||
elif img_src.startswith(GOOG_IMG):
|
||||
# Special rebranding for image search results
|
||||
if img_src.startswith(LOGO_URL):
|
||||
img['src'] = '/static/img/logo.png'
|
||||
img['style'] = 'height:40px;width:162px'
|
||||
else:
|
||||
img['src'] = BLANK_B64
|
||||
|
||||
img['src'] = BLANK_B64
|
||||
continue
|
||||
|
||||
enc_src = Fernet(self.secret_key).encrypt(img_src.encode())
|
||||
|
@ -149,32 +185,11 @@ class Filter:
|
|||
a['href'] = new_search
|
||||
elif 'url?q=' in href:
|
||||
# Strip unneeded arguments
|
||||
parsed_link = urlparse.urlparse(query_link)
|
||||
link_args = parse_qs(parsed_link.query)
|
||||
safe_args = {}
|
||||
|
||||
if len(link_args) == 0 and len(parsed_link) > 0:
|
||||
a['href'] = query_link
|
||||
continue
|
||||
|
||||
for arg in link_args.keys():
|
||||
if arg in SKIP_ARGS:
|
||||
continue
|
||||
|
||||
safe_args[arg] = link_args[arg]
|
||||
|
||||
# Remove original link query and replace with filtered args
|
||||
query_link = query_link.replace(parsed_link.query, '')
|
||||
if len(safe_args) > 0:
|
||||
query_link = query_link + urlparse.urlencode(safe_args, doseq=True)
|
||||
else:
|
||||
query_link = query_link.replace('?', '')
|
||||
|
||||
a['href'] = query_link
|
||||
a['href'] = filter_link_args(query_link)
|
||||
|
||||
# Add no-js option
|
||||
if self.nojs:
|
||||
gen_nojs(soup, query_link, a)
|
||||
gen_nojs(soup, a['href'], a)
|
||||
else:
|
||||
a['href'] = href
|
||||
|
||||
|
@ -185,4 +200,4 @@ def gen_nojs(soup, link, sibling):
|
|||
nojs_link['style'] = 'display:block;width:100%;'
|
||||
nojs_link.string = 'NoJS Link: ' + nojs_link['href']
|
||||
sibling.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
|
||||
sibling.append(nojs_link)
|
||||
sibling.append(nojs_link)
|
|
@ -1,34 +1,47 @@
|
|||
from app import app
|
||||
from app.filter import Filter
|
||||
from app.filter import Filter, get_first_link
|
||||
from app.models.config import Config
|
||||
from app.request import Request, gen_query
|
||||
import argparse
|
||||
from bs4 import BeautifulSoup
|
||||
from cryptography.fernet import Fernet, InvalidToken
|
||||
from flask import g, make_response, request, redirect, render_template, send_file
|
||||
from functools import wraps
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import urllib.parse as urlparse
|
||||
import waitress
|
||||
|
||||
app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__)))
|
||||
app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static'))
|
||||
|
||||
CONFIG_PATH = os.getenv('CONFIG_VOLUME', app.config['STATIC_FOLDER']) + '/config.json'
|
||||
def auth_required(f):
    """Decorate a view so that it requires HTTP Basic credentials.

    The expected credentials are read from the ``WHOOGLE_USER`` and
    ``WHOOGLE_PASS`` environment variables on every call. When either one
    is unset or empty the wrapped view runs without any check. Otherwise
    the request's ``Authorization`` data must match both values; if it does
    not, a 401 response carrying a ``WWW-Authenticate`` header is returned
    instead of calling the view.

    Args:
        f: The view function to protect.

    Returns:
        The wrapped view function.
    """
    @wraps(f)
    def decorated(*args, **kwargs):
        # Imported locally so the block does not depend on a file-level import
        from hmac import compare_digest

        auth = request.authorization

        # Skip if username/password not set
        whoogle_user = os.getenv('WHOOGLE_USER', '')
        whoogle_pass = os.getenv('WHOOGLE_PASS', '')
        if not whoogle_user or not whoogle_pass:
            return f(*args, **kwargs)

        if auth:
            # Constant-time comparison on bytes (compare_digest rejects
            # non-ASCII str). Both checks always run so the response time
            # does not reveal which of the two values was wrong.
            user_ok = compare_digest((auth.username or '').encode(), whoogle_user.encode())
            pass_ok = compare_digest((auth.password or '').encode(), whoogle_pass.encode())
            if user_ok and pass_ok:
                return f(*args, **kwargs)

        return make_response('Not logged in', 401, {'WWW-Authenticate': 'Basic realm="Login Required"'})

    return decorated
|
||||
|
||||
|
||||
@app.before_request
|
||||
def before_request_func():
|
||||
# Always redirect to https if HTTPS_ONLY is set (otherwise default to false)
|
||||
https_only = os.getenv('HTTPS_ONLY', False)
|
||||
config_path = app.config['CONFIG_PATH']
|
||||
|
||||
if https_only and request.url.startswith('http://'):
|
||||
url = request.url.replace('http://', 'https://', 1)
|
||||
code = 301
|
||||
return redirect(url, code=code)
|
||||
https_url = request.url.replace('http://', 'https://', 1)
|
||||
code = 308
|
||||
return redirect(https_url, code=code)
|
||||
|
||||
json_config = json.load(open(CONFIG_PATH)) if os.path.exists(CONFIG_PATH) else {'url': request.url_root}
|
||||
json_config = json.load(open(config_path)) if os.path.exists(config_path) else {'url': request.url_root}
|
||||
g.user_config = Config(**json_config)
|
||||
|
||||
if not g.user_config.url:
|
||||
|
@ -44,6 +57,7 @@ def unknown_page(e):
|
|||
|
||||
|
||||
@app.route('/', methods=['GET'])
|
||||
@auth_required
|
||||
def index():
|
||||
bg = '#000' if g.user_config.dark else '#fff'
|
||||
return render_template('index.html',
|
||||
|
@ -51,10 +65,12 @@ def index():
|
|||
ua=g.user_request.modified_user_agent,
|
||||
languages=Config.LANGUAGES,
|
||||
current_lang=g.user_config.lang,
|
||||
version_number=app.config['VERSION_NUMBER'],
|
||||
request_type='get' if g.user_config.get_only else 'post')
|
||||
|
||||
|
||||
@app.route('/opensearch.xml', methods=['GET'])
|
||||
@auth_required
|
||||
def opensearch():
|
||||
opensearch_url = g.app_location
|
||||
if opensearch_url.endswith('/'):
|
||||
|
@ -72,6 +88,7 @@ def opensearch():
|
|||
|
||||
|
||||
@app.route('/search', methods=['GET', 'POST'])
|
||||
@auth_required
|
||||
def search():
|
||||
request_params = request.args if request.method == 'GET' else request.form
|
||||
q = request_params.get('q')
|
||||
|
@ -85,20 +102,29 @@ def search():
|
|||
except InvalidToken:
|
||||
pass
|
||||
|
||||
feeling_lucky = q.startswith('! ')
|
||||
|
||||
if feeling_lucky: # Well do you, punk?
|
||||
q = q[2:]
|
||||
|
||||
user_agent = request.headers.get('User-Agent')
|
||||
mobile = 'Android' in user_agent or 'iPhone' in user_agent
|
||||
|
||||
content_filter = Filter(mobile, g.user_config, secret_key=app.secret_key)
|
||||
full_query = gen_query(q, request_params, content_filter.near, language=g.user_config.lang)
|
||||
get_body = g.user_request.send(query=full_query)
|
||||
dirty_soup = BeautifulSoup(content_filter.reskin(get_body), 'html.parser')
|
||||
|
||||
results = content_filter.reskin(get_body)
|
||||
formatted_results = content_filter.clean(BeautifulSoup(results, 'html.parser'))
|
||||
if feeling_lucky:
|
||||
return redirect(get_first_link(dirty_soup), 303) # Using 303 so the browser performs a GET request for the URL
|
||||
else:
|
||||
formatted_results = content_filter.clean(dirty_soup)
|
||||
|
||||
return render_template('display.html', query=urlparse.unquote(q), response=formatted_results)
|
||||
|
||||
|
||||
@app.route('/config', methods=['GET', 'POST'])
|
||||
@auth_required
|
||||
def config():
|
||||
if request.method == 'GET':
|
||||
return json.dumps(g.user_config.__dict__)
|
||||
|
@ -107,7 +133,7 @@ def config():
|
|||
if 'url' not in config_data or not config_data['url']:
|
||||
config_data['url'] = g.user_config.url
|
||||
|
||||
with open(CONFIG_PATH, 'w') as config_file:
|
||||
with open(app.config['CONFIG_PATH'], 'w') as config_file:
|
||||
config_file.write(json.dumps(config_data, indent=4))
|
||||
config_file.close()
|
||||
|
||||
|
@ -115,6 +141,7 @@ def config():
|
|||
|
||||
|
||||
@app.route('/url', methods=['GET'])
|
||||
@auth_required
|
||||
def url():
|
||||
if 'url' in request.args:
|
||||
return redirect(request.args.get('url'))
|
||||
|
@ -127,11 +154,13 @@ def url():
|
|||
|
||||
|
||||
@app.route('/imgres')
@auth_required
def imgres():
    """Redirect to the address given in the ``imgurl`` query parameter."""
    image_target = request.args.get('imgurl')
    return redirect(image_target)
|
||||
|
||||
|
||||
@app.route('/tmp')
|
||||
@auth_required
|
||||
def tmp():
|
||||
cipher_suite = Fernet(app.secret_key)
|
||||
img_url = cipher_suite.decrypt(request.args.get('image_url').encode()).decode()
|
||||
|
@ -149,6 +178,7 @@ def tmp():
|
|||
|
||||
|
||||
@app.route('/window')
|
||||
@auth_required
|
||||
def window():
|
||||
get_body = g.user_request.send(base_url=request.args.get('location'))
|
||||
get_body = get_body.replace('src="/', 'src="' + request.args.get('location') + '"')
|
||||
|
@ -175,7 +205,15 @@ def run_app():
|
|||
help='Activates debug mode for the server (default False)')
|
||||
parser.add_argument('--https-only', default=False, action='store_true',
|
||||
help='Enforces HTTPS redirects for all requests')
|
||||
parser.add_argument('--userpass', default='', metavar='<username:password>',
|
||||
help='Sets a username/password basic auth combo (default None)')
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.userpass:
|
||||
user_pass = args.userpass.split(':')
|
||||
os.environ['WHOOGLE_USER'] = user_pass[0]
|
||||
os.environ['WHOOGLE_PASS'] = user_pass[1]
|
||||
|
||||
os.environ['HTTPS_ONLY'] = '1' if args.https_only else ''
|
||||
|
||||
if args.debug:
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
body {
|
||||
font-family: Avenir, Helvetica, Arial, sans-serif;
|
||||
}
|
||||
|
||||
.logo {
|
||||
width: 80%;
|
||||
display: block;
|
||||
|
@ -117,3 +121,11 @@ button::-moz-focus-inner {
|
|||
.hidden {
|
||||
display: none;
|
||||
}
|
||||
|
||||
footer {
|
||||
position: fixed;
|
||||
bottom: 0%;
|
||||
text-align: center;
|
||||
width: 100%;
|
||||
z-index: -1;
|
||||
}
|
||||
|
|
|
@ -85,6 +85,8 @@
|
|||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<footer>
|
||||
<p>Whoogle Search v{{ version_number }} || <a href="https://github.com/benbusby/whoogle-search">View on GitHub</a></p>
|
||||
</footer>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
|
|
3
setup.py
3
setup.py
|
@ -8,8 +8,7 @@ setuptools.setup(
|
|||
author='Ben Busby',
|
||||
author_email='benbusby@protonmail.com',
|
||||
name='whoogle-search',
|
||||
version='0.1.0',
|
||||
scripts=['whoogle-search'],
|
||||
version='0.1.4',
|
||||
include_package_data=True,
|
||||
install_requires=requirements,
|
||||
description='Self-hosted, ad-free, privacy-respecting Google metasearch engine',
|
||||
|
|
|
@ -62,6 +62,6 @@ def test_recent_results(client):
|
|||
|
||||
try:
|
||||
date = parse(date_span)
|
||||
assert (current_date - date).days <= num_days
|
||||
assert (current_date - date).days <= (num_days + 5) # Date can have a little bit of wiggle room
|
||||
except ParserError:
|
||||
assert ' ago' in date_span
|
||||
|
|
|
@ -17,6 +17,10 @@ def test_search(client):
|
|||
rv = client.get('/search?q=test')
|
||||
assert rv._status_code == 200
|
||||
|
||||
def test_feeling_lucky(client):
    """A query prefixed with '! ' must be answered with a 303 redirect."""
    lucky_response = client.get('/search?q=!%20test')
    expected_status = 303
    assert lucky_response._status_code == expected_status
|
||||
|
||||
|
||||
def test_config(client):
|
||||
rv = client.post('/config', data=demo_config)
|
||||
|
|
Loading…
Reference in New Issue
Block a user