Merge branch 'develop' into develop
This commit is contained in:
commit
85f6ec931a
19
Dockerfile
19
Dockerfile
|
@ -1,7 +1,13 @@
|
|||
FROM python:3.8-slim
|
||||
|
||||
WORKDIR /usr/src/app
|
||||
RUN apt-get update && apt-get install -y build-essential libcurl4-openssl-dev libssl-dev
|
||||
RUN apt-get update && apt-get install -y \
|
||||
build-essential \
|
||||
libcurl4-openssl-dev \
|
||||
libssl-dev \
|
||||
tor
|
||||
|
||||
COPY misc/tor/torrc /etc/tor/torrc
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
|
@ -15,6 +21,15 @@ ENV WHOOGLE_USER=$username
|
|||
ARG password=''
|
||||
ENV WHOOGLE_PASS=$password
|
||||
|
||||
ARG proxyuser=''
|
||||
ENV WHOOGLE_PROXY_USER=$proxyuser
|
||||
ARG proxypass=''
|
||||
ENV WHOOGLE_PROXY_PASS=$proxypass
|
||||
ARG proxytype=''
|
||||
ENV WHOOGLE_PROXY_TYPE=$proxytype
|
||||
ARG proxyloc=''
|
||||
ENV WHOOGLE_PROXY_LOC=$proxyloc
|
||||
|
||||
ARG use_https=''
|
||||
ENV HTTPS_ONLY=$use_https
|
||||
|
||||
|
@ -25,4 +40,4 @@ COPY . .
|
|||
|
||||
EXPOSE $EXPOSE_PORT
|
||||
|
||||
CMD ["./run"]
|
||||
CMD misc/tor/start-tor.sh & ./run
|
||||
|
|
|
@ -25,6 +25,7 @@ Contents
|
|||
- No AMP links
|
||||
- No URL tracking tags (e.g. utm=%s)
|
||||
- No referrer header
|
||||
- Tor and HTTP/SOCKS proxy support
|
||||
- Autocomplete/search suggestions
|
||||
- POST request search and suggestion queries (when possible)
|
||||
- View images at full res without site redirect (currently mobile only)
|
||||
|
@ -35,7 +36,7 @@ Contents
|
|||
- Optional location-based searching (i.e. results near \<city\>)
|
||||
- Optional NoJS mode to disable all Javascript in results
|
||||
|
||||
<sup>*If deployed to a remote server</sup>
|
||||
<sup>*If deployed to a remote server, or configured to send requests through a VPN, Tor, proxy, etc.</sup>
|
||||
|
||||
## Dependencies
|
||||
If using Heroku Quick Deploy, **you can skip this section**.
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
from app.request import send_tor_signal
|
||||
from app.utils.session_utils import generate_user_keys
|
||||
from app.utils.gen_ddg_bangs import gen_bangs_json
|
||||
from flask import Flask
|
||||
from flask_session import Session
|
||||
import os
|
||||
from stem import Signal
|
||||
|
||||
app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static')
|
||||
app.user_elements = {}
|
||||
|
@ -25,11 +27,15 @@ if not os.path.exists(app.config['CONFIG_PATH']):
|
|||
if not os.path.exists(app.config['SESSION_FILE_DIR']):
|
||||
os.makedirs(app.config['SESSION_FILE_DIR'])
|
||||
|
||||
# (Re)generate DDG bang filter, and create path if it doesn't exist yet
|
||||
# Generate DDG bang filter, and create path if it doesn't exist yet
|
||||
if not os.path.exists(app.config['BANG_PATH']):
|
||||
os.makedirs(app.config['BANG_PATH'])
|
||||
gen_bangs_json(app.config['BANG_FILE'])
|
||||
if not os.path.exists(app.config['BANG_FILE']):
|
||||
gen_bangs_json(app.config['BANG_FILE'])
|
||||
|
||||
Session(app)
|
||||
|
||||
# Send a heartbeat signal to Tor, to determine if a Tor control connection is available
|
||||
send_tor_signal(Signal.HEARTBEAT)
|
||||
|
||||
from app import routes
|
||||
|
|
|
@ -305,6 +305,7 @@ class Config:
|
|||
self.safe = False
|
||||
self.dark = False
|
||||
self.nojs = False
|
||||
self.tor = False
|
||||
self.near = ''
|
||||
self.alts = False
|
||||
self.new_tab = False
|
||||
|
|
126
app/request.py
126
app/request.py
|
@ -1,8 +1,12 @@
|
|||
from app.models.config import Config
|
||||
from lxml import etree
|
||||
import random
|
||||
import requests
|
||||
from requests import Response
|
||||
from requests import Response, ConnectionError
|
||||
import urllib.parse as urlparse
|
||||
import os
|
||||
from stem import Signal, SocketError
|
||||
from stem.control import Controller
|
||||
|
||||
# Core Google search URLs
|
||||
SEARCH_URL = 'https://www.google.com/search?gbv=1&q='
|
||||
|
@ -15,7 +19,36 @@ DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
|
|||
VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source', 'nfpr']
|
||||
|
||||
|
||||
def gen_user_agent(is_mobile):
|
||||
class TorError(Exception):
    """Signals that a request routed through Tor could not be completed.

    Attributes:
        message -- human-readable description of what went wrong
        disable -- when True, asks the caller to switch Tor off in the
            user config (meant for the case where the connection has been
            dropped altogether)
    """

    def __init__(self, message, disable=False):
        # Hand the message to Exception so str(err) and err.args carry it
        super().__init__(message)
        self.message = message
        self.disable = disable
|
||||
|
||||
|
||||
def send_tor_signal(signal: Signal) -> bool:
    """Send a control signal to Tor and record whether Tor is reachable.

    Opens a controller on control port 9051, authenticates, and delivers
    the signal. The outcome is mirrored into the ``TOR_AVAILABLE``
    environment variable ('1' on success, '0' on a connection failure).

    Args:
        signal: The stem Signal to deliver to the Tor controller

    Returns:
        bool: True if the signal was delivered, False if the controller
              could not be reached

    """
    delivered = False
    try:
        with Controller.from_port(port=9051) as controller:
            controller.authenticate()
            controller.signal(signal)
            os.environ['TOR_AVAILABLE'] = '1'
            delivered = True
    except (SocketError, ConnectionRefusedError, ConnectionError):
        # Covers failures while connecting as well as while closing the
        # controller, so the flag is reset alongside the env variable
        os.environ['TOR_AVAILABLE'] = '0'
        delivered = False

    return delivered
|
||||
|
||||
|
||||
def gen_user_agent(is_mobile) -> str:
|
||||
mozilla = random.choice(['Moo', 'Woah', 'Bro', 'Slow']) + 'zilla'
|
||||
firefox = random.choice(['Choir', 'Squier', 'Higher', 'Wire']) + 'fox'
|
||||
linux = random.choice(['Win', 'Sin', 'Gin', 'Fin', 'Kin']) + 'ux'
|
||||
|
@ -26,7 +59,7 @@ def gen_user_agent(is_mobile):
|
|||
return DESKTOP_UA.format(mozilla, linux, firefox)
|
||||
|
||||
|
||||
def gen_query(query, args, config, near_city=None):
|
||||
def gen_query(query, args, config, near_city=None) -> str:
|
||||
param_dict = {key: '' for key in VALID_PARAMS}
|
||||
|
||||
# Use :past(hour/day/week/month/year) if available
|
||||
|
@ -85,15 +118,56 @@ def gen_query(query, args, config, near_city=None):
|
|||
|
||||
|
||||
class Request:
|
||||
def __init__(self, normal_ua, language='lang_en'):
|
||||
self.language = language
|
||||
"""Class used for handling all outbound requests, including search queries,
|
||||
search suggestions, and loading of external content (images, audio, etc).
|
||||
|
||||
Attributes:
|
||||
normal_ua -- the user's current user agent
|
||||
root_path -- the root path of the whoogle instance
|
||||
config -- the user's current whoogle configuration
|
||||
"""
|
||||
def __init__(self, normal_ua, root_path, config: Config):
    """Prepare the outbound-request state for the current user.

    Args:
        normal_ua: The User-Agent header sent by the user's browser (may
            be None when the header is absent)
        root_path: The root path of the whoogle instance
        config: The user's current whoogle configuration

    """
    # Send heartbeat to Tor, used in determining if the user can or cannot
    # enable Tor for future requests
    send_tor_signal(Signal.HEARTBEAT)

    # A missing User-Agent header arrives as None; normalise it so the
    # substring checks below cannot raise TypeError
    normal_ua = normal_ua or ''

    self.language = config.lang_search
    self.mobile = 'Android' in normal_ua or 'iPhone' in normal_ua
    self.modified_user_agent = gen_user_agent(self.mobile)

    # Set up proxy, if previously configured
    proxy_loc = os.environ.get('WHOOGLE_PROXY_LOC')
    if proxy_loc:
        # Fall back to a plain HTTP proxy when no type was configured,
        # rather than producing a scheme-less (invalid) proxy URL
        proxy_type = os.environ.get('WHOOGLE_PROXY_TYPE') or 'http'
        proxy_user = os.environ.get('WHOOGLE_PROXY_USER')
        proxy_pass = os.environ.get('WHOOGLE_PROXY_PASS') or ''

        # Only emit the "user:pass@" prefix when credentials were given;
        # an unconditional '@' would yield URLs like "socks5://@host:port"
        auth_str = ''
        if proxy_user:
            auth_str = proxy_user + ':' + proxy_pass + '@'

        proxy_url = '{}://{}{}'.format(proxy_type, auth_str, proxy_loc)

        # The scheme in a proxy URL describes how to reach the proxy, not
        # the target site, so the same URL serves both http and https
        # traffic. (Substituting 'http' -> 'https' in the string would
        # also corrupt any credentials or hostname containing "http".)
        self.proxies = {
            'http': proxy_url,
            'https': proxy_url
        }
    elif config.tor:
        # No explicit proxy: route through the SOCKS address on port 9050
        self.proxies = {
            'http': 'socks5://127.0.0.1:9050',
            'https': 'socks5://127.0.0.1:9050'
        }
    else:
        self.proxies = {}

    self.tor = config.tor
    # Set to True by send() once check.torproject.org confirms the route
    self.tor_valid = False
    self.root_path = root_path
|
||||
|
||||
def __getitem__(self, name):
|
||||
return getattr(self, name)
|
||||
|
||||
def autocomplete(self, query):
|
||||
def autocomplete(self, query) -> list:
|
||||
"""Sends a query to Google's search suggestion service
|
||||
|
||||
Args:
|
||||
query: The in-progress query to send
|
||||
|
||||
Returns:
|
||||
list: The list of matches for possible search suggestions
|
||||
|
||||
"""
|
||||
ac_query = dict(hl=self.language, q=query)
|
||||
response = self.send(base_url=AUTOCOMPLETE_URL, query=urlparse.urlencode(ac_query)).text
|
||||
|
||||
|
@ -103,9 +177,45 @@ class Request:
|
|||
|
||||
return []
|
||||
|
||||
def send(self, base_url=SEARCH_URL, query='') -> Response:
|
||||
def send(self, base_url=SEARCH_URL, query='', attempt=0) -> Response:
    """Sends an outbound request to a URL. Optionally sends the request
    using Tor, if enabled by the user.

    Args:
        base_url: The URL to use in the request
        query: The optional query string for the request
        attempt: The number of attempts made for the request (used for
                 cycling through Tor identities, if enabled)

    Returns:
        Response: The Response object returned by the requests call

    Raises:
        TorError: If Tor is enabled but the controller cannot be
                  signalled, the connection cannot be validated by
                  torproject.org, or more than 10 retries hit a captcha

    """
    headers = {
        'User-Agent': self.modified_user_agent
    }

    # NOTE: no request may be returned before this point -- an early
    # return here would bypass the configured proxies and every Tor check

    if self.tor:
        # Validate Tor connection and request new identity if the last
        # one failed (any retry asks for a fresh circuit via NEWNYM)
        tor_signal = Signal.NEWNYM if attempt > 0 else Signal.HEARTBEAT
        if not send_tor_signal(tor_signal):
            raise TorError(
                "Tor was previously enabled, but the connection has been "
                "dropped. Please check your Tor configuration and try "
                "again.", disable=True)

        # Make sure that the tor connection is valid
        tor_check = requests.get(
            'https://check.torproject.org/',
            proxies=self.proxies,
            headers=headers)
        self.tor_valid = 'Congratulations' in tor_check.text

        if not self.tor_valid:
            raise TorError(
                "Tor connection succeeded, but the connection could not "
                "be validated by torproject.org",
                disable=True)

    response = requests.get(
        base_url + query,
        proxies=self.proxies,
        headers=headers)

    # Retry query with new identity if using Tor (max 10 attempts)
    if 'form id="captcha-form"' in response.text and self.tor:
        attempt += 1
        if attempt > 10:
            raise TorError("Tor query failed -- max attempts exceeded 10")
        return self.send(base_url, query, attempt)

    return response
|
||||
|
|
|
@ -9,12 +9,12 @@ import uuid
|
|||
from functools import wraps
|
||||
|
||||
import waitress
|
||||
from flask import jsonify, make_response, request, redirect, render_template, send_file, session
|
||||
from flask import jsonify, make_response, request, redirect, render_template, send_file, session, url_for
|
||||
from requests import exceptions
|
||||
|
||||
from app import app
|
||||
from app.models.config import Config
|
||||
from app.request import Request
|
||||
from app.request import Request, TorError
|
||||
from app.utils.session_utils import valid_user_session
|
||||
from app.utils.routing_utils import *
|
||||
|
||||
|
@ -62,13 +62,17 @@ def before_request_func():
|
|||
|
||||
if https_only and request.url.startswith('http://'):
|
||||
return redirect(request.url.replace('http://', 'https://', 1), code=308)
|
||||
|
||||
|
||||
g.user_config = Config(**session['config'])
|
||||
|
||||
if not g.user_config.url:
|
||||
g.user_config.url = request.url_root.replace('http://', 'https://') if https_only else request.url_root
|
||||
|
||||
g.user_request = Request(request.headers.get('User-Agent'), language=g.user_config.lang_search)
|
||||
g.user_request = Request(
|
||||
request.headers.get('User-Agent'),
|
||||
request.url_root,
|
||||
config=g.user_config)
|
||||
|
||||
g.app_location = g.user_config.url
|
||||
|
||||
|
||||
|
@ -103,11 +107,15 @@ def unknown_page(e):
|
|||
def index():
|
||||
# Reset keys
|
||||
session['fernet_keys'] = generate_user_keys(g.cookies_disabled)
|
||||
error_message = session['error_message'] if 'error_message' in session else ''
|
||||
session['error_message'] = ''
|
||||
|
||||
return render_template('index.html',
|
||||
languages=Config.LANGUAGES,
|
||||
countries=Config.COUNTRIES,
|
||||
config=g.user_config,
|
||||
error_message=error_message,
|
||||
tor_available=int(os.environ.get('TOR_AVAILABLE')),
|
||||
version_number=app.config['VERSION_NUMBER'])
|
||||
|
||||
|
||||
|
@ -128,6 +136,10 @@ def opensearch():
|
|||
@app.route('/autocomplete', methods=['GET', 'POST'])
|
||||
def autocomplete():
|
||||
q = g.request_params.get('q')
|
||||
if not q:
|
||||
# FF will occasionally (incorrectly) send the q field without a
|
||||
# mimetype in the format "b'q=<query>'" through the request.data field
|
||||
q = str(request.data).replace('q=', '')
|
||||
|
||||
# Search bangs if the query begins with "!", but not "! " (feeling lucky)
|
||||
if q.startswith('!') and len(q) > 1 and not q.startswith('! '):
|
||||
|
@ -138,7 +150,9 @@ def autocomplete():
|
|||
elif request.data:
|
||||
q = urlparse.unquote_plus(request.data.decode('utf-8').replace('q=', ''))
|
||||
|
||||
return jsonify([q, g.user_request.autocomplete(q)])
|
||||
# Return a list of suggestions for the query
|
||||
# Note: If Tor is enabled, this returns nothing, as the request is almost always rejected
|
||||
return jsonify([q, g.user_request.autocomplete(q) if not g.user_config.tor else []])
|
||||
|
||||
|
||||
@app.route('/search', methods=['GET', 'POST'])
|
||||
|
@ -159,8 +173,14 @@ def search():
|
|||
return redirect('/')
|
||||
|
||||
# Generate response and number of external elements from the page
|
||||
response, elements = search_util.generate_response()
|
||||
if search_util.feeling_lucky:
|
||||
try:
|
||||
response, elements = search_util.generate_response()
|
||||
except TorError as e:
|
||||
session['error_message'] = e.message + ("\\n\\nTor config is now disabled!" if e.disable else "")
|
||||
session['config']['tor'] = False if e.disable else session['config']['tor']
|
||||
return redirect(url_for('.index'))
|
||||
|
||||
if search_util.feeling_lucky or elements < 0:
|
||||
return redirect(response, code=303)
|
||||
|
||||
# Keep count of external elements to fetch before element key can be regenerated
|
||||
|
@ -281,6 +301,12 @@ def run_app():
|
|||
help='Enforces HTTPS redirects for all requests')
|
||||
parser.add_argument('--userpass', default='', metavar='<username:password>',
|
||||
help='Sets a username/password basic auth combo (default None)')
|
||||
parser.add_argument('--proxyauth', default='', metavar='<username:password>',
|
||||
help='Sets a username/password for a HTTP/SOCKS proxy (default None)')
|
||||
parser.add_argument('--proxytype', default='', metavar='<socks4|socks5|http>',
|
||||
help='Sets a proxy type for all connections (default None)')
|
||||
parser.add_argument('--proxyloc', default='', metavar='<location:port>',
|
||||
help='Sets a proxy location for all connections (default None)')
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.userpass:
|
||||
|
@ -288,6 +314,14 @@ def run_app():
|
|||
os.environ['WHOOGLE_USER'] = user_pass[0]
|
||||
os.environ['WHOOGLE_PASS'] = user_pass[1]
|
||||
|
||||
if args.proxytype and args.proxyloc:
|
||||
if args.proxyauth:
|
||||
proxy_user_pass = args.proxyauth.split(':')
|
||||
os.environ['WHOOGLE_PROXY_USER'] = proxy_user_pass[0]
|
||||
os.environ['WHOOGLE_PROXY_PASS'] = proxy_user_pass[1]
|
||||
os.environ['WHOOGLE_PROXY_TYPE'] = args.proxytype
|
||||
os.environ['WHOOGLE_PROXY_LOC'] = args.proxyloc
|
||||
|
||||
os.environ['HTTPS_ONLY'] = '1' if args.https_only else ''
|
||||
|
||||
if args.debug:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// Whoogle configurations that use boolean values and checkboxes
|
||||
CONFIG_BOOLS = [
|
||||
"nojs", "dark", "safe", "alts", "new_tab", "get_only"
|
||||
"nojs", "dark", "safe", "alts", "new_tab", "get_only", "tor"
|
||||
];
|
||||
|
||||
// Whoogle configurations that use string values and input fields
|
||||
|
|
|
@ -3,3 +3,4 @@
|
|||
<p>
|
||||
Error parsing "{{ query }}"
|
||||
</p>
|
||||
<a href="/">Return Home</a>
|
||||
|
|
|
@ -29,6 +29,12 @@
|
|||
<title>Whoogle Search</title>
|
||||
</head>
|
||||
<body id="main" style="display: none; background-color: {{ '#000' if config.dark else '#fff' }}">
|
||||
<script>
|
||||
{% if error_message|length > 0 %}
|
||||
let error = "{{ error_message|safe }}";
|
||||
alert(error);
|
||||
{% endif %}
|
||||
</script>
|
||||
<div class="search-container">
|
||||
<img class="logo" src="static/img/logo.png">
|
||||
<form id="search-form" action="search" method="{{ 'get' if config.get_only else 'post' }}">
|
||||
|
@ -110,6 +116,10 @@
|
|||
<label for="config-new-tab">Open Links in New Tab: </label>
|
||||
<input type="checkbox" name="new_tab" id="config-new-tab">
|
||||
</div>
|
||||
<div class="config-div">
|
||||
<label for="config-tor">Use Tor: {{ '' if tor_available else 'Unavailable' }}</label>
|
||||
<input type="checkbox" name="tor" id="config-tor" {{ '' if tor_available else 'hidden' }}>
|
||||
</div>
|
||||
<div class="config-div">
|
||||
<label for="config-get-only">GET Requests Only: </label>
|
||||
<input type="checkbox" name="get_only" id="config-get-only">
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
<Param name="q" value="{searchTerms}"/>
|
||||
</Url>
|
||||
<Url type="application/x-suggestions+json" {{ request_type|safe }} template="{{ main_url }}/autocomplete">
|
||||
<Param name="q" value="{searchTerms}"/>
|
||||
<Param name="q" value="{searchTerms}"/>
|
||||
</Url>
|
||||
<moz:SearchForm>{{ main_url }}/search</moz:SearchForm>
|
||||
</OpenSearchDescription>
|
||||
|
|
|
@ -7,6 +7,9 @@ from flask import g
|
|||
from typing import Any, Tuple
|
||||
|
||||
|
||||
TOR_BANNER = '<hr><h1 style="text-align: center">You are using Tor</h1><hr>'
|
||||
|
||||
|
||||
class RoutingUtils:
|
||||
def __init__(self, request, config, session, cookies_disabled=False):
|
||||
self.request_params = request.args if request.method == 'GET' else request.form
|
||||
|
@ -66,10 +69,13 @@ class RoutingUtils:
|
|||
|
||||
content_filter = Filter(self.session['fernet_keys'], mobile=mobile, config=self.config)
|
||||
full_query = gen_query(self.query, self.request_params, self.config, content_filter.near)
|
||||
get_body = g.user_request.send(query=full_query).text
|
||||
get_body = g.user_request.send(query=full_query)
|
||||
|
||||
# Produce cleanable html soup from response
|
||||
html_soup = BeautifulSoup(content_filter.reskin(get_body), 'html.parser')
|
||||
html_soup = BeautifulSoup(content_filter.reskin(get_body.text), 'html.parser')
|
||||
html_soup.insert(0, BeautifulSoup(
|
||||
TOR_BANNER,
|
||||
features='lxml') if g.user_request.tor_valid else BeautifulSoup("", features="lxml"))
|
||||
|
||||
if self.feeling_lucky:
|
||||
return get_first_link(html_soup), 1
|
||||
|
|
7
misc/tor/start-tor.sh
Executable file
7
misc/tor/start-tor.sh
Executable file
|
@ -0,0 +1,7 @@
|
|||
#!/bin/bash

# Start Tor: through the service manager when running as root, otherwise
# launch the tor binary directly with the bundled configuration file.
current_user="$(whoami)"

if [ "$current_user" = "root" ]; then
    service tor start
else
    tor -f /etc/tor/torrc
fi
|
8
misc/tor/torrc
Normal file
8
misc/tor/torrc
Normal file
|
@ -0,0 +1,8 @@
|
|||
DataDirectory /var/lib/tor
|
||||
ControlPort 9051
|
||||
CookieAuthentication 1
|
||||
DataDirectoryGroupReadable 1
|
||||
CookieAuthFileGroupReadable 1
|
||||
ExtORPortCookieAuthFileGroupReadable 1
|
||||
CacheDirectoryGroupReadable 1
|
||||
CookieAuthFile /var/lib/tor/control_auth_cookie
|
|
@ -1,20 +1,33 @@
|
|||
attrs==19.3.0
|
||||
beautifulsoup4==4.8.2
|
||||
bs4==0.0.1
|
||||
cachelib==0.1
|
||||
certifi==2020.4.5.1
|
||||
cffi==1.13.2
|
||||
chardet==3.0.4
|
||||
Click==7.0
|
||||
cryptography==3.2
|
||||
Flask==1.1.1
|
||||
Flask-Session==0.3.2
|
||||
idna==2.9
|
||||
itsdangerous==1.1.0
|
||||
Jinja2==2.10.3
|
||||
lxml==4.5.1
|
||||
MarkupSafe==1.1.1
|
||||
more-itertools==8.3.0
|
||||
packaging==20.4
|
||||
pluggy==0.13.1
|
||||
py==1.8.1
|
||||
pycparser==2.19
|
||||
pyOpenSSL==19.1.0
|
||||
pyparsing==2.4.7
|
||||
PySocks==1.7.1
|
||||
pytest==5.4.1
|
||||
python-dateutil==2.8.1
|
||||
requests==2.23.0
|
||||
six==1.14.0
|
||||
soupsieve==1.9.5
|
||||
Werkzeug==0.16.0
|
||||
stem==1.8.0
|
||||
urllib3==1.25.9
|
||||
waitress==1.4.3
|
||||
wcwidth==0.1.9
|
||||
Werkzeug==0.16.0
|
||||
|
|
Loading…
Reference in New Issue
Block a user