Merge remote-tracking branch 'origin/main' into heroku-app

This commit is contained in:
Ben Busby 2021-01-21 12:07:23 -05:00
commit 2de503f5c2
No known key found for this signature in database
GPG Key ID: 3B08611DF6E62ED2
40 changed files with 1330 additions and 518 deletions

9
.github/FUNDING.yml vendored Normal file
View File

@ -0,0 +1,9 @@
# These are supported funding model platforms
github: benbusby
ko_fi: benbusby
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
otechie: # Replace with a single Otechie username
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']

26
.github/workflows/buildx.yml vendored Normal file
View File

@ -0,0 +1,26 @@
name: buildx
on:
push:
branches: develop
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: checkout code
uses: actions/checkout@v2
- name: install buildx
id: buildx
uses: crazy-max/ghaction-docker-buildx@v1
with:
version: latest
- name: log in to docker hub
run: |
echo "${{ secrets.DOCKER_PASSWORD }}" | \
docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin
- name: build and push the image
run: |
docker buildx build --push \
--tag benbusby/whoogle-search:buildx-experimental \
--platform linux/amd64,linux/arm/v7,linux/arm64 .

22
.github/workflows/pep8.yml vendored Normal file
View File

@ -0,0 +1,22 @@
name: pep8
on:
push
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pycodestyle
- name: Run pycodestyle
run: |
pycodestyle --show-source --show-pep8 app/*
pycodestyle --show-source --show-pep8 test/*

1
.gitignore vendored
View File

@ -9,6 +9,7 @@ test/static
flask_session/
app/static/config
app/static/custom_config
app/static/bangs
# pip stuff
build/

View File

@ -1,7 +1,16 @@
FROM python:3.8-slim
WORKDIR /usr/src/app
RUN apt-get update && apt-get install -y build-essential libcurl4-openssl-dev libssl-dev
RUN apt-get update && apt-get install -y \
build-essential \
libcurl4-openssl-dev \
libssl-dev \
libxml2-dev \
libxslt-dev \
libffi-dev \
tor
COPY misc/tor/torrc /etc/tor/torrc
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
@ -17,13 +26,29 @@ ENV WHOOGLE_USER=$username
ARG password=''
ENV WHOOGLE_PASS=$password
ARG proxyuser=''
ENV WHOOGLE_PROXY_USER=$proxyuser
ARG proxypass=''
ENV WHOOGLE_PROXY_PASS=$proxypass
ARG proxytype=''
ENV WHOOGLE_PROXY_TYPE=$proxytype
ARG proxyloc=''
ENV WHOOGLE_PROXY_LOC=$proxyloc
ENV HTTPS_ONLY=$use_https
ARG whoogle_port=5000
ENV EXPOSE_PORT=$whoogle_port
ARG twitter_alt='nitter.net'
ENV WHOOGLE_ALT_TW=$twitter_alt
ARG youtube_alt='invidious.snopyta.org'
ENV WHOOGLE_ALT_YT=$youtube_alt
ARG instagram_alt='bibliogram.art/u'
ENV WHOOGLE_ALT_IG=$instagram_alt
COPY . .
EXPOSE $EXPOSE_PORT
CMD ["./run"]
CMD misc/tor/start-tor.sh & ./run

View File

@ -1,11 +1,11 @@
# Whoogle Search
![Whoogle Search](docs/banner.png)
[![Latest Release](https://img.shields.io/github/v/release/benbusby/whoogle-search)](https://github.com/benbusby/whoogle-search/releases)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Build Status](https://travis-ci.com/benbusby/whoogle-search.svg?branch=master)](https://travis-ci.com/benbusby/whoogle-search)
[![pep8](https://github.com/benbusby/whoogle-search/workflows/pep8/badge.svg)](https://github.com/benbusby/whoogle-search/actions?query=workflow%3Apep8)
[![codebeat badge](https://codebeat.co/badges/e96cada2-fb6f-4528-8285-7d72abd74e8d)](https://codebeat.co/projects/github-com-benbusby-shoogle-master)
[![Docker Pulls](https://img.shields.io/docker/pulls/benbusby/whoogle-search)](https://hub.docker.com/r/benbusby/whoogle-search)
[![Gitter](https://img.shields.io/gitter/room/benbusby/whoogle-search)](https://gitter.im/whoogle-search/community)
Get Google search results, but without any ads, javascript, AMP links, cookies, or IP address tracking. Easily deployable in one click as a Docker app, and customizable with a single config file. Quick and simple to implement as a primary search engine replacement on both desktop and mobile.
@ -13,10 +13,11 @@ Contents
1. [Features](#features)
2. [Dependencies](#dependencies)
3. [Install/Deploy](#install)
4. [Usage](#usage)
5. [Extra Steps](#extra-steps)
6. [FAQ](#faq)
7. [Screenshots](#screenshots)
4. [Environment Variables](#environment-variables)
5. [Usage](#usage)
6. [Extra Steps](#extra-steps)
7. [FAQ](#faq)
8. [Screenshots](#screenshots)
## Features
- No ads or sponsored content
@ -26,16 +27,18 @@ Contents
- No AMP links
- No URL tracking tags (i.e. utm=%s)
- No referrer header
- Tor and HTTP/SOCKS proxy support
- Autocomplete/search suggestions
- POST request search and suggestion queries (when possible)
- View images at full res without site redirect (currently mobile only)
- Dark mode
- Randomly generated User Agent
- Easy to install/deploy
- DDG-style bang (i.e. `!<tag> <query>`) searches
- Optional location-based searching (i.e. results near \<city\>)
- Optional NoJS mode to disable all Javascript in results
<sup>*If deployed to a remote server</sup>
<sup>*If deployed to a remote server, or configured to send requests through a VPN, Tor, proxy, etc.</sup>
## Dependencies
If using Heroku Quick Deploy, **you can skip this section**.
@ -65,7 +68,7 @@ Provides:
[![Run on Repl.it](https://repl.it/badge/github/benbusby/whoogle-search)](https://repl.it/github/benbusby/whoogle-search)
Provides:
- Free deployment of app (can be ran without account)
- Free deployment of app
- Free HTTPS url (https://\<app name\>.\<username\>\.repl\.co)
- Supports custom domains
- Downtime after periods of inactivity \([solution 1](https://repl.it/talk/ask/use-this-pingmat1replco-just-enter/28821/101298), [solution 2](https://repl.it/talk/learn/How-to-use-and-setup-UptimeRobot/9003)\)
@ -96,6 +99,7 @@ optional arguments:
--debug Activates debug mode for the server (default False)
--https-only Enforces HTTPS redirects for all requests (default False)
```
See the [available environment variables](#environment-variables) for additional configuration.
### E) Manual
Clone the repo and run the following commands to start the app in a local-only environment:
@ -108,6 +112,7 @@ source venv/bin/activate
pip install -r requirements.txt
./run
```
See the [available environment variables](#environment-variables) for additional configuration.
#### systemd Configuration
After building the virtual environment, you can add the following to `/lib/systemd/system/whoogle.service` to set up a Whoogle Search systemd service:
@ -117,6 +122,20 @@ After building the virtual environment, you can add the following to `/lib/syste
Description=Whoogle
[Service]
# Basic auth configuration, uncomment to enable
#Environment=WHOOGLE_USER=<username>
#Environment=WHOOGLE_PASS=<password>
# Proxy configuration, uncomment to enable
#Environment=WHOOGLE_PROXY_USER=<proxy username>
#Environment=WHOOGLE_PROXY_PASS=<proxy password>
#Environment=WHOOGLE_PROXY_TYPE=<proxy type (http|socks4|socks5)>
#Environment=WHOOGLE_PROXY_LOC=<proxy host/ip>
# Site alternative configurations, uncomment to enable
# Note: If not set, the feature will still be available
# with default values.
#Environment=WHOOGLE_ALT_TW=nitter.net
#Environment=WHOOGLE_ALT_YT=invidious.snopyta.org
#Environment=WHOOGLE_ALT_IG=bibliogram.art/u
Type=simple
User=root
WorkingDirectory=<whoogle_directory>
@ -143,6 +162,9 @@ sudo systemctl start whoogle
2. Clone and deploy the docker app using a method below:
#### Docker CLI
***Note:** For ARM machines, use the `buildx-experimental` Docker tag.*
Through Docker Hub:
```bash
docker pull benbusby/whoogle-search
@ -166,6 +188,19 @@ docker build --tag whoogle-search:1.0 .
docker run --publish 5000:5000 --detach --name whoogle-search whoogle-search:1.0
```
Optionally, you can also enable some of the following environment variables to further customize your instance:
```bash
docker run --publish 5000:5000 --detach --name whoogle-search \
-e WHOOGLE_USER=username \
-e WHOOGLE_PASS=password \
-e WHOOGLE_PROXY_USER=username \
-e WHOOGLE_PROXY_PASS=password \
-e WHOOGLE_PROXY_TYPE=socks5 \
-e WHOOGLE_PROXY_LOC=ip \
whoogle-search:1.0
```
And kill with: `docker rm --force whoogle-search`
#### Using [Heroku CLI](https://devcenter.heroku.com/articles/heroku-cli)
@ -181,6 +216,7 @@ heroku open
```
This series of commands can take a while, but once you run it once, you shouldn't have to run it again. The final command, `heroku open` will launch a tab in your web browser, where you can test out Whoogle and even [set it as your primary search engine](https://github.com/benbusby/whoogle#set-whoogle-as-your-primary-search-engine).
You may also edit environment variables from your app's Settings tab in the Heroku Dashboard.
#### Using your own server, or alternative container deployment
There are other methods for deploying docker containers that are well outlined in [this article](https://rollout.io/blog/the-shortlist-of-docker-hosting/), but there are too many to describe the setup for each one here. Generally it should be about the same amount of effort as the Heroku deployment.
@ -191,6 +227,23 @@ Depending on your preferences, you can also deploy the app yourself on your own
- SSL certificates (free through [Let's Encrypt](https://letsencrypt.org/getting-started/))
- A bit more experience or willingness to work through issues
## Environment Variables
There are a few optional environment variables available for customizing a Whoogle instance:
| Variable | Description |
| ------------------ | -------------------------------------------------------------- |
| WHOOGLE_USER | The username for basic auth. WHOOGLE_PASS must also be set if used. |
| WHOOGLE_PASS | The password for basic auth. WHOOGLE_USER must also be set if used. |
| WHOOGLE_PROXY_USER | The username of the proxy server. |
| WHOOGLE_PROXY_PASS | The password of the proxy server. |
| WHOOGLE_PROXY_TYPE | The type of the proxy server. Can be "socks5", "socks4", or "http". |
| WHOOGLE_PROXY_LOC | The location of the proxy server (host or ip). |
| EXPOSE_PORT | The port where Whoogle will be exposed. |
| HTTPS_ONLY | Enforce HTTPS. (See [here](https://github.com/benbusby/whoogle-search#https-enforcement)) |
| WHOOGLE_ALT_TW | The twitter.com alternative to use when site alternatives are enabled in the config. |
| WHOOGLE_ALT_YT | The youtube.com alternative to use when site alternatives are enabled in the config. |
| WHOOGLE_ALT_IG | The instagram.com alternative to use when site alternatives are enabled in the config. |
## Usage
Same as most search engines, with the exception of filtering by time range.
@ -256,7 +309,8 @@ Only needed if your setup requires Flask to redirect to HTTPS on its own -- gene
Note: You should have your own domain name and [an https certificate](https://letsencrypt.org/getting-started/) in order for this to work properly.
- Heroku: Ensure that the `Root URL` configuration on the home page begins with `https://` and not `http://`
- Docker: Add `--build-arg use_https=1` to your run command
- Docker build: Add `--build-arg use_https=1` to your build command
- Docker image: Set the environment variable HTTPS_ONLY=1
- Pip/Pipx: Add the `--https-only` flag to the end of the `whoogle-search` command
- Default `run` script: Modify the script locally to include the `--https-only` flag at the end of the python run command
@ -277,7 +331,7 @@ A lot of the app currently piggybacks on Google's existing support for fetching
## Screenshots
#### Desktop
![Whoogle Desktop](app/static/img/docs/screenshot_desktop.jpg)
![Whoogle Desktop](docs/screenshot_desktop.jpg)
#### Mobile
![Whoogle Mobile](app/static/img/docs/screenshot_mobile.jpg)
![Whoogle Mobile](docs/screenshot_mobile.jpg)

View File

@ -1,8 +1,64 @@
{
"name": "Whoogle Search",
"description": "A lightweight, privacy-oriented, containerized Google search proxy for desktop/mobile that removes Javascript, AMP links, tracking, and ads/sponsored content",
"repository": "https://github.com/benbusby/whoogle-search",
"logo": "https://raw.githubusercontent.com/benbusby/whoogle-search/master/app/static/img/favicon/ms-icon-150x150.png",
"keywords": ["search", "metasearch", "flask", "docker", "heroku", "adblock", "degoogle", "privacy"],
"stack": "container"
"name": "Whoogle Search",
"description": "A lightweight, privacy-oriented, containerized Google search proxy for desktop/mobile that removes Javascript, AMP links, tracking, and ads/sponsored content",
"repository": "https://github.com/benbusby/whoogle-search",
"logo": "https://raw.githubusercontent.com/benbusby/whoogle-search/master/app/static/img/favicon/ms-icon-150x150.png",
"keywords": [
"search",
"metasearch",
"flask",
"docker",
"heroku",
"adblock",
"degoogle",
"privacy"
],
"stack": "container",
"env": {
"WHOOGLE_USER": {
"description": "The username for basic auth. WHOOGLE_PASS must also be set if used. Leave empty to disable.",
"value": "",
"required": false
},
"WHOOGLE_PASS": {
"description": "The password for basic auth. WHOOGLE_USER must also be set if used. Leave empty to disable.",
"value": "",
"required": false
},
"WHOOGLE_PROXY_USER": {
"description": "The username of the proxy server. Leave empty to disable.",
"value": "",
"required": false
},
"WHOOGLE_PROXY_PASS": {
"description": "The password of the proxy server. Leave empty to disable.",
"value": "",
"required": false
},
"WHOOGLE_PROXY_TYPE": {
"description": "The type of the proxy server. For example \"socks5\". Leave empty to disable.",
"value": "",
"required": false
},
"WHOOGLE_PROXY_LOC": {
"description": "The location of the proxy server (host or ip). Leave empty to disable.",
"value": "",
"required": false
},
"WHOOGLE_ALT_TW": {
"description": "The site to use as a replacement for twitter.com when site alternatives are enabled in the config.",
"value": "",
"required": false
},
"WHOOGLE_ALT_YT": {
"description": "The site to use as a replacement for youtube.com when site alternatives are enabled in the config.",
"value": "",
"required": false
},
"WHOOGLE_ALT_IG": {
"description": "The site to use as a replacement for instagram.com when site alternatives are enabled in the config.",
"value": "",
"required": false
}
}
}

View File

@ -1,20 +1,45 @@
from app.request import send_tor_signal
from app.utils.session_utils import generate_user_keys
from app.utils.gen_ddg_bangs import gen_bangs_json
from flask import Flask
from flask_session import Session
import json
import os
from stem import Signal
app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static')
app = Flask(__name__, static_folder=os.path.dirname(
os.path.abspath(__file__)) + '/static')
app.user_elements = {}
app.default_key_set = generate_user_keys()
app.no_cookie_ips = []
app.config['SECRET_KEY'] = os.urandom(32)
app.config['SESSION_TYPE'] = 'filesystem'
app.config['VERSION_NUMBER'] = '0.2.1'
app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__)))
app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static'))
app.config['CONFIG_PATH'] = os.getenv('CONFIG_VOLUME', os.path.join(app.config['STATIC_FOLDER'], 'config'))
app.config['DEFAULT_CONFIG'] = os.path.join(app.config['CONFIG_PATH'], 'config.json')
app.config['SESSION_FILE_DIR'] = os.path.join(app.config['CONFIG_PATH'], 'session')
app.config['VERSION_NUMBER'] = '0.3.0'
app.config['APP_ROOT'] = os.getenv(
'APP_ROOT',
os.path.dirname(os.path.abspath(__file__)))
app.config['LANGUAGES'] = json.load(open(
os.path.join(app.config['APP_ROOT'], '../misc/languages.json')))
app.config['COUNTRIES'] = json.load(open(
os.path.join(app.config['APP_ROOT'], '../misc/countries.json')))
app.config['STATIC_FOLDER'] = os.getenv(
'STATIC_FOLDER',
os.path.join(app.config['APP_ROOT'], 'static'))
app.config['CONFIG_PATH'] = os.getenv(
'CONFIG_VOLUME',
os.path.join(app.config['STATIC_FOLDER'], 'config'))
app.config['DEFAULT_CONFIG'] = os.path.join(
app.config['CONFIG_PATH'],
'config.json')
app.config['SESSION_FILE_DIR'] = os.path.join(
app.config['CONFIG_PATH'],
'session')
app.config['BANG_PATH'] = os.getenv(
'CONFIG_VOLUME',
os.path.join(app.config['STATIC_FOLDER'], 'bangs'))
app.config['BANG_FILE'] = os.path.join(
app.config['BANG_PATH'],
'bangs.json')
if not os.path.exists(app.config['CONFIG_PATH']):
os.makedirs(app.config['CONFIG_PATH'])
@ -22,6 +47,15 @@ if not os.path.exists(app.config['CONFIG_PATH']):
if not os.path.exists(app.config['SESSION_FILE_DIR']):
os.makedirs(app.config['SESSION_FILE_DIR'])
# Generate DDG bang filter, and create path if it doesn't exist yet
if not os.path.exists(app.config['BANG_PATH']):
os.makedirs(app.config['BANG_PATH'])
if not os.path.exists(app.config['BANG_FILE']):
gen_bangs_json(app.config['BANG_FILE'])
Session(app)
from app import routes
# Attempt to acquire tor identity, to determine if Tor config is available
send_tor_signal(Signal.HEARTBEAT)
from app import routes # noqa

View File

@ -32,20 +32,27 @@ class Filter:
def reskin(self, page):
# Aesthetic only re-skinning
if self.dark:
page = page.replace('fff', '000').replace('202124', 'ddd').replace('1967D2', '3b85ea')
page = page.replace(
'fff', '000').replace(
'202124', 'ddd').replace(
'1967D2', '3b85ea')
return page
def encrypt_path(self, msg, is_element=False):
# Encrypts path to avoid plaintext results in logs
if is_element:
# Element paths are tracked differently in order for the element key to be regenerated
# once all elements have been loaded
enc_path = Fernet(self.user_keys['element_key']).encrypt(msg.encode()).decode()
# Element paths are encrypted separately from text, to allow key
# regeneration once all items have been served to the user
enc_path = Fernet(
self.user_keys['element_key']
).encrypt(msg.encode()).decode()
self._elements += 1
return enc_path
return Fernet(self.user_keys['text_key']).encrypt(msg.encode()).decode()
return Fernet(
self.user_keys['text_key']
).encrypt(msg.encode()).decode()
def clean(self, soup):
self.main_divs = soup.find('div', {'id': 'main'})
@ -75,7 +82,8 @@ class Filter:
footer = soup.find('footer')
if footer:
# Remove divs that have multiple links beyond just page navigation
[_.decompose() for _ in footer.find_all('div', recursive=False) if len(_.find_all('a', href=True)) > 2]
[_.decompose() for _ in footer.find_all('div', recursive=False)
if len(_.find_all('a', href=True)) > 3]
header = soup.find('header')
if header:
@ -88,16 +96,34 @@ class Filter:
return
for div in [_ for _ in self.main_divs.find_all('div', recursive=True)]:
has_ad = len([_ for _ in div.find_all('span', recursive=True) if has_ad_content(_.text)])
_ = div.decompose() if has_ad else None
div_ads = [_ for _ in div.find_all('span', recursive=True)
if has_ad_content(_.text)]
_ = div.decompose() if len(div_ads) else None
def fix_question_section(self):
if not self.main_divs:
return
question_divs = [_ for _ in self.main_divs.find_all('div', recursive=False) if len(_.find_all('h2')) > 0]
question_divs = [_ for _ in self.main_divs.find_all(
'div', recursive=False
) if len(_.find_all('h2')) > 0]
if len(question_divs) == 0:
return
# Wrap section in details element to allow collapse/expand
details = BeautifulSoup(features='html.parser').new_tag('details')
summary = BeautifulSoup(features='html.parser').new_tag('summary')
summary.string = question_divs[0].find('h2').text
question_divs[0].find('h2').decompose()
details.append(summary)
question_divs[0].wrap(details)
for question_div in question_divs:
questions = [_ for _ in question_div.find_all('div', recursive=True) if _.text.endswith('?')]
questions = [_ for _ in question_div.find_all(
'div', recursive=True
) if _.text.endswith('?')]
for question in questions:
question['style'] = 'padding: 10px; font-style: italic;'
@ -107,18 +133,22 @@ class Filter:
element_src = 'https:' + element_src
elif element_src.startswith(LOGO_URL):
# Re-brand with Whoogle logo
element['src'] = '/static/img/logo.png'
element['src'] = 'static/img/logo.png'
element['style'] = 'height:40px;width:162px'
return
elif element_src.startswith(GOOG_IMG):
element['src'] = BLANK_B64
return
element['src'] = '/element?url=' + self.encrypt_path(element_src, is_element=True) + \
'&type=' + urlparse.quote(mime)
# TODO: Non-mobile image results link to website instead of image
element['src'] = 'element?url=' + self.encrypt_path(
element_src,
is_element=True) + '&type=' + urlparse.quote(mime)
# FIXME: Non-mobile image results link to website instead of image
# if not self.mobile:
# img.append(BeautifulSoup(FULL_RES_IMG.format(element_src), 'html.parser'))
# img.append(
# BeautifulSoup(FULL_RES_IMG.format(element_src),
# 'html.parser'))
def update_styling(self, soup):
# Remove unnecessary button(s)
@ -132,8 +162,9 @@ class Filter:
# Update logo
logo = soup.find('a', {'class': 'l'})
if logo and self.mobile:
logo['style'] = 'display:flex; justify-content:center; align-items:center; color:#685e79; ' \
'font-size:18px; '
logo['style'] = ('display:flex; justify-content:center; '
'align-items:center; color:#685e79; '
'font-size:18px; ')
# Fix search bar length on mobile
try:
@ -145,8 +176,8 @@ class Filter:
def update_link(self, link):
# Replace href with only the intended destination (no "utm" type tags)
href = link['href'].replace('https://www.google.com', '')
if '/advanced_search' in href or 'tbm=shop' in href:
# TODO: The "Shopping" tab requires further filtering (see #136)
if 'advanced_search' in href or 'tbm=shop' in href:
# FIXME: The "Shopping" tab requires further filtering (see #136)
# Temporarily removing all links to that tab for now.
link.decompose()
return
@ -154,20 +185,26 @@ class Filter:
link['target'] = '_blank'
result_link = urlparse.urlparse(href)
query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else ''
query_link = parse_qs(
result_link.query
)['q'][0] if '?q=' in href else ''
if query_link.startswith('/'):
# Internal google links (i.e. mail, maps, etc) should still be forwarded to Google
# Internal google links (i.e. mail, maps, etc) should still
# be forwarded to Google
link['href'] = 'https://google.com' + query_link
elif '/search?q=' in href:
# "li:1" implies the query should be interpreted verbatim, so we wrap it in double quotes
# "li:1" implies the query should be interpreted verbatim,
# which is accomplished by wrapping the query in double quotes
if 'li:1' in href:
query_link = '"' + query_link + '"'
new_search = '/search?q=' + self.encrypt_path(query_link)
new_search = 'search?q=' + self.encrypt_path(query_link)
query_params = parse_qs(urlparse.urlparse(href).query)
for param in VALID_PARAMS:
param_val = query_params[param][0] if param in query_params else ''
if param not in query_params:
continue
param_val = query_params[param][0]
new_search += '&' + param + '=' + param_val
link['href'] = new_search
elif 'url?q=' in href:
@ -182,9 +219,11 @@ class Filter:
# Replace link location if "alts" config is enabled
if self.alt_redirect:
# Search and replace all link descriptions with alternative location
# Search and replace all link descriptions
# with alternative location
link['href'] = get_site_alt(link['href'])
link_desc = link.find_all(text=re.compile('|'.join(SITE_ALTS.keys())))
link_desc = link.find_all(
text=re.compile('|'.join(SITE_ALTS.keys())))
if len(link_desc) == 0:
return

View File

@ -1,302 +1,4 @@
class Config:
# Derived from here:
# https://sites.google.com/site/tomihasa/google-language-codes#searchlanguage
LANGUAGES = [
{'name': 'Default (none specified)', 'value': ''},
{'name': 'English', 'value': 'lang_en'},
{'name': 'Afrikaans', 'value': 'lang_af'},
{'name': 'Arabic', 'value': 'lang_ar'},
{'name': 'Armenian', 'value': 'lang_hy'},
{'name': 'Belarusian', 'value': 'lang_be'},
{'name': 'Bulgarian', 'value': 'lang_bg'},
{'name': 'Catalan', 'value': 'lang_ca'},
{'name': 'Chinese (Simplified)', 'value': 'lang_zh-CN'},
{'name': 'Chinese (Traditional)', 'value': 'lang_zh-TW'},
{'name': 'Croatian', 'value': 'lang_hr'},
{'name': 'Czech', 'value': 'lang_cs'},
{'name': 'Danish', 'value': 'lang_da'},
{'name': 'Dutch', 'value': 'lang_nl'},
{'name': 'Esperanto', 'value': 'lang_eo'},
{'name': 'Estonian', 'value': 'lang_et'},
{'name': 'Filipino', 'value': 'lang_tl'},
{'name': 'Finnish', 'value': 'lang_fi'},
{'name': 'French', 'value': 'lang_fr'},
{'name': 'German', 'value': 'lang_de'},
{'name': 'Greek', 'value': 'lang_el'},
{'name': 'Hebrew', 'value': 'lang_iw'},
{'name': 'Hindi', 'value': 'lang_hi'},
{'name': 'Hungarian', 'value': 'lang_hu'},
{'name': 'Icelandic', 'value': 'lang_is'},
{'name': 'Indonesian', 'value': 'lang_id'},
{'name': 'Italian', 'value': 'lang_it'},
{'name': 'Japanese', 'value': 'lang_ja'},
{'name': 'Korean', 'value': 'lang_ko'},
{'name': 'Latvian', 'value': 'lang_lv'},
{'name': 'Lithuanian', 'value': 'lang_lt'},
{'name': 'Norwegian', 'value': 'lang_no'},
{'name': 'Persian', 'value': 'lang_fa'},
{'name': 'Polish', 'value': 'lang_pl'},
{'name': 'Portuguese', 'value': 'lang_pt'},
{'name': 'Romanian', 'value': 'lang_ro'},
{'name': 'Russian', 'value': 'lang_ru'},
{'name': 'Serbian', 'value': 'lang_sr'},
{'name': 'Slovak', 'value': 'lang_sk'},
{'name': 'Slovenian', 'value': 'lang_sl'},
{'name': 'Spanish', 'value': 'lang_es'},
{'name': 'Swahili', 'value': 'lang_sw'},
{'name': 'Swedish', 'value': 'lang_sv'},
{'name': 'Thai', 'value': 'lang_th'},
{'name': 'Turkish', 'value': 'lang_tr'},
{'name': 'Ukrainian', 'value': 'lang_uk'},
{'name': 'Vietnamese', 'value': 'lang_vi'},
]
COUNTRIES = [
{'name': 'Default (none)', 'value': ''},
{'name': 'Afghanistan', 'value': 'countryAF'},
{'name': 'Albania', 'value': 'countryAL'},
{'name': 'Algeria', 'value': 'countryDZ'},
{'name': 'American Samoa', 'value': 'countryAS'},
{'name': 'Andorra', 'value': 'countryAD'},
{'name': 'Angola', 'value': 'countryAO'},
{'name': 'Anguilla', 'value': 'countryAI'},
{'name': 'Antarctica', 'value': 'countryAQ'},
{'name': 'Antigua and Barbuda', 'value': 'countryAG'},
{'name': 'Argentina', 'value': 'countryAR'},
{'name': 'Armenia', 'value': 'countryAM'},
{'name': 'Aruba', 'value': 'countryAW'},
{'name': 'Australia', 'value': 'countryAU'},
{'name': 'Austria', 'value': 'countryAT'},
{'name': 'Azerbaijan', 'value': 'countryAZ'},
{'name': 'Bahamas', 'value': 'countryBS'},
{'name': 'Bahrain', 'value': 'countryBH'},
{'name': 'Bangladesh', 'value': 'countryBD'},
{'name': 'Barbados', 'value': 'countryBB'},
{'name': 'Belarus', 'value': 'countryBY'},
{'name': 'Belgium', 'value': 'countryBE'},
{'name': 'Belize', 'value': 'countryBZ'},
{'name': 'Benin', 'value': 'countryBJ'},
{'name': 'Bermuda', 'value': 'countryBM'},
{'name': 'Bhutan', 'value': 'countryBT'},
{'name': 'Bolivia', 'value': 'countryBO'},
{'name': 'Bosnia and Herzegovina', 'value': 'countryBA'},
{'name': 'Botswana', 'value': 'countryBW'},
{'name': 'Bouvet Island', 'value': 'countryBV'},
{'name': 'Brazil', 'value': 'countryBR'},
{'name': 'British Indian Ocean Territory', 'value': 'countryIO'},
{'name': 'Brunei Darussalam', 'value': 'countryBN'},
{'name': 'Bulgaria', 'value': 'countryBG'},
{'name': 'Burkina Faso', 'value': 'countryBF'},
{'name': 'Burundi', 'value': 'countryBI'},
{'name': 'Cambodia', 'value': 'countryKH'},
{'name': 'Cameroon', 'value': 'countryCM'},
{'name': 'Canada', 'value': 'countryCA'},
{'name': 'Cape Verde', 'value': 'countryCV'},
{'name': 'Cayman Islands', 'value': 'countryKY'},
{'name': 'Central African Republic', 'value': 'countryCF'},
{'name': 'Chad', 'value': 'countryTD'},
{'name': 'Chile', 'value': 'countryCL'},
{'name': 'China', 'value': 'countryCN'},
{'name': 'Christmas Island', 'value': 'countryCX'},
{'name': 'Cocos (Keeling) Islands', 'value': 'countryCC'},
{'name': 'Colombia', 'value': 'countryCO'},
{'name': 'Comoros', 'value': 'countryKM'},
{'name': 'Congo', 'value': 'countryCG'},
{'name': 'Congo, Democratic Republic of the', 'value': 'countryCD'},
{'name': 'Cook Islands', 'value': 'countryCK'},
{'name': 'Costa Rica', 'value': 'countryCR'},
{'name': 'Cote D\'ivoire', 'value': 'countryCI'},
{'name': 'Croatia (Hrvatska)', 'value': 'countryHR'},
{'name': 'Cuba', 'value': 'countryCU'},
{'name': 'Cyprus', 'value': 'countryCY'},
{'name': 'Czech Republic', 'value': 'countryCZ'},
{'name': 'Denmark', 'value': 'countryDK'},
{'name': 'Djibouti', 'value': 'countryDJ'},
{'name': 'Dominica', 'value': 'countryDM'},
{'name': 'Dominican Republic', 'value': 'countryDO'},
{'name': 'East Timor', 'value': 'countryTP'},
{'name': 'Ecuador', 'value': 'countryEC'},
{'name': 'Egypt', 'value': 'countryEG'},
{'name': 'El Salvador', 'value': 'countrySV'},
{'name': 'Equatorial Guinea', 'value': 'countryGQ'},
{'name': 'Eritrea', 'value': 'countryER'},
{'name': 'Estonia', 'value': 'countryEE'},
{'name': 'Ethiopia', 'value': 'countryET'},
{'name': 'European Union', 'value': 'countryEU'},
{'name': 'Falkland Islands (Malvinas)', 'value': 'countryFK'},
{'name': 'Faroe Islands', 'value': 'countryFO'},
{'name': 'Fiji', 'value': 'countryFJ'},
{'name': 'Finland', 'value': 'countryFI'},
{'name': 'France', 'value': 'countryFR'},
{'name': 'France\, Metropolitan', 'value': 'countryFX'},
{'name': 'French Guiana', 'value': 'countryGF'},
{'name': 'French Polynesia', 'value': 'countryPF'},
{'name': 'French Southern Territories', 'value': 'countryTF'},
{'name': 'Gabon', 'value': 'countryGA'},
{'name': 'Gambia', 'value': 'countryGM'},
{'name': 'Georgia', 'value': 'countryGE'},
{'name': 'Germany', 'value': 'countryDE'},
{'name': 'Ghana', 'value': 'countryGH'},
{'name': 'Gibraltar', 'value': 'countryGI'},
{'name': 'Greece', 'value': 'countryGR'},
{'name': 'Greenland', 'value': 'countryGL'},
{'name': 'Grenada', 'value': 'countryGD'},
{'name': 'Guadeloupe', 'value': 'countryGP'},
{'name': 'Guam', 'value': 'countryGU'},
{'name': 'Guatemala', 'value': 'countryGT'},
{'name': 'Guinea', 'value': 'countryGN'},
{'name': 'Guinea-Bissau', 'value': 'countryGW'},
{'name': 'Guyana', 'value': 'countryGY'},
{'name': 'Haiti', 'value': 'countryHT'},
{'name': 'Heard Island and Mcdonald Islands', 'value': 'countryHM'},
{'name': 'Holy See (Vatican City State)', 'value': 'countryVA'},
{'name': 'Honduras', 'value': 'countryHN'},
{'name': 'Hong Kong', 'value': 'countryHK'},
{'name': 'Hungary', 'value': 'countryHU'},
{'name': 'Iceland', 'value': 'countryIS'},
{'name': 'India', 'value': 'countryIN'},
{'name': 'Indonesia', 'value': 'countryID'},
{'name': 'Iran, Islamic Republic of', 'value': 'countryIR'},
{'name': 'Iraq', 'value': 'countryIQ'},
{'name': 'Ireland', 'value': 'countryIE'},
{'name': 'Israel', 'value': 'countryIL'},
{'name': 'Italy', 'value': 'countryIT'},
{'name': 'Jamaica', 'value': 'countryJM'},
{'name': 'Japan', 'value': 'countryJP'},
{'name': 'Jordan', 'value': 'countryJO'},
{'name': 'Kazakhstan', 'value': 'countryKZ'},
{'name': 'Kenya', 'value': 'countryKE'},
{'name': 'Kiribati', 'value': 'countryKI'},
{'name': 'Korea, Democratic People\'s Republic of', 'value': 'countryKP'},
{'name': 'Korea, Republic of', 'value': 'countryKR'},
{'name': 'Kuwait', 'value': 'countryKW'},
{'name': 'Kyrgyzstan', 'value': 'countryKG'},
{'name': 'Lao People\'s Democratic Republic', 'value': 'countryLA'},
{'name': 'Latvia', 'value': 'countryLV'},
{'name': 'Lebanon', 'value': 'countryLB'},
{'name': 'Lesotho', 'value': 'countryLS'},
{'name': 'Liberia', 'value': 'countryLR'},
{'name': 'Libyan Arab Jamahiriya', 'value': 'countryLY'},
{'name': 'Liechtenstein', 'value': 'countryLI'},
{'name': 'Lithuania', 'value': 'countryLT'},
{'name': 'Luxembourg', 'value': 'countryLU'},
{'name': 'Macao', 'value': 'countryMO'},
{'name': 'Macedonia, the Former Yugosalv Republic of', 'value': 'countryMK'},
{'name': 'Madagascar', 'value': 'countryMG'},
{'name': 'Malawi', 'value': 'countryMW'},
{'name': 'Malaysia', 'value': 'countryMY'},
{'name': 'Maldives', 'value': 'countryMV'},
{'name': 'Mali', 'value': 'countryML'},
{'name': 'Malta', 'value': 'countryMT'},
{'name': 'Marshall Islands', 'value': 'countryMH'},
{'name': 'Martinique', 'value': 'countryMQ'},
{'name': 'Mauritania', 'value': 'countryMR'},
{'name': 'Mauritius', 'value': 'countryMU'},
{'name': 'Mayotte', 'value': 'countryYT'},
{'name': 'Mexico', 'value': 'countryMX'},
{'name': 'Micronesia, Federated States of', 'value': 'countryFM'},
{'name': 'Moldova, Republic of', 'value': 'countryMD'},
{'name': 'Monaco', 'value': 'countryMC'},
{'name': 'Mongolia', 'value': 'countryMN'},
{'name': 'Montserrat', 'value': 'countryMS'},
{'name': 'Morocco', 'value': 'countryMA'},
{'name': 'Mozambique', 'value': 'countryMZ'},
{'name': 'Myanmar', 'value': 'countryMM'},
{'name': 'Namibia', 'value': 'countryNA'},
{'name': 'Nauru', 'value': 'countryNR'},
{'name': 'Nepal', 'value': 'countryNP'},
{'name': 'Netherlands', 'value': 'countryNL'},
{'name': 'Netherlands Antilles', 'value': 'countryAN'},
{'name': 'New Caledonia', 'value': 'countryNC'},
{'name': 'New Zealand', 'value': 'countryNZ'},
{'name': 'Nicaragua', 'value': 'countryNI'},
{'name': 'Niger', 'value': 'countryNE'},
{'name': 'Nigeria', 'value': 'countryNG'},
{'name': 'Niue', 'value': 'countryNU'},
{'name': 'Norfolk Island', 'value': 'countryNF'},
{'name': 'Northern Mariana Islands', 'value': 'countryMP'},
{'name': 'Norway', 'value': 'countryNO'},
{'name': 'Oman', 'value': 'countryOM'},
{'name': 'Pakistan', 'value': 'countryPK'},
{'name': 'Palau', 'value': 'countryPW'},
{'name': 'Palestinian Territory', 'value': 'countryPS'},
{'name': 'Panama', 'value': 'countryPA'},
{'name': 'Papua New Guinea', 'value': 'countryPG'},
{'name': 'Paraguay', 'value': 'countryPY'},
{'name': 'Peru', 'value': 'countryPE'},
{'name': 'Philippines', 'value': 'countryPH'},
{'name': 'Pitcairn', 'value': 'countryPN'},
{'name': 'Poland', 'value': 'countryPL'},
{'name': 'Portugal', 'value': 'countryPT'},
{'name': 'Puerto Rico', 'value': 'countryPR'},
{'name': 'Qatar', 'value': 'countryQA'},
{'name': 'Reunion', 'value': 'countryRE'},
{'name': 'Romania', 'value': 'countryRO'},
{'name': 'Russian Federation', 'value': 'countryRU'},
{'name': 'Rwanda', 'value': 'countryRW'},
{'name': 'Saint Helena', 'value': 'countrySH'},
{'name': 'Saint Kitts and Nevis', 'value': 'countryKN'},
{'name': 'Saint Lucia', 'value': 'countryLC'},
{'name': 'Saint Pierre and Miquelon', 'value': 'countryPM'},
{'name': 'Saint Vincent and the Grenadines', 'value': 'countryVC'},
{'name': 'Samoa', 'value': 'countryWS'},
{'name': 'San Marino', 'value': 'countrySM'},
{'name': 'Sao Tome and Principe', 'value': 'countryST'},
{'name': 'Saudi Arabia', 'value': 'countrySA'},
{'name': 'Senegal', 'value': 'countrySN'},
{'name': 'Serbia and Montenegro', 'value': 'countryCS'},
{'name': 'Seychelles', 'value': 'countrySC'},
{'name': 'Sierra Leone', 'value': 'countrySL'},
{'name': 'Singapore', 'value': 'countrySG'},
{'name': 'Slovakia', 'value': 'countrySK'},
{'name': 'Slovenia', 'value': 'countrySI'},
{'name': 'Solomon Islands', 'value': 'countrySB'},
{'name': 'Somalia', 'value': 'countrySO'},
{'name': 'South Africa', 'value': 'countryZA'},
{'name': 'South Georgia and the South Sandwich Islands', 'value': 'countryGS'},
{'name': 'Spain', 'value': 'countryES'},
{'name': 'Sri Lanka', 'value': 'countryLK'},
{'name': 'Sudan', 'value': 'countrySD'},
{'name': 'Suriname', 'value': 'countrySR'},
{'name': 'Svalbard and Jan Mayen', 'value': 'countrySJ'},
{'name': 'Swaziland', 'value': 'countrySZ'},
{'name': 'Sweden', 'value': 'countrySE'},
{'name': 'Switzerland', 'value': 'countryCH'},
{'name': 'Syrian Arab Republic', 'value': 'countrySY'},
{'name': 'Taiwan, Province of China', 'value': 'countryTW'},
{'name': 'Tajikistan', 'value': 'countryTJ'},
{'name': 'Tanzania, United Republic of', 'value': 'countryTZ'},
{'name': 'Thailand', 'value': 'countryTH'},
{'name': 'Togo', 'value': 'countryTG'},
{'name': 'Tokelau', 'value': 'countryTK'},
{'name': 'Tonga', 'value': 'countryTO'},
{'name': 'Trinidad and Tobago', 'value': 'countryTT'},
{'name': 'Tunisia', 'value': 'countryTN'},
{'name': 'Turkey', 'value': 'countryTR'},
{'name': 'Turkmenistan', 'value': 'countryTM'},
{'name': 'Turks and Caicos Islands', 'value': 'countryTC'},
{'name': 'Tuvalu', 'value': 'countryTV'},
{'name': 'Uganda', 'value': 'countryUG'},
{'name': 'Ukraine', 'value': 'countryUA'},
{'name': 'United Arab Emirates', 'value': 'countryAE'},
{'name': 'United Kingdom', 'value': 'countryUK'},
{'name': 'United States', 'value': 'countryUS'},
{'name': 'United States Minor Outlying Islands', 'value': 'countryUM'},
{'name': 'Uruguay', 'value': 'countryUY'},
{'name': 'Uzbekistan', 'value': 'countryUZ'},
{'name': 'Vanuatu', 'value': 'countryVU'},
{'name': 'Venezuela', 'value': 'countryVE'},
{'name': 'Vietnam', 'value': 'countryVN'},
{'name': 'Virgin Islands, British', 'value': 'countryVG'},
{'name': 'Virgin Islands, U.S.', 'value': 'countryVI'},
{'name': 'Wallis and Futuna', 'value': 'countryWF'},
{'name': 'Western Sahara', 'value': 'countryEH'},
{'name': 'Yemen', 'value': 'countryYE'},
{'name': 'Yugoslavia', 'value': 'countryYU'},
{'name': 'Zambia', 'value': 'countryZM'},
{'name': 'Zimbabwe', 'value': 'countryZW'}
]
def __init__(self, **kwargs):
self.url = ''
self.lang_search = ''
@ -305,10 +7,17 @@ class Config:
self.safe = False
self.dark = False
self.nojs = False
self.tor = False
self.near = ''
self.alts = False
self.new_tab = False
self.get_only = False
self.safe_keys = [
'lang_search',
'lang_interface',
'ctry',
'dark'
]
for key, value in kwargs.items():
setattr(self, key, value)
@ -324,3 +33,34 @@ class Config:
def __contains__(self, name):
    """Support ``name in config`` by reporting whether the attribute exists."""
    return hasattr(self, name)
def is_safe_key(self, key) -> bool:
    """Report whether a config option may be overridden through the url.

    Only the options listed in ``self.safe_keys`` are considered safe.

    Args:
        key (str) -- the config option name to look up

    Returns:
        bool -- True if the key is listed in ``self.safe_keys``,
                False otherwise
    """
    allowed_keys = self.safe_keys
    return key in allowed_keys
def from_params(self, params) -> 'Config':
    """Apply url-supplied overrides to this config, in place.

    Lets a single search carry its own configuration, which is mainly
    useful on public instances. Parameters that are not approved by
    is_safe_key() are ignored.

    Args:
        params -- the url arguments (only keys accepted by is_safe_key()
                  are applied)

    Returns:
        Config -- this same config object, after modification
    """
    for name in params.keys():
        if self.is_safe_key(name):
            self[name] = params.get(name)

    return self

View File

@ -1,12 +1,16 @@
from lxml import etree
from app.models.config import Config
import xml.etree.ElementTree as ET
import random
import requests
from requests import Response
from requests import Response, ConnectionError
import urllib.parse as urlparse
import os
from stem import Signal, SocketError
from stem.control import Controller
# Core Google search URLs
SEARCH_URL = 'https://www.google.com/search?gbv=1&q='
AUTOCOMPLETE_URL = 'https://suggestqueries.google.com/complete/search?client=toolbar&'
AUTOCOMPLETE_URL = ('https://suggestqueries.google.com/'
'complete/search?client=toolbar&')
MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
@ -15,7 +19,36 @@ DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'
VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source', 'nfpr']
def gen_user_agent(is_mobile):
class TorError(Exception):
    """Raised when a request routed through Tor cannot be completed.

    Attributes:
        message -- human readable description of what went wrong
        disable -- when True, signals that Tor should be switched off in
            the user config (intended only for cases where the connection
            has been dropped altogether)
    """

    def __init__(self, message, disable=False):
        # The message doubles as the exception's sole positional argument
        super().__init__(message)
        self.message = message
        self.disable = disable
def send_tor_signal(signal: Signal) -> bool:
    """Send a control signal to the local Tor controller on port 9051.

    The outcome is also recorded in the TOR_AVAILABLE environment
    variable ('1' on success, '0' on a connection failure).

    Args:
        signal: The stem Signal to send to the controller

    Returns:
        bool: True if the signal was sent, False if the controller could
              not be reached
    """
    connection_errors = (SocketError, ConnectionRefusedError, ConnectionError)

    try:
        with Controller.from_port(port=9051) as controller:
            controller.authenticate()
            controller.signal(signal)
            os.environ['TOR_AVAILABLE'] = '1'
    except connection_errors:
        os.environ['TOR_AVAILABLE'] = '0'
        return False

    return True
def gen_user_agent(is_mobile) -> str:
mozilla = random.choice(['Moo', 'Woah', 'Bro', 'Slow']) + 'zilla'
firefox = random.choice(['Choir', 'Squier', 'Higher', 'Wire']) + 'fox'
linux = random.choice(['Win', 'Sin', 'Gin', 'Fin', 'Kin']) + 'ux'
@ -26,7 +59,7 @@ def gen_user_agent(is_mobile):
return DESKTOP_UA.format(mozilla, linux, firefox)
def gen_query(query, args, config, near_city=None):
def gen_query(query, args, config, near_city=None) -> str:
param_dict = {key: '' for key in VALID_PARAMS}
# Use :past(hour/day/week/month/year) if available
@ -39,11 +72,16 @@ def gen_query(query, args, config, near_city=None):
result_tbs = args.get('tbs')
param_dict['tbs'] = '&tbs=' + result_tbs
# Occasionally the 'tbs' param provided by google also contains a field for 'lr', but formatted
# strangely. This is a (admittedly not very elegant) solution for this.
# Ex/ &tbs=qdr:h,lr:lang_1pl --> the lr param needs to be extracted and have the "1" digit removed in this case
# Occasionally the 'tbs' param provided by google also contains a
# field for 'lr', but formatted strangely. This is a rough solution
# for this.
#
# Example:
# &tbs=qdr:h,lr:lang_1pl
# -- the lr param needs to be extracted and remove the leading '1'
sub_lang = [_ for _ in result_tbs.split(',') if 'lr:' in _]
sub_lang = sub_lang[0][sub_lang[0].find('lr:') + 3:len(sub_lang[0])] if len(sub_lang) > 0 else ''
sub_lang = sub_lang[0][sub_lang[0].find('lr:') +
3:len(sub_lang[0])] if len(sub_lang) > 0 else ''
# Ensure search query is parsable
query = urlparse.quote(query)
@ -60,20 +98,26 @@ def gen_query(query, args, config, near_city=None):
if near_city:
param_dict['near'] = '&near=' + urlparse.quote(near_city)
# Set language for results (lr) if source isn't set, otherwise use the result
# language param provided by google (but with the strange digit(s) removed)
# Set language for results (lr) if source isn't set, otherwise use the
# result language param provided in the results
if 'source' in args:
param_dict['source'] = '&source=' + args.get('source')
param_dict['lr'] = ('&lr=' + ''.join([_ for _ in sub_lang if not _.isdigit()])) if sub_lang else ''
param_dict['lr'] = ('&lr=' + ''.join(
[_ for _ in sub_lang if not _.isdigit()]
)) if sub_lang else ''
else:
param_dict['lr'] = ('&lr=' + config.lang_search) if config.lang_search else ''
param_dict['lr'] = (
'&lr=' + config.lang_search
) if config.lang_search else ''
# Set autocorrected search ignore
# 'nfpr' defines the exclusion of results from an auto-corrected query
if 'nfpr' in args:
param_dict['nfpr'] = '&nfpr=' + args.get('nfpr')
param_dict['cr'] = ('&cr=' + config.ctry) if config.ctry else ''
param_dict['hl'] = ('&hl=' + config.lang_interface.replace('lang_', '')) if config.lang_interface else ''
param_dict['hl'] = (
'&hl=' + config.lang_interface.replace('lang_', '')
) if config.lang_interface else ''
param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off')
for val in param_dict.values():
@ -85,27 +129,117 @@ def gen_query(query, args, config, near_city=None):
class Request:
def __init__(self, normal_ua, language='lang_en'):
self.language = language
"""Class used for handling all outbound requests, including search queries,
search suggestions, and loading of external content (images, audio, etc).
Attributes:
normal_ua -- the user's current user agent
root_path -- the root path of the whoogle instance
config -- the user's current whoogle configuration
"""
def __init__(self, normal_ua, root_path, config: Config):
    """Set up the outbound request state for the current user.

    Args:
        normal_ua: The User-Agent string sent by the user's browser. It
            is only inspected for 'Android' / 'iPhone' to pick a mobile
            or desktop replacement user agent.
        root_path: The root path of the whoogle instance
        config: The user's current whoogle configuration
            (``lang_search`` and ``tor`` are read from it)
    """
    # Send heartbeat to Tor, used in determining if the user can or cannot
    # enable Tor for future requests
    send_tor_signal(Signal.HEARTBEAT)

    self.language = config.lang_search
    self.mobile = 'Android' in normal_ua or 'iPhone' in normal_ua
    self.modified_user_agent = gen_user_agent(self.mobile)

    # Set up proxy, if previously configured
    proxy_loc = os.environ.get('WHOOGLE_PROXY_LOC')
    if proxy_loc:
        proxy_type = os.environ.get('WHOOGLE_PROXY_TYPE')
        proxy_user = os.environ.get('WHOOGLE_PROXY_USER')

        # Only emit the "user:pass@" prefix when credentials exist;
        # otherwise the URL would contain an empty userinfo section
        # ("type://@host:port")
        auth_prefix = ''
        if proxy_user:
            auth_prefix = (proxy_user + ':' +
                           os.environ.get('WHOOGLE_PROXY_PASS', '') + '@')

        http_proxy = proxy_type + '://' + auth_prefix + proxy_loc

        # Upgrade only the scheme of a plain "http" proxy. A blanket
        # str.replace would also rewrite any "http" found inside the
        # credentials or host, and would turn "https" into "httpss".
        if proxy_type == 'http':
            https_proxy = 'https' + http_proxy[len('http'):]
        else:
            https_proxy = http_proxy

        self.proxies = {
            'http': http_proxy,
            'https': https_proxy
        }
    else:
        # No explicit proxy: route through the local Tor SOCKS proxy
        # when Tor is enabled, otherwise connect directly
        self.proxies = {
            'http': 'socks5://127.0.0.1:9050',
            'https': 'socks5://127.0.0.1:9050'
        } if config.tor else {}

    self.tor = config.tor
    self.tor_valid = False
    self.root_path = root_path
def __getitem__(self, name):
    """Support ``request[name]`` lookups by returning the named attribute."""
    return getattr(self, name)
def autocomplete(self, query):
def autocomplete(self, query) -> list:
"""Sends a query to Google's search suggestion service
Args:
query: The in-progress query to send
Returns:
list: The list of matches for possible search suggestions
"""
ac_query = dict(hl=self.language, q=query)
response = self.send(base_url=AUTOCOMPLETE_URL, query=urlparse.urlencode(ac_query)).text
response = self.send(base_url=AUTOCOMPLETE_URL,
query=urlparse.urlencode(ac_query)).text
if response:
dom = etree.fromstring(response)
return dom.xpath('//suggestion/@data')
if not response:
return []
return []
root = ET.fromstring(response)
return [_.attrib['data'] for _ in
root.findall('.//suggestion/[@data]')]
def send(self, base_url=SEARCH_URL, query='') -> Response:
def send(self, base_url=SEARCH_URL, query='', attempt=0) -> Response:
"""Sends an outbound request to a URL. Optionally sends the request
using Tor, if enabled by the user.
Args:
base_url: The URL to use in the request
query: The optional query string for the request
attempt: The number of attempts made for the request
(used for cycling through Tor identities, if enabled)
Returns:
Response: The Response object returned by the requests call
"""
headers = {
'User-Agent': self.modified_user_agent
}
return requests.get(base_url + query, headers=headers)
# Validate Tor conn and request new identity if the last one failed
if self.tor and not send_tor_signal(
Signal.NEWNYM if attempt > 0 else Signal.HEARTBEAT):
raise TorError(
"Tor was previously enabled, but the connection has been "
"dropped. Please check your Tor configuration and try again.",
disable=True)
# Make sure that the tor connection is valid, if enabled
if self.tor:
tor_check = requests.get('https://check.torproject.org/',
proxies=self.proxies, headers=headers)
self.tor_valid = 'Congratulations' in tor_check.text
if not self.tor_valid:
raise TorError(
"Tor connection succeeded, but the connection could not "
"be validated by torproject.org",
disable=True)
response = requests.get(
base_url + query,
proxies=self.proxies,
headers=headers)
# Retry query with new identity if using Tor (max 10 attempts)
if 'form id="captcha-form"' in response.text and self.tor:
attempt += 1
if attempt > 10:
raise TorError("Tor query failed -- max attempts exceeded 10")
return self.send(base_url, query, attempt)
return response

View File

@ -9,15 +9,19 @@ import uuid
from functools import wraps
import waitress
from flask import jsonify, make_response, request, redirect, render_template, send_file, session
from flask import jsonify, make_response, request, redirect, render_template, \
send_file, session, url_for
from requests import exceptions
from app import app
from app.models.config import Config
from app.request import Request
from app.request import Request, TorError
from app.utils.session_utils import valid_user_session
from app.utils.routing_utils import *
# Load DDG bang json files only on init
bang_json = json.load(open(app.config['BANG_FILE']))
def auth_required(f):
@wraps(f)
@ -27,23 +31,30 @@ def auth_required(f):
# Skip if username/password not set
whoogle_user = os.getenv('WHOOGLE_USER', '')
whoogle_pass = os.getenv('WHOOGLE_PASS', '')
if (not whoogle_user or not whoogle_pass) or \
(auth and whoogle_user == auth.username and whoogle_pass == auth.password):
if (not whoogle_user or not whoogle_pass) or (
auth
and whoogle_user == auth.username
and whoogle_pass == auth.password):
return f(*args, **kwargs)
else:
return make_response('Not logged in', 401, {'WWW-Authenticate': 'Basic realm="Login Required"'})
return make_response('Not logged in', 401, {
'WWW-Authenticate': 'Basic realm="Login Required"'})
return decorated
@app.before_request
def before_request_func():
g.request_params = request.args if request.method == 'GET' else request.form
g.request_params = (
request.args if request.method == 'GET' else request.form
)
g.cookies_disabled = False
# Generate session values for user if unavailable
if not valid_user_session(session):
session['config'] = json.load(open(app.config['DEFAULT_CONFIG'])) \
if os.path.exists(app.config['DEFAULT_CONFIG']) else {'url': request.url_root}
if os.path.exists(app.config['DEFAULT_CONFIG']) else {
'url': request.url_root}
session['uuid'] = str(uuid.uuid4())
session['fernet_keys'] = generate_user_keys(True)
@ -54,18 +65,28 @@ def before_request_func():
if session['uuid'] not in app.user_elements:
app.user_elements.update({session['uuid']: 0})
# Always redirect to https if HTTPS_ONLY is set (otherwise default to False)
# Handle https upgrade
https_only = os.getenv('HTTPS_ONLY', False)
is_heroku = request.url.endswith('.herokuapp.com')
is_http = request.url.startswith('http://')
if (is_heroku and is_http) or (https_only and is_http):
return redirect(
request.url.replace('http://', 'https://', 1),
code=308)
if https_only and request.url.startswith('http://'):
return redirect(request.url.replace('http://', 'https://', 1), code=308)
g.user_config = Config(**session['config'])
if not g.user_config.url:
g.user_config.url = request.url_root.replace('http://', 'https://') if https_only else request.url_root
g.user_config.url = request.url_root.replace(
'http://',
'https://') if https_only else request.url_root
g.user_request = Request(
request.headers.get('User-Agent'),
request.url_root,
config=g.user_config)
g.user_request = Request(request.headers.get('User-Agent'), language=g.user_config.lang_search)
g.app_location = g.user_config.url
@ -73,13 +94,17 @@ def before_request_func():
def after_request_func(response):
if app.user_elements[session['uuid']] <= 0 and '/element' in request.url:
# Regenerate element key if all elements have been served to user
session['fernet_keys']['element_key'] = '' if not g.cookies_disabled else app.default_key_set['element_key']
session['fernet_keys'][
'element_key'] = '' if not g.cookies_disabled else \
app.default_key_set['element_key']
app.user_elements[session['uuid']] = 0
# Check if address consistently has cookies blocked, in which case start removing session
# files after creation.
# Note: This is primarily done to prevent overpopulation of session directories, since browsers that
# block cookies will still trigger Flask's session creation routine with every request.
# Check if address consistently has cookies blocked,
# in which case start removing session files after creation.
#
# Note: This is primarily done to prevent overpopulation of session
# directories, since browsers that block cookies will still trigger
# Flask's session creation routine with every request.
if g.cookies_disabled and request.remote_addr not in app.no_cookie_ips:
app.no_cookie_ips.append(request.remote_addr)
elif g.cookies_disabled and request.remote_addr in app.no_cookie_ips:
@ -92,6 +117,7 @@ def after_request_func(response):
@app.errorhandler(404)
def unknown_page(e):
    """Log a 404 error and send the client back to the app location.

    Args:
        e: The error object passed in by the 404 error handler

    Returns:
        A redirect to ``g.app_location``
    """
    # Logger.warn() is a deprecated alias of Logger.warning() and has been
    # removed in Python 3.13
    app.logger.warning(e)
    return redirect(g.app_location)
@ -100,11 +126,16 @@ def unknown_page(e):
def index():
# Reset keys
session['fernet_keys'] = generate_user_keys(g.cookies_disabled)
error_message = session[
'error_message'] if 'error_message' in session else ''
session['error_message'] = ''
return render_template('index.html',
languages=Config.LANGUAGES,
countries=Config.COUNTRIES,
languages=app.config['LANGUAGES'],
countries=app.config['COUNTRIES'],
config=g.user_config,
error_message=error_message,
tor_available=int(os.environ.get('TOR_AVAILABLE')),
version_number=app.config['VERSION_NUMBER'])
@ -115,23 +146,43 @@ def opensearch():
if opensearch_url.endswith('/'):
opensearch_url = opensearch_url[:-1]
get_only = g.user_config.get_only or 'Chrome' in request.headers.get(
'User-Agent')
return render_template(
'opensearch.xml',
main_url=opensearch_url,
request_type='' if g.user_config.get_only else 'method="post"'
request_type='' if get_only else 'method="post"'
), 200, {'Content-Disposition': 'attachment; filename="opensearch.xml"'}
@app.route('/autocomplete', methods=['GET', 'POST'])
def autocomplete():
q = g.request_params.get('q')
if not q:
# FF will occasionally (incorrectly) send the q field without a
# mimetype in the format "b'q=<query>'" through the request.data field
q = str(request.data).replace('q=', '')
# Search bangs if the query begins with "!", but not "! " (feeling lucky)
if q.startswith('!') and len(q) > 1 and not q.startswith('! '):
return jsonify([q, [bang_json[_]['suggestion'] for _ in bang_json if
_.startswith(q)]])
if not q and not request.data:
return jsonify({'?': []})
elif request.data:
q = urlparse.unquote_plus(request.data.decode('utf-8').replace('q=', ''))
q = urlparse.unquote_plus(
request.data.decode('utf-8').replace('q=', ''))
return jsonify([q, g.user_request.autocomplete(q)])
# Return a list of suggestions for the query
#
# Note: If Tor is enabled, this returns nothing, as the request is
# almost always rejected
return jsonify([
q,
g.user_request.autocomplete(q) if not g.user_config.tor else []
])
@app.route('/search', methods=['GET', 'POST'])
@ -140,19 +191,36 @@ def search():
# Reset element counter
app.user_elements[session['uuid']] = 0
search_util = RoutingUtils(request, g.user_config, session, cookies_disabled=g.cookies_disabled)
# Update user config if specified in search args
g.user_config = g.user_config.from_params(g.request_params)
search_util = RoutingUtils(request, g.user_config, session,
cookies_disabled=g.cookies_disabled)
query = search_util.new_search_query()
resolved_bangs = search_util.bang_operator(bang_json)
if resolved_bangs != '':
return redirect(resolved_bangs)
# Redirect to home if invalid/blank search
if not query:
return redirect('/')
# Generate response and number of external elements from the page
response, elements = search_util.generate_response()
if search_util.feeling_lucky:
try:
response, elements = search_util.generate_response()
except TorError as e:
session['error_message'] = e.message + (
"\\n\\nTor config is now disabled!" if e.disable else "")
session['config']['tor'] = False if e.disable else session['config'][
'tor']
return redirect(url_for('.index'))
if search_util.feeling_lucky or elements < 0:
return redirect(response, code=303)
# Keep count of external elements to fetch before element key can be regenerated
# Keep count of external elements to fetch before
# the element key can be regenerated
app.user_elements[session['uuid']] = elements
return render_template(
@ -162,12 +230,13 @@ def search():
dark_mode=g.user_config.dark,
response=response,
version_number=app.config['VERSION_NUMBER'],
search_header=render_template(
search_header=(render_template(
'header.html',
dark_mode=g.user_config.dark,
query=urlparse.unquote(query),
search_type=search_util.search_type,
mobile=g.user_request.mobile) if 'isch' not in search_util.search_type else '')
mobile=g.user_request.mobile)
if 'isch' not in search_util.search_type else ''))
@app.route('/config', methods=['GET', 'POST', 'PUT'])
@ -177,8 +246,12 @@ def config():
return json.dumps(g.user_config.__dict__)
elif request.method == 'PUT':
if 'name' in request.args:
config_pkl = os.path.join(app.config['CONFIG_PATH'], request.args.get('name'))
session['config'] = pickle.load(open(config_pkl, 'rb')) if os.path.exists(config_pkl) else session['config']
config_pkl = os.path.join(
app.config['CONFIG_PATH'],
request.args.get('name'))
session['config'] = (pickle.load(open(config_pkl, 'rb'))
if os.path.exists(config_pkl)
else session['config'])
return json.dumps(session['config'])
else:
return json.dumps({})
@ -189,11 +262,16 @@ def config():
# Save config by name to allow a user to easily load later
if 'name' in request.args:
pickle.dump(config_data, open(os.path.join(app.config['CONFIG_PATH'], request.args.get('name')), 'wb'))
pickle.dump(
config_data,
open(os.path.join(
app.config['CONFIG_PATH'],
request.args.get('name')), 'wb'))
# Overwrite default config if user has cookies disabled
if g.cookies_disabled:
open(app.config['DEFAULT_CONFIG'], 'w').write(json.dumps(config_data, indent=4))
open(app.config['DEFAULT_CONFIG'], 'w').write(
json.dumps(config_data, indent=4))
session['config'] = config_data
return redirect(config_data['url'])
@ -236,7 +314,8 @@ def element():
except exceptions.RequestException:
pass
empty_gif = base64.b64decode('R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==')
empty_gif = base64.b64decode(
'R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==')
return send_file(io.BytesIO(empty_gif), mimetype='image/gif')
@ -244,32 +323,62 @@ def element():
@auth_required
def window():
get_body = g.user_request.send(base_url=request.args.get('location')).text
get_body = get_body.replace('src="/', 'src="' + request.args.get('location') + '"')
get_body = get_body.replace('href="/', 'href="' + request.args.get('location') + '"')
get_body = get_body.replace('src="/',
'src="' + request.args.get('location') + '"')
get_body = get_body.replace('href="/',
'href="' + request.args.get('location') + '"')
results = BeautifulSoup(get_body, 'html.parser')
results = bsoup(get_body, 'html.parser')
try:
for script in results('script'):
script.decompose()
except Exception:
pass
for script in results('script'):
script.decompose()
return render_template('display.html', response=results)
def run_app():
parser = argparse.ArgumentParser(description='Whoogle Search console runner')
parser.add_argument('--port', default=5000, metavar='<port number>',
help='Specifies a port to run on (default 5000)')
parser.add_argument('--host', default='127.0.0.1', metavar='<ip address>',
help='Specifies the host address to use (default 127.0.0.1)')
parser.add_argument('--debug', default=False, action='store_true',
help='Activates debug mode for the server (default False)')
parser.add_argument('--https-only', default=False, action='store_true',
help='Enforces HTTPS redirects for all requests')
parser.add_argument('--userpass', default='', metavar='<username:password>',
help='Sets a username/password basic auth combo (default None)')
parser = argparse.ArgumentParser(
description='Whoogle Search console runner')
parser.add_argument(
'--port',
default=5000,
metavar='<port number>',
help='Specifies a port to run on (default 5000)')
parser.add_argument(
'--host',
default='127.0.0.1',
metavar='<ip address>',
help='Specifies the host address to use (default 127.0.0.1)')
parser.add_argument(
'--debug',
default=False,
action='store_true',
help='Activates debug mode for the server (default False)')
parser.add_argument(
'--https-only',
default=False,
action='store_true',
help='Enforces HTTPS redirects for all requests')
parser.add_argument(
'--userpass',
default='',
metavar='<username:password>',
help='Sets a username/password basic auth combo (default None)')
parser.add_argument(
'--proxyauth',
default='',
metavar='<username:password>',
help='Sets a username/password for a HTTP/SOCKS proxy (default None)')
parser.add_argument(
'--proxytype',
default='',
metavar='<socks4|socks5|http>',
help='Sets a proxy type for all connections (default None)')
parser.add_argument(
'--proxyloc',
default='',
metavar='<location:port>',
help='Sets a proxy location for all connections (default None)')
args = parser.parse_args()
if args.userpass:
@ -277,6 +386,14 @@ def run_app():
os.environ['WHOOGLE_USER'] = user_pass[0]
os.environ['WHOOGLE_PASS'] = user_pass[1]
if args.proxytype and args.proxyloc:
if args.proxyauth:
proxy_user_pass = args.proxyauth.split(':')
os.environ['WHOOGLE_PROXY_USER'] = proxy_user_pass[0]
os.environ['WHOOGLE_PROXY_PASS'] = proxy_user_pass[1]
os.environ['WHOOGLE_PROXY_TYPE'] = args.proxytype
os.environ['WHOOGLE_PROXY_LOC'] = args.proxyloc
os.environ['HTTPS_ONLY'] = '1' if args.https_only else ''
if args.debug:

View File

@ -1,5 +1,5 @@
html {
background-color: #000 !important;
background-color: #222 !important;
}
body {
@ -7,7 +7,14 @@ body {
}
div {
/*background-color: #111 !important;*/
color: #fff !important;
}
li a {
color: #4b8eaa !important;
}
li {
color: #fff !important;
}
@ -34,9 +41,17 @@ input {
#search-bar {
color: #fff !important;
background-color: #000 !important;
background-color: #222 !important;
}
.search-container {
background-color: #000 !important;
background-color: #222 !important;
}
.ZINbbc{
background-color: #1a1a1a !important;
}
.bRsWnc{
background-color: #1a1a1a !important;
}

View File

@ -52,4 +52,11 @@ header {
width: 100%;
-webkit-tap-highlight-color: rgba(0,0,0,0);
overflow: hidden;
}
}
.tracking-link {
font-size: large;
text-align: center;
margin: 15px;
display: block;
}

View File

@ -21,7 +21,7 @@
padding: 10px;
cursor: pointer;
color: #fff;
background-color: #000;
background-color: #222;
border-bottom: 1px solid #242424;
}
@ -32,4 +32,9 @@
.autocomplete-active {
background-color: #685e79 !important;
color: #ffffff;
}
}
details summary {
padding: 10px;
font-weight: bold;
}

View File

@ -31,4 +31,9 @@
.autocomplete-active {
background-color: #685e79 !important;
color: #ffffff;
}
}
details summary {
padding: 10px;
font-weight: bold;
}

View File

@ -1,6 +1,6 @@
const handleUserInput = searchBar => {
let xhrRequest = new XMLHttpRequest();
xhrRequest.open("POST", "/autocomplete");
xhrRequest.open("POST", "autocomplete");
xhrRequest.setRequestHeader("Content-type", "application/x-www-form-urlencoded");
xhrRequest.onload = function () {
if (xhrRequest.readyState === 4 && xhrRequest.status !== 200) {
@ -93,8 +93,14 @@ const autocomplete = (searchInput, autocompleteResults) => {
removeActive(suggestion);
suggestion[currentFocus].classList.add("autocomplete-active");
// Autofill search bar with suggestion content
searchBar.value = suggestion[currentFocus].textContent;
// Autofill search bar with suggestion content (minus the "bang name" if using a bang operator)
let searchContent = suggestion[currentFocus].textContent;
if (searchContent.indexOf('(') > 0) {
searchBar.value = searchContent.substring(0, searchContent.indexOf('('));
} else {
searchBar.value = searchContent;
}
searchBar.focus();
};
@ -117,4 +123,4 @@ const autocomplete = (searchInput, autocompleteResults) => {
document.addEventListener("click", function (e) {
closeAllLists(e.target);
});
};
};

View File

@ -1,6 +1,6 @@
// Whoogle configurations that use boolean values and checkboxes
CONFIG_BOOLS = [
"nojs", "dark", "safe", "alts", "new_tab", "get_only"
"nojs", "dark", "safe", "alts", "new_tab", "get_only", "tor"
];
// Whoogle configurations that use string values and input fields
@ -31,7 +31,7 @@ const setupSearchLayout = () => {
const fillConfigValues = () => {
// Request existing config info
let xhrGET = new XMLHttpRequest();
xhrGET.open("GET", "/config");
xhrGET.open("GET", "config");
xhrGET.onload = function() {
if (xhrGET.readyState === 4 && xhrGET.status !== 200) {
alert("Error loading Whoogle config");
@ -82,7 +82,7 @@ const loadConfig = event => {
}
let xhrPUT = new XMLHttpRequest();
xhrPUT.open("PUT", "/config?name=" + config + ".conf");
xhrPUT.open("PUT", "config?name=" + config + ".conf");
xhrPUT.onload = function() {
if (xhrPUT.readyState === 4 && xhrPUT.status !== 200) {
alert("Error loading Whoogle config");
@ -104,7 +104,7 @@ const saveConfig = event => {
}
let configForm = document.getElementById("config-form");
configForm.action = '/config?name=' + config + ".conf";
configForm.action = 'config?name=' + config + ".conf";
configForm.submit();
};

60
app/static/js/utils.js Normal file
View File

@ -0,0 +1,60 @@
// Inspects the current search query and, if it looks like a UPS/USPS/FedEx
// tracking number, prepends a link to the matching carrier's tracking page
// to the "main" element.
const checkForTracking = () => {
    const mainDiv = document.getElementById("main");
    const searchBar = document.getElementById("search-bar");

    // Nothing to do on pages without a results container or a search bar
    if (!mainDiv || !searchBar) {
        return;
    }

    // Tracking numbers are compared with all whitespace removed
    const query = searchBar.value.replace(/\s+/g, '');

    // The query is user input, so it must be URL-encoded before being
    // placed in a link (plain alphanumeric tracking numbers are unchanged)
    const encodedQuery = encodeURIComponent(query);

    // Note: regex functions for checking for tracking queries were derived
    // from here -- https://stackoverflow.com/questions/619977
    const matchTracking = {
        "ups": {
            "link": `https://www.ups.com/track?tracknum=${encodedQuery}`,
            "expr": [
                /\b(1Z ?[0-9A-Z]{3} ?[0-9A-Z]{3} ?[0-9A-Z]{2} ?[0-9A-Z]{4} ?[0-9A-Z]{3} ?[0-9A-Z]|[\dT]\d\d\d ?\d\d\d\d ?\d\d\d)\b/
            ]
        },
        "usps": {
            "link": `https://tools.usps.com/go/TrackConfirmAction?tLabels=${encodedQuery}`,
            "expr": [
                /(\b\d{30}\b)|(\b91\d+\b)|(\b\d{20}\b)/,
                /^E\D{1}\d{9}\D{2}$|^9\d{15,21}$/,
                /^91[0-9]+$/,
                /^[A-Za-z]{2}[0-9]+US$/
            ]
        },
        "fedex": {
            "link": `https://www.fedex.com/apps/fedextrack/?tracknumbers=${encodedQuery}`,
            "expr": [
                /(\b96\d{20}\b)|(\b\d{15}\b)|(\b\d{12}\b)/,
                /\b((98\d\d\d\d\d?\d\d\d\d|98\d\d) ?\d\d\d\d ?\d\d\d\d( ?\d\d\d)?)\b/,
                /^[0-9]{15}$/
            ]
        }
    };

    // Creates a link to a UPS/USPS/FedEx tracking page
    const createTrackingLink = href => {
        let link = document.createElement("a");
        link.className = "tracking-link";
        // Plain text label, so there is no need to go through the HTML parser
        link.textContent = "View Tracking Info";
        link.href = href;
        mainDiv.prepend(link);
    };

    // Compares the query against a set of regex patterns
    // for tracking numbers; at most one link is added per provider
    const compareQuery = provider => {
        if (provider.expr.some(regex => regex.test(query))) {
            createTrackingLink(provider.link);
        }
    };

    for (const key of Object.keys(matchTracking)) {
        compareQuery(matchTracking[key]);
    }
};
// Run the tracking-number check once the DOM has been parsed
document.addEventListener("DOMContentLoaded", () => {
    checkForTracking();
});

View File

@ -1,15 +1,16 @@
<html>
<head>
<link rel="shortcut icon" href="/static/img/favicon.ico" type="image/x-icon">
<link rel="icon" href="/static/img/favicon.ico" type="image/x-icon">
<link rel="search" href="/opensearch.xml" type="application/opensearchdescription+xml" title="Whoogle Search">
<link rel="shortcut icon" href="static/img/favicon.ico" type="image/x-icon">
<link rel="icon" href="static/img/favicon.ico" type="image/x-icon">
<link rel="search" href="opensearch.xml" type="application/opensearchdescription+xml" title="Whoogle Search">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="referrer" content="no-referrer">
<script type="text/javascript" src="/static/js/autocomplete.js"></script>
<link rel="stylesheet" href="/static/css/{{ 'search-dark' if dark_mode else 'search' }}.css">
<link rel="stylesheet" href="/static/css/header.css">
<script type="text/javascript" src="static/js/autocomplete.js"></script>
<script type="text/javascript" src="static/js/utils.js"></script>
<link rel="stylesheet" href="static/css/{{ 'search-dark' if dark_mode else 'search' }}.css">
<link rel="stylesheet" href="static/css/header.css">
{% if dark_mode %}
<link rel="stylesheet" href="/static/css/dark-theme.css"/>
<link rel="stylesheet" href="static/css/dark-theme.css"/>
{% endif %}
<title>{{ query }} - Whoogle Search</title>
</head>

View File

@ -3,3 +3,4 @@
<p>
Error parsing "{{ query }}"
</p>
<a href="/">Return Home</a>

View File

@ -15,6 +15,7 @@
border: {{ '1px solid #685e79' if dark_mode else '' }}"
spellcheck="false" type="text" value="{{ query }}">
<input name="tbm" value="{{ search_type }}" style="display: none">
<input type="submit" style="display: none;">
<div class="sc"></div>
</div>
</div>
@ -38,6 +39,7 @@
color: {{ '#685e79' if dark_mode else '#000' }};
border: {{ '1px solid #685e79' if dark_mode else '' }}">
<input name="tbm" value="{{ search_type }}" style="display: none">
<input type="submit" style="display: none;">
<div class="sc"></div>
</div>
</div>
@ -56,4 +58,4 @@
document.getElementById("search-form").submit();
}
});
</script>
</script>

View File

@ -1,30 +1,30 @@
<html>
<head>
<link rel="apple-touch-icon" sizes="57x57" href="/static/img/favicon/apple-icon-57x57.png">
<link rel="apple-touch-icon" sizes="60x60" href="/static/img/favicon/apple-icon-60x60.png">
<link rel="apple-touch-icon" sizes="72x72" href="/static/img/favicon/apple-icon-72x72.png">
<link rel="apple-touch-icon" sizes="76x76" href="/static/img/favicon/apple-icon-76x76.png">
<link rel="apple-touch-icon" sizes="114x114" href="/static/img/favicon/apple-icon-114x114.png">
<link rel="apple-touch-icon" sizes="120x120" href="/static/img/favicon/apple-icon-120x120.png">
<link rel="apple-touch-icon" sizes="144x144" href="/static/img/favicon/apple-icon-144x144.png">
<link rel="apple-touch-icon" sizes="152x152" href="/static/img/favicon/apple-icon-152x152.png">
<link rel="apple-touch-icon" sizes="180x180" href="/static/img/favicon/apple-icon-180x180.png">
<link rel="icon" type="image/png" sizes="192x192" href="/static/img/favicon/android-icon-192x192.png">
<link rel="icon" type="image/png" sizes="32x32" href="/static/img/favicon/favicon-32x32.png">
<link rel="icon" type="image/png" sizes="96x96" href="/static/img/favicon/favicon-96x96.png">
<link rel="icon" type="image/png" sizes="16x16" href="/static/img/favicon/favicon-16x16.png">
<link rel="manifest" href="/static/img/favicon/manifest.json">
<link rel="apple-touch-icon" sizes="57x57" href="static/img/favicon/apple-icon-57x57.png">
<link rel="apple-touch-icon" sizes="60x60" href="static/img/favicon/apple-icon-60x60.png">
<link rel="apple-touch-icon" sizes="72x72" href="static/img/favicon/apple-icon-72x72.png">
<link rel="apple-touch-icon" sizes="76x76" href="static/img/favicon/apple-icon-76x76.png">
<link rel="apple-touch-icon" sizes="114x114" href="static/img/favicon/apple-icon-114x114.png">
<link rel="apple-touch-icon" sizes="120x120" href="static/img/favicon/apple-icon-120x120.png">
<link rel="apple-touch-icon" sizes="144x144" href="static/img/favicon/apple-icon-144x144.png">
<link rel="apple-touch-icon" sizes="152x152" href="static/img/favicon/apple-icon-152x152.png">
<link rel="apple-touch-icon" sizes="180x180" href="static/img/favicon/apple-icon-180x180.png">
<link rel="icon" type="image/png" sizes="192x192" href="static/img/favicon/android-icon-192x192.png">
<link rel="icon" type="image/png" sizes="32x32" href="static/img/favicon/favicon-32x32.png">
<link rel="icon" type="image/png" sizes="96x96" href="static/img/favicon/favicon-96x96.png">
<link rel="icon" type="image/png" sizes="16x16" href="static/img/favicon/favicon-16x16.png">
<link rel="manifest" href="static/img/favicon/manifest.json">
<meta name="referrer" content="no-referrer">
<meta name="msapplication-TileColor" content="#ffffff">
<meta name="msapplication-TileImage" content="/static/img/favicon/ms-icon-144x144.png">
<script type="text/javascript" src="/static/js/autocomplete.js"></script>
<script type="text/javascript" src="/static/js/controller.js"></script>
<link rel="search" href="/opensearch.xml" type="application/opensearchdescription+xml" title="Whoogle Search">
<meta name="msapplication-TileImage" content="static/img/favicon/ms-icon-144x144.png">
<script type="text/javascript" src="static/js/autocomplete.js"></script>
<script type="text/javascript" src="static/js/controller.js"></script>
<link rel="search" href="opensearch.xml" type="application/opensearchdescription+xml" title="Whoogle Search">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" href="/static/css/{{ 'search-dark' if config.dark else 'search' }}.css">
<link rel="stylesheet" href="/static/css/main.css">
<link rel="stylesheet" href="static/css/{{ 'search-dark' if config.dark else 'search' }}.css">
<link rel="stylesheet" href="static/css/main.css">
{% if config.dark %}
<link rel="stylesheet" href="/static/css/dark-theme.css"/>
<link rel="stylesheet" href="static/css/dark-theme.css"/>
{% endif %}
<noscript>
<style>
@ -36,9 +36,15 @@
<title>Whoogle Search</title>
</head>
<body id="main" style="display: none; background-color: {{ '#000' if config.dark else '#fff' }}">
<script>
{# tojson renders the message as a correctly quoted and escaped JavaScript
   string literal, so quotes, newlines or a closing script tag inside the
   message cannot terminate the string or the script element. #}
{% if error_message|length > 0 %}
let error = {{ error_message|tojson }};
alert(error);
{% endif %}
</script>
<div class="search-container">
<img class="logo" src="/static/img/logo.png">
<form id="search-form" action="/search" method="{{ 'get' if config.get_only else 'post' }}">
<img class="logo" src="static/img/logo.png">
<form id="search-form" action="search" method="{{ 'get' if config.get_only else 'post' }}">
<div class="search-fields">
<div class="autocomplete">
<input type="text" name="q" id="search-bar" autofocus="autofocus" autocomplete="off">
@ -50,7 +56,7 @@
<button id="config-collapsible" class="collapsible">Configuration</button>
<div class="content">
<div class="config-fields">
<form id="config-form" action="/config" method="post">
<form id="config-form" action="config" method="post">
<div class="config-div">
<label for="config-ctry">Filter Results by Country: </label>
<select name="ctry" id="config-ctry">
@ -117,6 +123,10 @@
<label for="config-new-tab">Open Links in New Tab: </label>
<input type="checkbox" name="new_tab" id="config-new-tab">
</div>
<div class="config-div">
<label for="config-tor">Use Tor: {{ '' if tor_available else 'Unavailable' }}</label>
<input type="checkbox" name="tor" id="config-tor" {{ '' if tor_available else 'hidden' }}>
</div>
<div class="config-div">
<label for="config-get-only">GET Requests Only: </label>
<input type="checkbox" name="get_only" id="config-get-only">

View File

@ -9,7 +9,7 @@
<Param name="q" value="{searchTerms}"/>
</Url>
<Url type="application/x-suggestions+json" {{ request_type|safe }} template="{{ main_url }}/autocomplete">
<Param name="q" value="{searchTerms}"/>
<Param name="q" value="{searchTerms}"/>
</Url>
<moz:SearchForm>{{ main_url }}/search</moz:SearchForm>
</OpenSearchDescription>

View File

@ -1,4 +1,5 @@
from bs4 import BeautifulSoup
import os
import urllib.parse as urlparse
from urllib.parse import parse_qs
@ -6,25 +7,28 @@ SKIP_ARGS = ['ref_src', 'utm']
FULL_RES_IMG = '<br/><a href="{}">Full Image</a>'
GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo'
LOGO_URL = GOOG_IMG + '_desk'
BLANK_B64 = '''
data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkwAIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC
'''
BLANK_B64 = ('data:image/png;base64,'
'iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkw'
'AIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC')
# Ad keywords
BLACKLIST = [
'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', 'Реклама', 'Anunț', '광고',
'annons', 'Annonse', 'Iklan', '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam',
'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés', 'Anúncio'
'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama',
'Реклама', 'Anunț', '광고', 'annons', 'Annonse', 'Iklan', '広告', 'Augl.',
'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam', 'آگهی',
'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés', 'Anúncio'
]
SITE_ALTS = {
'twitter.com': 'nitter.net',
'youtube.com': 'invidious.snopyta.org',
'instagram.com': 'bibliogram.art/u'
'twitter.com': os.getenv('WHOOGLE_ALT_TW', 'nitter.net'),
'youtube.com': os.getenv('WHOOGLE_ALT_YT', 'invidious.snopyta.org'),
'instagram.com': os.getenv('WHOOGLE_ALT_IG', 'bibliogram.art/u')
}
def has_ad_content(element: str):
return element.upper() in (value.upper() for value in BLACKLIST) or '' in element
return element.upper() in (value.upper() for value in BLACKLIST) \
or '' in element
def get_first_link(soup):
@ -71,7 +75,7 @@ def filter_link_args(query_link):
def gen_nojs(sibling):
nojs_link = BeautifulSoup().new_tag('a')
nojs_link = BeautifulSoup(features='html.parser').new_tag('a')
nojs_link['href'] = '/window?location=' + sibling['href']
nojs_link['style'] = 'display:block;width:100%;'
nojs_link.string = 'NoJS Link: ' + nojs_link['href']

View File

@ -0,0 +1,26 @@
import json
import requests
def gen_bangs_json(bangs_file):
    """Download the DuckDuckGo bang list and write it to a JSON file.

    Each row of the downloaded list is stored under the key ``'!' + row['t']``
    with two entries: ``'url'`` (``row['u']`` with the ``{{{s}}}`` placeholder
    replaced by ``{}``) and ``'suggestion'`` (the bang command followed by
    ``row['s']`` in parentheses).

    Args:
        bangs_file: Path of the file to write; it is opened in ``'w'`` mode,
            so any existing content is replaced.

    Raises:
        SystemExit: If the request for the bang list returns an HTTP error
            status.
    """
    # Request list
    try:
        r = requests.get('https://duckduckgo.com/bang.v255.js')
        r.raise_for_status()
    except requests.exceptions.HTTPError as err:
        raise SystemExit(err)

    # Convert to json
    data = json.loads(r.text)

    # Set up a json object (with better formatting) for all available bangs
    bangs_data = {}

    for row in data:
        bang_command = '!' + row['t']
        bangs_data[bang_command] = {
            'url': row['u'].replace('{{{s}}}', '{}'),
            'suggestion': bang_command + ' (' + row['s'] + ')'
        }

    # Use a context manager so the file is flushed and closed deterministically
    with open(bangs_file, 'w') as bangs_out:
        json.dump(bangs_data, bangs_out)

View File

@ -1,22 +1,26 @@
from app.filter import Filter, get_first_link
from app.utils.session_utils import generate_user_keys
from app.request import gen_query
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup as bsoup
from cryptography.fernet import Fernet, InvalidToken
from flask import g
from typing import Any, Tuple
TOR_BANNER = '<hr><h1 style="text-align: center">You are using Tor</h1><hr>'
class RoutingUtils:
def __init__(self, request, config, session, cookies_disabled=False):
self.request_params = request.args if request.method == 'GET' else request.form
method = request.method
self.request_params = request.args if method == 'GET' else request.form
self.user_agent = request.headers.get('User-Agent')
self.feeling_lucky = False
self.config = config
self.session = session
self.query = ''
self.cookies_disabled = cookies_disabled
self.search_type = self.request_params.get('tbm') if 'tbm' in self.request_params else ''
self.search_type = self.request_params.get(
'tbm') if 'tbm' in self.request_params else ''
def __getitem__(self, name):
return getattr(self, name)
@ -42,7 +46,9 @@ class RoutingUtils:
else:
# Attempt to decrypt if this is an internal link
try:
q = Fernet(self.session['fernet_keys']['text_key']).decrypt(q.encode()).decode()
q = Fernet(
self.session['fernet_keys']['text_key']
).decrypt(q.encode()).decode()
except InvalidToken:
pass
@ -50,23 +56,55 @@ class RoutingUtils:
self.session['fernet_keys']['text_key'] = generate_user_keys(
cookies_disabled=self.cookies_disabled)['text_key']
# Format depending on whether or not the query is a "feeling lucky" query
# Strip leading '! ' for "feeling lucky" queries
self.feeling_lucky = q.startswith('! ')
self.query = q[2:] if self.feeling_lucky else q
return self.query
def bang_operator(self, bangs_dict: dict) -> str:
    """Resolve a leading bang operator in the current query to a URL.

    Args:
        bangs_dict: Mapping of bang operator to a dict whose ``'url'`` entry
            is a format string with a single ``{}`` placeholder.

    Returns:
        The operator's URL formatted with the rest of the query (leading
        operator removed, surrounding whitespace stripped), or ``''`` when
        the first space-separated token of the query is not a key of
        ``bangs_dict``.
    """
    bang = self.query.split(' ')[0]
    if bang not in bangs_dict:
        return ''

    # Drop only the leading operator: a blanket str.replace() would also
    # delete any later occurrence of the same text from the search terms.
    search_terms = self.query[len(bang):].strip()
    return bangs_dict[bang]['url'].format(search_terms)
def generate_response(self) -> Tuple[Any, int]:
mobile = 'Android' in self.user_agent or 'iPhone' in self.user_agent
content_filter = Filter(self.session['fernet_keys'], mobile=mobile, config=self.config)
full_query = gen_query(self.query, self.request_params, self.config, content_filter.near)
get_body = g.user_request.send(query=full_query).text
content_filter = Filter(
self.session['fernet_keys'],
mobile=mobile,
config=self.config)
full_query = gen_query(
self.query,
self.request_params,
self.config,
content_filter.near)
get_body = g.user_request.send(query=full_query)
# Produce cleanable html soup from response
html_soup = BeautifulSoup(content_filter.reskin(get_body), 'html.parser')
html_soup = bsoup(content_filter.reskin(get_body.text), 'html.parser')
html_soup.insert(
0,
bsoup(TOR_BANNER, 'html.parser')
if g.user_request.tor_valid else bsoup('', 'html.parser'))
if self.feeling_lucky:
return get_first_link(html_soup), 1
else:
formatted_results = content_filter.clean(html_soup)
# Append user config to all search links, if available
param_str = ''.join('&{}={}'.format(k, v)
for k, v in
self.request_params.to_dict(flat=True).items()
if self.config.is_safe_key(k))
for link in formatted_results.find_all('a', href=True):
if 'search?' not in link['href'] or link['href'].index(
'search?') > 1:
continue
link['href'] += param_str
return formatted_results, content_filter.elements

View File

@ -4,6 +4,21 @@ services:
whoogle-search:
image: benbusby/whoogle-search
container_name: whoogle-search
#environment: # Uncomment to configure environment variables
# Basic auth configuration, uncomment to enable
#- WHOOGLE_USER=<auth username>
#- WHOOGLE_PASS=<auth password>
# Proxy configuration, uncomment to enable
#- WHOOGLE_PROXY_USER=<proxy username>
#- WHOOGLE_PROXY_PASS=<proxy password>
#- WHOOGLE_PROXY_TYPE=<proxy type (http|socks4|socks5)>
#- WHOOGLE_PROXY_LOC=<proxy host/ip>
# Site alternative configurations, uncomment to enable
# Note: If not set, the feature will still be available
# with default values.
#- WHOOGLE_ALT_TW=nitter.net
#- WHOOGLE_ALT_YT=invidious.snopyta.org
#- WHOOGLE_ALT_IG=bibliogram.art/u
ports:
- 5000:5000
restart: unless-stopped

BIN
docs/banner.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

View File

Before

Width:  |  Height:  |  Size: 215 KiB

After

Width:  |  Height:  |  Size: 215 KiB

View File

Before

Width:  |  Height:  |  Size: 139 KiB

After

Width:  |  Height:  |  Size: 139 KiB

248
misc/countries.json Normal file
View File

@ -0,0 +1,248 @@
[
{"name": "Default (none)", "value": ""},
{"name": "Afghanistan", "value": "countryAF"},
{"name": "Albania", "value": "countryAL"},
{"name": "Algeria", "value": "countryDZ"},
{"name": "American Samoa", "value": "countryAS"},
{"name": "Andorra", "value": "countryAD"},
{"name": "Angola", "value": "countryAO"},
{"name": "Anguilla", "value": "countryAI"},
{"name": "Antarctica", "value": "countryAQ"},
{"name": "Antigua and Barbuda", "value": "countryAG"},
{"name": "Argentina", "value": "countryAR"},
{"name": "Armenia", "value": "countryAM"},
{"name": "Aruba", "value": "countryAW"},
{"name": "Australia", "value": "countryAU"},
{"name": "Austria", "value": "countryAT"},
{"name": "Azerbaijan", "value": "countryAZ"},
{"name": "Bahamas", "value": "countryBS"},
{"name": "Bahrain", "value": "countryBH"},
{"name": "Bangladesh", "value": "countryBD"},
{"name": "Barbados", "value": "countryBB"},
{"name": "Belarus", "value": "countryBY"},
{"name": "Belgium", "value": "countryBE"},
{"name": "Belize", "value": "countryBZ"},
{"name": "Benin", "value": "countryBJ"},
{"name": "Bermuda", "value": "countryBM"},
{"name": "Bhutan", "value": "countryBT"},
{"name": "Bolivia", "value": "countryBO"},
{"name": "Bosnia and Herzegovina", "value": "countryBA"},
{"name": "Botswana", "value": "countryBW"},
{"name": "Bouvet Island", "value": "countryBV"},
{"name": "Brazil", "value": "countryBR"},
{"name": "British Indian Ocean Territory", "value": "countryIO"},
{"name": "Brunei Darussalam", "value": "countryBN"},
{"name": "Bulgaria", "value": "countryBG"},
{"name": "Burkina Faso", "value": "countryBF"},
{"name": "Burundi", "value": "countryBI"},
{"name": "Cambodia", "value": "countryKH"},
{"name": "Cameroon", "value": "countryCM"},
{"name": "Canada", "value": "countryCA"},
{"name": "Cape Verde", "value": "countryCV"},
{"name": "Cayman Islands", "value": "countryKY"},
{"name": "Central African Republic", "value": "countryCF"},
{"name": "Chad", "value": "countryTD"},
{"name": "Chile", "value": "countryCL"},
{"name": "China", "value": "countryCN"},
{"name": "Christmas Island", "value": "countryCX"},
{"name": "Cocos (Keeling) Islands", "value": "countryCC"},
{"name": "Colombia", "value": "countryCO"},
{"name": "Comoros", "value": "countryKM"},
{"name": "Congo", "value": "countryCG"},
{"name": "Congo, Democratic Republic of the", "value": "countryCD"},
{"name": "Cook Islands", "value": "countryCK"},
{"name": "Costa Rica", "value": "countryCR"},
{"name": "Cote D'Ivoire", "value": "countryCI"},
{"name": "Croatia (Hrvatska)", "value": "countryHR"},
{"name": "Cuba", "value": "countryCU"},
{"name": "Cyprus", "value": "countryCY"},
{"name": "Czech Republic", "value": "countryCZ"},
{"name": "Denmark", "value": "countryDK"},
{"name": "Djibouti", "value": "countryDJ"},
{"name": "Dominica", "value": "countryDM"},
{"name": "Dominican Republic", "value": "countryDO"},
{"name": "East Timor", "value": "countryTP"},
{"name": "Ecuador", "value": "countryEC"},
{"name": "Egypt", "value": "countryEG"},
{"name": "El Salvador", "value": "countrySV"},
{"name": "Equatorial Guinea", "value": "countryGQ"},
{"name": "Eritrea", "value": "countryER"},
{"name": "Estonia", "value": "countryEE"},
{"name": "Ethiopia", "value": "countryET"},
{"name": "European Union", "value": "countryEU"},
{"name": "Falkland Islands (Malvinas)", "value": "countryFK"},
{"name": "Faroe Islands", "value": "countryFO"},
{"name": "Fiji", "value": "countryFJ"},
{"name": "Finland", "value": "countryFI"},
{"name": "France", "value": "countryFR"},
{"name": "France, Metropolitan", "value": "countryFX"},
{"name": "French Guiana", "value": "countryGF"},
{"name": "French Polynesia", "value": "countryPF"},
{"name": "French Southern Territories", "value": "countryTF"},
{"name": "Gabon", "value": "countryGA"},
{"name": "Gambia", "value": "countryGM"},
{"name": "Georgia", "value": "countryGE"},
{"name": "Germany", "value": "countryDE"},
{"name": "Ghana", "value": "countryGH"},
{"name": "Gibraltar", "value": "countryGI"},
{"name": "Greece", "value": "countryGR"},
{"name": "Greenland", "value": "countryGL"},
{"name": "Grenada", "value": "countryGD"},
{"name": "Guadeloupe", "value": "countryGP"},
{"name": "Guam", "value": "countryGU"},
{"name": "Guatemala", "value": "countryGT"},
{"name": "Guinea", "value": "countryGN"},
{"name": "Guinea-Bissau", "value": "countryGW"},
{"name": "Guyana", "value": "countryGY"},
{"name": "Haiti", "value": "countryHT"},
{"name": "Heard Island and Mcdonald Islands", "value": "countryHM"},
{"name": "Holy See (Vatican City State)", "value": "countryVA"},
{"name": "Honduras", "value": "countryHN"},
{"name": "Hong Kong", "value": "countryHK"},
{"name": "Hungary", "value": "countryHU"},
{"name": "Iceland", "value": "countryIS"},
{"name": "India", "value": "countryIN"},
{"name": "Indonesia", "value": "countryID"},
{"name": "Iran, Islamic Republic of", "value": "countryIR"},
{"name": "Iraq", "value": "countryIQ"},
{"name": "Ireland", "value": "countryIE"},
{"name": "Israel", "value": "countryIL"},
{"name": "Italy", "value": "countryIT"},
{"name": "Jamaica", "value": "countryJM"},
{"name": "Japan", "value": "countryJP"},
{"name": "Jordan", "value": "countryJO"},
{"name": "Kazakhstan", "value": "countryKZ"},
{"name": "Kenya", "value": "countryKE"},
{"name": "Kiribati", "value": "countryKI"},
{"name": "Korea, Democratic People's Republic of",
"value": "countryKP"},
{"name": "Korea, Republic of", "value": "countryKR"},
{"name": "Kuwait", "value": "countryKW"},
{"name": "Kyrgyzstan", "value": "countryKG"},
{"name": "Lao People's Democratic Republic", "value": "countryLA"},
{"name": "Latvia", "value": "countryLV"},
{"name": "Lebanon", "value": "countryLB"},
{"name": "Lesotho", "value": "countryLS"},
{"name": "Liberia", "value": "countryLR"},
{"name": "Libyan Arab Jamahiriya", "value": "countryLY"},
{"name": "Liechtenstein", "value": "countryLI"},
{"name": "Lithuania", "value": "countryLT"},
{"name": "Luxembourg", "value": "countryLU"},
{"name": "Macao", "value": "countryMO"},
{"name": "Macedonia, the Former Yugoslav Republic of",
"value": "countryMK"},
{"name": "Madagascar", "value": "countryMG"},
{"name": "Malawi", "value": "countryMW"},
{"name": "Malaysia", "value": "countryMY"},
{"name": "Maldives", "value": "countryMV"},
{"name": "Mali", "value": "countryML"},
{"name": "Malta", "value": "countryMT"},
{"name": "Marshall Islands", "value": "countryMH"},
{"name": "Martinique", "value": "countryMQ"},
{"name": "Mauritania", "value": "countryMR"},
{"name": "Mauritius", "value": "countryMU"},
{"name": "Mayotte", "value": "countryYT"},
{"name": "Mexico", "value": "countryMX"},
{"name": "Micronesia, Federated States of", "value": "countryFM"},
{"name": "Moldova, Republic of", "value": "countryMD"},
{"name": "Monaco", "value": "countryMC"},
{"name": "Mongolia", "value": "countryMN"},
{"name": "Montserrat", "value": "countryMS"},
{"name": "Morocco", "value": "countryMA"},
{"name": "Mozambique", "value": "countryMZ"},
{"name": "Myanmar", "value": "countryMM"},
{"name": "Namibia", "value": "countryNA"},
{"name": "Nauru", "value": "countryNR"},
{"name": "Nepal", "value": "countryNP"},
{"name": "Netherlands", "value": "countryNL"},
{"name": "Netherlands Antilles", "value": "countryAN"},
{"name": "New Caledonia", "value": "countryNC"},
{"name": "New Zealand", "value": "countryNZ"},
{"name": "Nicaragua", "value": "countryNI"},
{"name": "Niger", "value": "countryNE"},
{"name": "Nigeria", "value": "countryNG"},
{"name": "Niue", "value": "countryNU"},
{"name": "Norfolk Island", "value": "countryNF"},
{"name": "Northern Mariana Islands", "value": "countryMP"},
{"name": "Norway", "value": "countryNO"},
{"name": "Oman", "value": "countryOM"},
{"name": "Pakistan", "value": "countryPK"},
{"name": "Palau", "value": "countryPW"},
{"name": "Palestinian Territory", "value": "countryPS"},
{"name": "Panama", "value": "countryPA"},
{"name": "Papua New Guinea", "value": "countryPG"},
{"name": "Paraguay", "value": "countryPY"},
{"name": "Peru", "value": "countryPE"},
{"name": "Philippines", "value": "countryPH"},
{"name": "Pitcairn", "value": "countryPN"},
{"name": "Poland", "value": "countryPL"},
{"name": "Portugal", "value": "countryPT"},
{"name": "Puerto Rico", "value": "countryPR"},
{"name": "Qatar", "value": "countryQA"},
{"name": "Reunion", "value": "countryRE"},
{"name": "Romania", "value": "countryRO"},
{"name": "Russian Federation", "value": "countryRU"},
{"name": "Rwanda", "value": "countryRW"},
{"name": "Saint Helena", "value": "countrySH"},
{"name": "Saint Kitts and Nevis", "value": "countryKN"},
{"name": "Saint Lucia", "value": "countryLC"},
{"name": "Saint Pierre and Miquelon", "value": "countryPM"},
{"name": "Saint Vincent and the Grenadines", "value": "countryVC"},
{"name": "Samoa", "value": "countryWS"},
{"name": "San Marino", "value": "countrySM"},
{"name": "Sao Tome and Principe", "value": "countryST"},
{"name": "Saudi Arabia", "value": "countrySA"},
{"name": "Senegal", "value": "countrySN"},
{"name": "Serbia and Montenegro", "value": "countryCS"},
{"name": "Seychelles", "value": "countrySC"},
{"name": "Sierra Leone", "value": "countrySL"},
{"name": "Singapore", "value": "countrySG"},
{"name": "Slovakia", "value": "countrySK"},
{"name": "Slovenia", "value": "countrySI"},
{"name": "Solomon Islands", "value": "countrySB"},
{"name": "Somalia", "value": "countrySO"},
{"name": "South Africa", "value": "countryZA"},
{"name": "South Georgia and the South Sandwich Islands",
"value": "countryGS"},
{"name": "Spain", "value": "countryES"},
{"name": "Sri Lanka", "value": "countryLK"},
{"name": "Sudan", "value": "countrySD"},
{"name": "Suriname", "value": "countrySR"},
{"name": "Svalbard and Jan Mayen", "value": "countrySJ"},
{"name": "Swaziland", "value": "countrySZ"},
{"name": "Sweden", "value": "countrySE"},
{"name": "Switzerland", "value": "countryCH"},
{"name": "Syrian Arab Republic", "value": "countrySY"},
{"name": "Taiwan, Province of China", "value": "countryTW"},
{"name": "Tajikistan", "value": "countryTJ"},
{"name": "Tanzania, United Republic of", "value": "countryTZ"},
{"name": "Thailand", "value": "countryTH"},
{"name": "Togo", "value": "countryTG"},
{"name": "Tokelau", "value": "countryTK"},
{"name": "Tonga", "value": "countryTO"},
{"name": "Trinidad and Tobago", "value": "countryTT"},
{"name": "Tunisia", "value": "countryTN"},
{"name": "Turkey", "value": "countryTR"},
{"name": "Turkmenistan", "value": "countryTM"},
{"name": "Turks and Caicos Islands", "value": "countryTC"},
{"name": "Tuvalu", "value": "countryTV"},
{"name": "Uganda", "value": "countryUG"},
{"name": "Ukraine", "value": "countryUA"},
{"name": "United Arab Emirates", "value": "countryAE"},
{"name": "United Kingdom", "value": "countryUK"},
{"name": "United States", "value": "countryUS"},
{"name": "United States Minor Outlying Islands", "value": "countryUM"},
{"name": "Uruguay", "value": "countryUY"},
{"name": "Uzbekistan", "value": "countryUZ"},
{"name": "Vanuatu", "value": "countryVU"},
{"name": "Venezuela", "value": "countryVE"},
{"name": "Vietnam", "value": "countryVN"},
{"name": "Virgin Islands, British", "value": "countryVG"},
{"name": "Virgin Islands, U.S.", "value": "countryVI"},
{"name": "Wallis and Futuna", "value": "countryWF"},
{"name": "Western Sahara", "value": "countryEH"},
{"name": "Yemen", "value": "countryYE"},
{"name": "Yugoslavia", "value": "countryYU"},
{"name": "Zambia", "value": "countryZM"},
{"name": "Zimbabwe", "value": "countryZW"}
]

49
misc/languages.json Normal file
View File

@ -0,0 +1,49 @@
[
{"name": "Default (none specified)", "value": ""},
{"name": "English", "value": "lang_en"},
{"name": "Afrikaans", "value": "lang_af"},
{"name": "Arabic", "value": "lang_ar"},
{"name": "Armenian", "value": "lang_hy"},
{"name": "Belarusian", "value": "lang_be"},
{"name": "Bulgarian", "value": "lang_bg"},
{"name": "Catalan", "value": "lang_ca"},
{"name": "Chinese (Simplified)", "value": "lang_zh-CN"},
{"name": "Chinese (Traditional)", "value": "lang_zh-TW"},
{"name": "Croatian", "value": "lang_hr"},
{"name": "Czech", "value": "lang_cs"},
{"name": "Danish", "value": "lang_da"},
{"name": "Dutch", "value": "lang_nl"},
{"name": "Esperanto", "value": "lang_eo"},
{"name": "Estonian", "value": "lang_et"},
{"name": "Filipino", "value": "lang_tl"},
{"name": "Finnish", "value": "lang_fi"},
{"name": "French", "value": "lang_fr"},
{"name": "German", "value": "lang_de"},
{"name": "Greek", "value": "lang_el"},
{"name": "Hebrew", "value": "lang_iw"},
{"name": "Hindi", "value": "lang_hi"},
{"name": "Hungarian", "value": "lang_hu"},
{"name": "Icelandic", "value": "lang_is"},
{"name": "Indonesian", "value": "lang_id"},
{"name": "Italian", "value": "lang_it"},
{"name": "Japanese", "value": "lang_ja"},
{"name": "Korean", "value": "lang_ko"},
{"name": "Latvian", "value": "lang_lv"},
{"name": "Lithuanian", "value": "lang_lt"},
{"name": "Norwegian", "value": "lang_no"},
{"name": "Persian", "value": "lang_fa"},
{"name": "Polish", "value": "lang_pl"},
{"name": "Portuguese", "value": "lang_pt"},
{"name": "Romanian", "value": "lang_ro"},
{"name": "Russian", "value": "lang_ru"},
{"name": "Serbian", "value": "lang_sr"},
{"name": "Slovak", "value": "lang_sk"},
{"name": "Slovenian", "value": "lang_sl"},
{"name": "Spanish", "value": "lang_es"},
{"name": "Swahili", "value": "lang_sw"},
{"name": "Swedish", "value": "lang_sv"},
{"name": "Thai", "value": "lang_th"},
{"name": "Turkish", "value": "lang_tr"},
{"name": "Ukrainian", "value": "lang_uk"},
{"name": "Vietnamese", "value": "lang_vi"}
]

7
misc/tor/start-tor.sh Executable file
View File

@ -0,0 +1,7 @@
#!/bin/bash

# Start Tor: root goes through the system service, any other user runs the
# tor binary directly with the config at /etc/tor/torrc.
if [ "$(whoami)" = "root" ]; then
    service tor start
else
    tor -f /etc/tor/torrc
fi

8
misc/tor/torrc Normal file
View File

@ -0,0 +1,8 @@
DataDirectory /var/lib/tor
ControlPort 9051
CookieAuthentication 1
DataDirectoryGroupReadable 1
CookieAuthFileGroupReadable 1
ExtORPortCookieAuthFileGroupReadable 1
CacheDirectoryGroupReadable 1
CookieAuthFile /var/lib/tor/control_auth_cookie

View File

@ -1,20 +1,33 @@
attrs==19.3.0
beautifulsoup4==4.8.2
bs4==0.0.1
cachelib==0.1
certifi==2020.4.5.1
cffi==1.13.2
chardet==3.0.4
Click==7.0
cryptography==2.8
cryptography==3.2
Flask==1.1.1
Flask-Session==0.3.2
idna==2.9
itsdangerous==1.1.0
Jinja2==2.10.3
lxml==4.5.1
MarkupSafe==1.1.1
more-itertools==8.3.0
packaging==20.4
pluggy==0.13.1
py==1.8.1
pycodestyle==2.6.0
pycparser==2.19
pyOpenSSL==19.1.0
pyparsing==2.4.7
PySocks==1.7.1
pytest==5.4.1
python-dateutil==2.8.1
requests==2.23.0
six==1.14.0
soupsieve==1.9.5
Werkzeug==0.16.0
stem==1.8.0
urllib3==1.25.9
waitress==1.4.3
wcwidth==0.1.9
Werkzeug==0.16.0

View File

@ -8,7 +8,7 @@ setuptools.setup(
author='Ben Busby',
author_email='benbusby@protonmail.com',
name='whoogle-search',
version='0.2.1',
version='0.3.0',
include_package_data=True,
install_requires=requirements,
description='Self-hosted, ad-free, privacy-respecting Google metasearch engine',

View File

@ -1,6 +1,16 @@
from app import app
from app.utils.session_utils import generate_user_keys
import pytest
import random
# Sample configuration values, chosen at random each time this module is
# imported. The language and country entries are drawn from the LANGUAGES and
# COUNTRIES lists stored in app.config; the two flag entries are the string
# '0' or '1'.
demo_config = {
'near': random.choice(['Seattle', 'New York', 'San Francisco']),
'dark_mode': str(random.getrandbits(1)),
'nojs': str(random.getrandbits(1)),
'lang_interface': random.choice(app.config['LANGUAGES'])['value'],
'lang_search': random.choice(app.config['LANGUAGES'])['value'],
'ctry': random.choice(app.config['COUNTRIES'])['value']
}
@pytest.fixture

View File

@ -7,7 +7,8 @@ from dateutil.parser import *
def get_search_results(data):
secret_key = generate_user_keys()
soup = Filter(user_keys=secret_key).clean(BeautifulSoup(data, 'html.parser'))
soup = Filter(user_keys=secret_key).clean(
BeautifulSoup(data, 'html.parser'))
main_divs = soup.find('div', {'id': 'main'})
assert len(main_divs) > 1
@ -15,7 +16,9 @@ def get_search_results(data):
result_divs = []
for div in main_divs:
# Result divs should only have 1 inner div
if len(list(div.children)) != 1 or not div.findChild() or 'div' not in div.findChild().name:
if (len(list(div.children)) != 1
or not div.findChild()
or 'div' not in div.findChild().name):
continue
result_divs.append(div)
@ -43,6 +46,20 @@ def test_post_results(client):
assert len(get_search_results(rv.data)) <= 15
# TODO: Unit test the site alt method instead -- the results returned
# are too unreliable for this test in particular.
# def test_site_alts(client):
# rv = client.post('/search', data=dict(q='twitter official account'))
# assert b'twitter.com/Twitter' in rv.data
# client.post('/config', data=dict(alts=True))
# assert json.loads(client.get('/config').data)['alts']
# rv = client.post('/search', data=dict(q='twitter official account'))
# assert b'twitter.com/Twitter' not in rv.data
# assert b'nitter.net/Twitter' in rv.data
def test_recent_results(client):
times = {
'past year': 365,
@ -62,6 +79,7 @@ def test_recent_results(client):
try:
date = parse(date_span)
assert (current_date - date).days <= (num_days + 5) # Date can have a little bit of wiggle room
# Date can have a little bit of wiggle room
assert (current_date - date).days <= (num_days + 5)
except ParserError:
pass

View File

@ -1,15 +1,6 @@
from app.models.config import Config
import json
import random
# Randomized settings used as test input. Values are drawn once, when this
# module is imported; the two boolean-like flags are stored as the strings
# '0' or '1', and the language/country entries are the 'value' field of a
# randomly chosen option from the Config class lists.
demo_config = dict(
    near=random.choice(['Seattle', 'New York', 'San Francisco']),
    dark_mode=str(random.getrandbits(1)),
    nojs=str(random.getrandbits(1)),
    lang_interface=random.choice(Config.LANGUAGES)['value'],
    lang_search=random.choice(Config.LANGUAGES)['value'],
    ctry=random.choice(Config.COUNTRIES)['value'],
)
from test.conftest import demo_config
def test_main(client):
@ -27,6 +18,16 @@ def test_feeling_lucky(client):
assert rv._status_code == 303
def test_ddg_bang(client):
    """Check that bang-prefixed queries are answered with a redirect.

    Each query is sent to ``/search`` and must produce a 302 response whose
    ``Location`` header starts with the expected site prefix.

    Args:
        client: the test client fixture used to issue GET requests.
    """
    expected_redirects = (
        ('/search?q=!gh%20whoogle', 'https://github.com'),
        ('/search?q=!w%20github', 'https://en.wikipedia.org'),
    )

    for url, location_prefix in expected_redirects:
        rv = client.get(url)
        assert rv.status_code == 302
        # Fall back to '' so a missing Location header fails this assertion
        # instead of raising AttributeError on None.
        assert rv.headers.get('Location', '').startswith(location_prefix)
def test_config(client):
rv = client.post('/config', data=demo_config)
assert rv._status_code == 302
@ -38,8 +39,14 @@ def test_config(client):
for key in demo_config.keys():
assert config[key] == demo_config[key]
# Test setting config via search
custom_config = '&dark=1&lang_interface=lang_en'
rv = client.get('/search?q=test' + custom_config)
assert rv._status_code == 200
assert custom_config.replace('&', '&amp;') in str(rv.data)
def test_opensearch(client):
rv = client.get('/opensearch.xml')
assert rv._status_code == 200
assert 'Whoogle' in str(rv.data)
assert '<ShortName>Whoogle</ShortName>' in str(rv.data)