diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 9da6d04..f91a033 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -7,6 +7,12 @@ assignees: '' --- + + **Describe the feature you'd like to see added** A short description of the feature, and what it would accomplish. diff --git a/.github/workflows/buildx.yml b/.github/workflows/buildx.yml index 47fc086..11de639 100644 --- a/.github/workflows/buildx.yml +++ b/.github/workflows/buildx.yml @@ -21,6 +21,8 @@ jobs: docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin - name: build and push the image run: | + docker run --rm --privileged multiarch/qemu-user-static --reset -p yes + docker buildx ls docker buildx build --push \ --tag benbusby/whoogle-search:buildx-experimental \ --platform linux/amd64,linux/arm/v7,linux/arm64 . diff --git a/Dockerfile b/Dockerfile index 92c1b57..7cb8ebf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,18 +1,23 @@ -FROM python:3.8-slim +FROM python:3.8-slim as builder -WORKDIR /usr/src/app RUN apt-get update && apt-get install -y \ build-essential \ - libcurl4-openssl-dev \ - libssl-dev \ libxml2-dev \ libxslt-dev \ - libffi-dev \ - tor + libssl-dev \ + libffi-dev -COPY config/tor/torrc /etc/tor/torrc COPY requirements.txt . 
-RUN pip install --no-cache-dir -r requirements.txt + +RUN pip install --prefix /install --no-warn-script-location --no-cache-dir -r requirements.txt + +FROM python:3.8-slim + +RUN apt-get update && apt-get install -y \ + libcurl4-openssl-dev \ + tor \ + wget \ + && rm -rf /var/lib/apt/lists/* ARG config_dir=/config RUN mkdir -p $config_dir @@ -35,6 +40,10 @@ ENV WHOOGLE_PROXY_TYPE=$proxytype ARG proxyloc='' ENV WHOOGLE_PROXY_LOC=$proxyloc +ARG whoogle_dotenv='' +ENV WHOOGLE_DOTENV=$whoogle_dotenv + +ARG use_https=1 ENV HTTPS_ONLY=$use_https ARG whoogle_port=5000 @@ -45,10 +54,22 @@ ENV WHOOGLE_ALT_TW=$twitter_alt ARG youtube_alt='invidious.snopyta.org' ENV WHOOGLE_ALT_YT=$youtube_alt ARG instagram_alt='bibliogram.art/u' -ENV WHOOGLE_ALT_YT=$instagram_alt +ENV WHOOGLE_ALT_IG=$instagram_alt +ARG reddit_alt='libredd.it' +ENV WHOOGLE_ALT_RD=$reddit_alt -COPY . . +WORKDIR /whoogle + +COPY --from=builder /install /usr/local +COPY misc/tor/torrc /etc/tor/torrc +COPY misc/tor/start-tor.sh misc/tor/start-tor.sh +COPY app/ app/ +COPY run . +COPY whoogle.env . EXPOSE $EXPOSE_PORT -CMD config/tor/start-tor.sh & ./run +HEALTHCHECK --interval=5m --timeout=5s \ + CMD wget --no-verbose --tries=1 http://localhost:${EXPOSE_PORT}/ || exit 1 + +CMD misc/tor/start-tor.sh & ./run diff --git a/README.md b/README.md index e5c084a..83f11bf 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,8 @@ Contents 5. [Usage](#usage) 6. [Extra Steps](#extra-steps) 7. [FAQ](#faq) -8. [Screenshots](#screenshots) +8. [Public Instances](#public-instances) +9. [Screenshots](#screenshots) ## Features - No ads or sponsored content @@ -55,7 +56,7 @@ If using Heroku Quick Deploy, **you can skip this section**. 
There are a few different ways to begin using the app, depending on your preferences: ### A) [Heroku Quick Deploy](https://heroku.com/about) -[![Deploy](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy?template=https://github.com/benbusby/whoogle-search/tree/heroku-app) +[![Deploy](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy?template=https://github.com/benbusby/whoogle-search/tree/heroku-app-beta) *Note: Requires a (free) Heroku account* @@ -136,6 +137,9 @@ Description=Whoogle #Environment=WHOOGLE_ALT_TW=nitter.net #Environment=WHOOGLE_ALT_YT=invidious.snopyta.org #Environment=WHOOGLE_ALT_IG=bibliogram.art/u +#Environment=WHOOGLE_ALT_RD=libredd.it +# Load values from dotenv only +#Environment=WHOOGLE_DOTENV=1 Type=simple User=root WorkingDirectory= @@ -218,6 +222,9 @@ heroku open This series of commands can take a while, but once you run it once, you shouldn't have to run it again. The final command, `heroku open` will launch a tab in your web browser, where you can test out Whoogle and even [set it as your primary search engine](https://github.com/benbusby/whoogle#set-whoogle-as-your-primary-search-engine). You may also edit environment variables from your app’s Settings tab in the Heroku Dashboard. +#### Arch Linux & Arch-based Distributions +There is an [AUR package available](https://aur.archlinux.org/packages/whoogle-git/), as well as a pre-built and daily updated package available at [Chaotic-AUR](https://chaotic.cx). + #### Using your own server, or alternative container deployment There are other methods for deploying docker containers that are well outlined in [this article](https://rollout.io/blog/the-shortlist-of-docker-hosting/), but there are too many to describe set up for each here. Generally it should be about the same amount of effort as the Heroku deployment. 
@@ -228,21 +235,39 @@ Depending on your preferences, you can also deploy the app yourself on your own - A bit more experience or willingness to work through issues ## Environment Variables -There are a few optional environment variables available for customizing a Whoogle instance: +There are a few optional environment variables available for customizing a Whoogle instance. These can be set manually, or copied into `whoogle.env` and enabled by setting `WHOOGLE_DOTENV=1`. -| Variable | Description | -| ------------------ | -------------------------------------------------------------- | -| WHOOGLE_USER | The username for basic auth. WHOOGLE_PASS must also be set if used. | -| WHOOGLE_PASS | The password for basic auth. WHOOGLE_USER must also be set if used. | -| WHOOGLE_PROXY_USER | The username of the proxy server. | -| WHOOGLE_PROXY_PASS | The password of the proxy server. | -| WHOOGLE_PROXY_TYPE | The type of the proxy server. Can be "socks5", "socks4", or "http". | -| WHOOGLE_PROXY_LOC | The location of the proxy server (host or ip). | -| EXPOSE_PORT | The port where Whoogle will be exposed. | +| Variable | Description | +| ------------------ | ----------------------------------------------------------------------------------------- | +| WHOOGLE_DOTENV | Load environment variables in `whoogle.env` | +| WHOOGLE_USER | The username for basic auth. WHOOGLE_PASS must also be set if used. | +| WHOOGLE_PASS | The password for basic auth. WHOOGLE_USER must also be set if used. | +| WHOOGLE_PROXY_USER | The username of the proxy server. | +| WHOOGLE_PROXY_PASS | The password of the proxy server. | +| WHOOGLE_PROXY_TYPE | The type of the proxy server. Can be "socks5", "socks4", or "http". | +| WHOOGLE_PROXY_LOC | The location of the proxy server (host or ip). | +| EXPOSE_PORT | The port where Whoogle will be exposed. | | HTTPS_ONLY | Enforce HTTPS. 
(See [here](https://github.com/benbusby/whoogle-search#https-enforcement)) | -| WHOOGLE_ALT_TW | The twitter.com alternative to use when site alternatives are enabled in the config. | -| WHOOGLE_ALT_YT | The youtube.com alternative to use when site alternatives are enabled in the config. | -| WHOOGLE_ALT_IG | The instagram.com alternative to use when site alternatives are enabled in the config. | +| WHOOGLE_ALT_TW | The twitter.com alternative to use when site alternatives are enabled in the config. | +| WHOOGLE_ALT_YT | The youtube.com alternative to use when site alternatives are enabled in the config. | +| WHOOGLE_ALT_IG | The instagram.com alternative to use when site alternatives are enabled in the config. | +| WHOOGLE_ALT_RD | The reddit.com alternative to use when site alternatives are enabled in the config. | + +### Config Environment Variables +These environment variables allow setting default config values, but can be overwritten manually by using the home page config menu. These allow a shortcut for destroying/rebuilding an instance to the same config state every time. + +| Variable | Description | +| ----------------------- | --------------------------------------------------------------- | +| WHOOGLE_CONFIG_COUNTRY | Filter results by hosting country | +| WHOOGLE_CONFIG_LANGUAGE | Set interface and search result language | +| WHOOGLE_CONFIG_DARK | Enable dark theme | +| WHOOGLE_CONFIG_SAFE | Enable safe searches | +| WHOOGLE_CONFIG_ALTS | Use social media site alternatives (nitter, invidious, etc) | +| WHOOGLE_CONFIG_TOR | Use Tor routing (if available) | +| WHOOGLE_CONFIG_NEW_TAB | Always open results in new tab | +| WHOOGLE_CONFIG_GET_ONLY | Search using GET requests only | +| WHOOGLE_CONFIG_URL | The root url of the instance (`https:///`) | +| WHOOGLE_CONFIG_STYLE | The custom CSS to use for styling (must be single line) | ## Usage Same as most search engines, with the exception of filtering by time range. 
@@ -329,6 +354,16 @@ I'm a huge fan of Searx though and encourage anyone to use that instead if they A lot of the app currently piggybacks on Google's existing support for fetching results pages with Javascript disabled. To their credit, they've done an excellent job with styling pages, but it seems that the image results page - particularly on mobile - is a little rough. Moving forward, with enough interest, I'd like to transition to fetching the results and parsing them into a unique Whoogle-fied interface that I can style myself. +## Public Instances + +*Note: Use public instances at your own discretion. Maintainers of Whoogle do not personally validate the integrity of these instances, and popular public instances are more likely to be rate-limited or blocked.* + +- [https://whoogle.sdf.org](https://whoogle.sdf.org) +- [https://whoogle.himiko.cloud](https://whoogle.himiko.cloud) +- [https://whoogle.kavin.rocks](https://whoogle.kavin.rocks) or [http://whoogledq5f5wly5p4i2ohnvjwlihnlg4oajjum2oeddfwqdwupbuhqd.onion](http://whoogledq5f5wly5p4i2ohnvjwlihnlg4oajjum2oeddfwqdwupbuhqd.onion) +- [https://search.garudalinux.org](https://search.garudalinux.org) +- [https://whooglesearch.net/](https://whooglesearch.net/) + ## Screenshots #### Desktop ![Whoogle Desktop](docs/screenshot_desktop.jpg) diff --git a/app.json b/app.json index 2e70f01..eb80f95 100644 --- a/app.json +++ b/app.json @@ -47,18 +47,68 @@ }, "WHOOGLE_ALT_TW": { "description": "The site to use as a replacement for twitter.com when site alternatives are enabled in the config.", - "value": "", + "value": "nitter.net", "required": false }, "WHOOGLE_ALT_YT": { "description": "The site to use as a replacement for youtube.com when site alternatives are enabled in the config.", - "value": "", + "value": "invidious.snopyta.org", "required": false }, "WHOOGLE_ALT_IG": { "description": "The site to use as a replacement for instagram.com when site alternatives are enabled in the config.", - "value": "", + "value": 
"bibliogram.art/u", "required": false + }, + "WHOOGLE_ALT_RD": { + "description": "The site to use as a replacement for reddit.com when site alternatives are enabled in the config.", + "value": "libredd.it", + "required": false + }, + "WHOOGLE_CONFIG_COUNTRY": { + "description": "[CONFIG] The country to use for restricting search results (use values from https://raw.githubusercontent.com/benbusby/whoogle-search/develop/app/static/settings/countries.json)", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_LANGUAGE": { + "description": "[CONFIG] The language to use for search results and interface (use values from https://raw.githubusercontent.com/benbusby/whoogle-search/develop/app/static/settings/languages.json)", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_DARK": { + "description": "[CONFIG] Enable dark mode (set to 1 or leave blank)", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_SAFE": { + "description": "[CONFIG] Use safe mode for searches (set to 1 or leave blank)", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_ALTS": { + "description": "[CONFIG] Use social media alternatives (set to 1 or leave blank)", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_TOR": { + "description": "[CONFIG] Use Tor, if available (set to 1 or leave blank)", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_NEW_TAB": { + "description": "[CONFIG] Always open results in new tab (set to 1 or leave blank)", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_GET_ONLY": { + "description": "[CONFIG] Search using GET requests only (set to 1 or leave blank)", + "value": "", + "required": false + }, + "WHOOGLE_CONFIG_STYLE": { + "description": "[CONFIG] Custom CSS styling (paste in CSS or leave blank)", + "value": "", + "required": false } } } diff --git a/app/__init__.py b/app/__init__.py index 13350a1..67e6b76 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -1,30 +1,37 @@ from app.request import send_tor_signal 
-from app.utils.session_utils import generate_user_keys -from app.utils.gen_ddg_bangs import gen_bangs_json +from app.utils.session import generate_user_key +from app.utils.bangs import gen_bangs_json from flask import Flask from flask_session import Session import json import os from stem import Signal +from dotenv import load_dotenv app = Flask(__name__, static_folder=os.path.dirname( os.path.abspath(__file__)) + '/static') -app.user_elements = {} -app.default_key_set = generate_user_keys() + +# Load .env file if enabled +if os.getenv("WHOOGLE_DOTENV", ''): + dotenv_path = '../whoogle.env' + load_dotenv(os.path.join(os.path.dirname(os.path.abspath(__file__)), + dotenv_path)) + +app.default_key = generate_user_key() app.no_cookie_ips = [] app.config['SECRET_KEY'] = os.urandom(32) app.config['SESSION_TYPE'] = 'filesystem' -app.config['VERSION_NUMBER'] = '0.3.2' +app.config['VERSION_NUMBER'] = '0.4.0' app.config['APP_ROOT'] = os.getenv( 'APP_ROOT', os.path.dirname(os.path.abspath(__file__))) -app.config['LANGUAGES'] = json.load(open( - os.path.join(app.config['APP_ROOT'], 'misc/languages.json'))) -app.config['COUNTRIES'] = json.load(open( - os.path.join(app.config['APP_ROOT'], 'misc/countries.json'))) app.config['STATIC_FOLDER'] = os.getenv( 'STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static')) +app.config['LANGUAGES'] = json.load(open( + os.path.join(app.config['STATIC_FOLDER'], 'settings/languages.json'))) +app.config['COUNTRIES'] = json.load(open( + os.path.join(app.config['STATIC_FOLDER'], 'settings/countries.json'))) app.config['CONFIG_PATH'] = os.getenv( 'CONFIG_VOLUME', os.path.join(app.config['STATIC_FOLDER'], 'config')) @@ -40,6 +47,14 @@ app.config['BANG_PATH'] = os.getenv( app.config['BANG_FILE'] = os.path.join( app.config['BANG_PATH'], 'bangs.json') +app.config['CSP'] = 'default-src \'none\';' \ + 'manifest-src \'self\';' \ + 'img-src \'self\';' \ + 'style-src \'self\' \'unsafe-inline\';' \ + 'script-src \'self\';' \ + 'media-src \'self\';' \ 
+ 'connect-src \'self\';' \ + 'form-action \'self\';' if not os.path.exists(app.config['CONFIG_PATH']): os.makedirs(app.config['CONFIG_PATH']) diff --git a/app/filter.py b/app/filter.py index ccd6af3..8f457f3 100644 --- a/app/filter.py +++ b/app/filter.py @@ -1,14 +1,16 @@ from app.request import VALID_PARAMS -from app.utils.filter_utils import * -from bs4.element import ResultSet +from app.utils.results import * +from bs4 import BeautifulSoup +from bs4.element import ResultSet, Tag from cryptography.fernet import Fernet +from flask import render_template import re import urllib.parse as urlparse from urllib.parse import parse_qs class Filter: - def __init__(self, user_keys: dict, mobile=False, config=None): + def __init__(self, user_key: str, mobile=False, config=None) -> None: if config is None: config = {} @@ -18,7 +20,7 @@ class Filter: self.new_tab = config['new_tab'] if 'new_tab' in config else False self.alt_redirect = config['alts'] if 'alts' in config else False self.mobile = mobile - self.user_keys = user_keys + self.user_key = user_key self.main_divs = ResultSet('') self._elements = 0 @@ -29,7 +31,7 @@ class Filter: def elements(self): return self._elements - def reskin(self, page): + def reskin(self, page: str) -> str: # Aesthetic only re-skinning if self.dark: page = page.replace( @@ -39,22 +41,18 @@ class Filter: return page - def encrypt_path(self, msg, is_element=False): + def encrypt_path(self, path, is_element=False) -> str: # Encrypts path to avoid plaintext results in logs if is_element: # Element paths are encrypted separately from text, to allow key # regeneration once all items have been served to the user - enc_path = Fernet( - self.user_keys['element_key'] - ).encrypt(msg.encode()).decode() + enc_path = Fernet(self.user_key).encrypt(path.encode()).decode() self._elements += 1 return enc_path - return Fernet( - self.user_keys['text_key'] - ).encrypt(msg.encode()).decode() + return Fernet(self.user_key).encrypt(path.encode()).decode() - def 
clean(self, soup): + def clean(self, soup) -> BeautifulSoup: self.main_divs = soup.find('div', {'id': 'main'}) self.remove_ads() self.fix_question_section() @@ -90,7 +88,12 @@ class Filter: return soup - def remove_ads(self): + def remove_ads(self) -> None: + """Removes ads found in the list of search result divs + + Returns: + None (The soup object is modified directly) + """ if not self.main_divs: return @@ -99,7 +102,16 @@ class Filter: if has_ad_content(_.text)] _ = div.decompose() if len(div_ads) else None - def fix_question_section(self): + def fix_question_section(self) -> None: + """Collapses the "People Also Asked" section into a "details" element + + These sections are typically the only sections in the results page that + are structured as

<div><h2>Title</h2><div>...</div></div>
, so they are + extracted by checking all result divs for h2 children. + + Returns: + None (The soup object is modified directly) + """ if not self.main_divs: return @@ -126,30 +138,33 @@ class Filter: for question in questions: question['style'] = 'padding: 10px; font-style: italic;' - def update_element_src(self, element, mime): - element_src = element['src'] - if element_src.startswith('//'): - element_src = 'https:' + element_src - elif element_src.startswith(LOGO_URL): + def update_element_src(self, element: Tag, mime: str) -> None: + """Encrypts the original src of an element and rewrites the element src + to use the "/element?src=" pass-through. + + Returns: + None (The soup element is modified directly) + + """ + src = element['src'] + + if src.startswith('//'): + src = 'https:' + src + + if src.startswith(LOGO_URL): # Re-brand with Whoogle logo - element['src'] = 'static/img/logo.png' - element['style'] = 'height:40px;width:162px' + element.replace_with(BeautifulSoup(render_template('logo.html'))) return - elif element_src.startswith(GOOG_IMG): + elif src.startswith(GOOG_IMG) or GOOG_STATIC in src: element['src'] = BLANK_B64 return element['src'] = 'element?url=' + self.encrypt_path( - element_src, + src, is_element=True) + '&type=' + urlparse.quote(mime) - # FIXME: Non-mobile image results link to website instead of image - # if not self.mobile: - # img.append( - # BeautifulSoup(FULL_RES_IMG.format(element_src), - # 'html.parser')) - - def update_styling(self, soup): + def update_styling(self, soup) -> None: + """""" # Remove unnecessary button(s) for button in soup.find_all('button'): button.decompose() @@ -172,7 +187,17 @@ class Filter: except AttributeError: pass - def update_link(self, link): + def update_link(self, link: Tag) -> None: + """Update internal link paths with encrypted path, otherwise remove + unnecessary redirects and/or marketing params from the url + + Args: + link: A bs4 Tag element to inspect and update + + Returns: + None (the tag 
is updated directly) + + """ # Replace href with only the intended destination (no "utm" type tags) href = link['href'].replace('https://www.google.com', '') if 'advanced_search' in href or 'tbm=shop' in href: @@ -212,7 +237,7 @@ class Filter: # Add no-js option if self.nojs: - gen_nojs(link) + append_nojs(link) else: link['href'] = href diff --git a/app/models/config.py b/app/models/config.py index 3916be2..b2ebc57 100644 --- a/app/models/config.py +++ b/app/models/config.py @@ -1,17 +1,24 @@ +from flask import current_app +import os + + class Config: def __init__(self, **kwargs): - self.url = '' - self.lang_search = '' - self.lang_interface = '' - self.ctry = '' - self.safe = False - self.dark = False - self.nojs = False - self.tor = False - self.near = '' - self.alts = False - self.new_tab = False - self.get_only = False + app_config = current_app.config + self.url = os.getenv('WHOOGLE_CONFIG_URL', '') + self.lang_search = os.getenv('WHOOGLE_CONFIG_LANGUAGE', '') + self.lang_interface = os.getenv('WHOOGLE_CONFIG_LANGUAGE', '') + self.style = open(os.path.join(app_config['STATIC_FOLDER'], + 'css/variables.css')).read() + self.ctry = os.getenv('WHOOGLE_CONFIG_COUNTRY', '') + self.safe = bool(os.getenv('WHOOGLE_CONFIG_SAFE', False)) + self.dark = bool(os.getenv('WHOOGLE_CONFIG_DARK', False)) + self.alts = bool(os.getenv('WHOOGLE_CONFIG_ALTS', False)) + self.nojs = bool(os.getenv('WHOOGLE_CONFIG_NOJS', False)) + self.tor = bool(os.getenv('WHOOGLE_CONFIG_TOR', False)) + self.near = os.getenv('WHOOGLE_CONFIG_NEAR', '') + self.new_tab = bool(os.getenv('WHOOGLE_CONFIG_NEW_TAB', False)) + self.get_only = bool(os.getenv('WHOOGLE_CONFIG_GET_ONLY', False)) self.safe_keys = [ 'lang_search', 'lang_interface', @@ -20,6 +27,8 @@ class Config: ] for key, value in kwargs.items(): + if not value: + continue setattr(self, key, value) def __getitem__(self, name): diff --git a/app/request.py b/app/request.py index 446d844..4cd9fcf 100644 --- a/app/request.py +++ b/app/request.py @@ 
-23,16 +23,16 @@ class TorError(Exception): """Exception raised for errors in Tor requests. Attributes: - message -- a message describing the error that occurred - disable -- optionally disables Tor in the user config (note: + message: a message describing the error that occurred + disable: optionally disables Tor in the user config (note: this should only happen if the connection has been dropped altogether). """ - def __init__(self, message, disable=False): + def __init__(self, message, disable=False) -> None: self.message = message self.disable = disable - super().__init__(self.message) + super().__init__(message) def send_tor_signal(signal: Signal) -> bool: @@ -64,7 +64,7 @@ def gen_query(query, args, config, near_city=None) -> str: # Use :past(hour/day/week/month/year) if available # example search "new restaurants :past month" - sub_lang = '' + lang = '' if ':past' in query and 'tbs' not in args: time_range = str.strip(query.split(':past', 1)[-1]) param_dict['tbs'] = '&tbs=' + ('qdr:' + str.lower(time_range[0])) @@ -79,9 +79,10 @@ def gen_query(query, args, config, near_city=None) -> str: # Example: # &tbs=qdr:h,lr:lang_1pl # -- the lr param needs to be extracted and remove the leading '1' - sub_lang = [_ for _ in result_tbs.split(',') if 'lr:' in _] - sub_lang = sub_lang[0][sub_lang[0].find('lr:') + - 3:len(sub_lang[0])] if len(sub_lang) > 0 else '' + result_params = [_ for _ in result_tbs.split(',') if 'lr:' in _] + if len(result_params) > 0: + result_param = result_params[0] + lang = result_param[result_param.find('lr:') + 3:len(result_param)] # Ensure search query is parsable query = urlparse.quote(query) @@ -103,11 +104,11 @@ def gen_query(query, args, config, near_city=None) -> str: if 'source' in args: param_dict['source'] = '&source=' + args.get('source') param_dict['lr'] = ('&lr=' + ''.join( - [_ for _ in sub_lang if not _.isdigit()] - )) if sub_lang else '' + [_ for _ in lang if not _.isdigit()] + )) if lang else '' else: param_dict['lr'] = ( - 
'&lr=' + config.lang_search + '&lr=' + config.lang_search ) if config.lang_search else '' # 'nfpr' defines the exclusion of results from an auto-corrected query @@ -116,7 +117,7 @@ def gen_query(query, args, config, near_city=None) -> str: param_dict['cr'] = ('&cr=' + config.ctry) if config.ctry else '' param_dict['hl'] = ( - '&hl=' + config.lang_interface.replace('lang_', '') + '&hl=' + config.lang_interface.replace('lang_', '') ) if config.lang_interface else '' param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off') @@ -133,9 +134,9 @@ class Request: search suggestions, and loading of external content (images, audio, etc). Attributes: - normal_ua -- the user's current user agent - root_path -- the root path of the whoogle instance - config -- the user's current whoogle configuration + normal_ua: the user's current user agent + root_path: the root path of the whoogle instance + config: the user's current whoogle configuration """ def __init__(self, normal_ua, root_path, config: Config): @@ -150,12 +151,12 @@ class Request: # Set up proxy, if previously configured if os.environ.get('WHOOGLE_PROXY_LOC'): auth_str = '' - if os.environ.get('WHOOGLE_PROXY_USER'): - auth_str = os.environ.get('WHOOGLE_PROXY_USER') + \ - ':' + os.environ.get('WHOOGLE_PROXY_PASS') + if os.environ.get('WHOOGLE_PROXY_USER', ''): + auth_str = os.environ.get('WHOOGLE_PROXY_USER', '') + \ + ':' + os.environ.get('WHOOGLE_PROXY_PASS', '') self.proxies = { - 'http': os.environ.get('WHOOGLE_PROXY_TYPE') + '://' + - auth_str + '@' + os.environ.get('WHOOGLE_PROXY_LOC'), + 'http': os.environ.get('WHOOGLE_PROXY_TYPE', '') + '://' + + auth_str + '@' + os.environ.get('WHOOGLE_PROXY_LOC', ''), } self.proxies['https'] = self.proxies['http'].replace('http', 'https') diff --git a/app/routes.py b/app/routes.py index b175d77..3ae805e 100644 --- a/app/routes.py +++ b/app/routes.py @@ -16,8 +16,9 @@ from requests import exceptions from app import app from app.models.config import Config from 
app.request import Request, TorError -from app.utils.session_utils import valid_user_session -from app.utils.routing_utils import * +from app.utils.bangs import resolve_bang +from app.utils.session import valid_user_session +from app.utils.search import * # Load DDG bang json files only on init bang_json = json.load(open(app.config['BANG_FILE'])) @@ -53,18 +54,14 @@ def before_request_func(): # Generate session values for user if unavailable if not valid_user_session(session): session['config'] = json.load(open(app.config['DEFAULT_CONFIG'])) \ - if os.path.exists(app.config['DEFAULT_CONFIG']) else { - 'url': request.url_root} + if os.path.exists(app.config['DEFAULT_CONFIG']) else {} session['uuid'] = str(uuid.uuid4()) - session['fernet_keys'] = generate_user_keys(True) + session['key'] = generate_user_key(True) # Flag cookies as possibly disabled in order to prevent against # unnecessary session directory expansion g.cookies_disabled = True - if session['uuid'] not in app.user_elements: - app.user_elements.update({session['uuid']: 0}) - # Handle https upgrade if needs_https(request.url): return redirect( @@ -87,14 +84,7 @@ def before_request_func(): @app.after_request -def after_request_func(response): - if app.user_elements[session['uuid']] <= 0 and '/element' in request.url: - # Regenerate element key if all elements have been served to user - session['fernet_keys'][ - 'element_key'] = '' if not g.cookies_disabled else \ - app.default_key_set['element_key'] - app.user_elements[session['uuid']] = 0 - +def after_request_func(resp): # Check if address consistently has cookies blocked, # in which case start removing session files after creation. 
# @@ -108,7 +98,11 @@ def after_request_func(response): for key in session_list: session.pop(key) - return response + resp.headers['Content-Security-Policy'] = app.config['CSP'] + if os.environ.get('HTTPS_ONLY', False): + resp.headers['Content-Security-Policy'] += 'upgrade-insecure-requests' + + return resp @app.errorhandler(404) @@ -121,22 +115,26 @@ def unknown_page(e): @auth_required def index(): # Reset keys - session['fernet_keys'] = generate_user_keys(g.cookies_disabled) - error_message = session[ - 'error_message'] if 'error_message' in session else '' - session['error_message'] = '' + session['key'] = generate_user_key(g.cookies_disabled) + + # Redirect if an error was raised + if 'error_message' in session and session['error_message']: + error_message = session['error_message'] + session['error_message'] = '' + return render_template('error.html', error_message=error_message) return render_template('index.html', languages=app.config['LANGUAGES'], countries=app.config['COUNTRIES'], + logo=render_template( + 'logo.html', + config=g.user_config), config=g.user_config, - error_message=error_message, tor_available=int(os.environ.get('TOR_AVAILABLE')), version_number=app.config['VERSION_NUMBER']) @app.route('/opensearch.xml', methods=['GET']) -@auth_required def opensearch(): opensearch_url = g.app_location if opensearch_url.endswith('/'): @@ -188,19 +186,16 @@ def autocomplete(): @app.route('/search', methods=['GET', 'POST']) @auth_required def search(): - # Reset element counter - app.user_elements[session['uuid']] = 0 - # Update user config if specified in search args g.user_config = g.user_config.from_params(g.request_params) - search_util = RoutingUtils(request, g.user_config, session, - cookies_disabled=g.cookies_disabled) + search_util = Search(request, g.user_config, session, + cookies_disabled=g.cookies_disabled) query = search_util.new_search_query() - resolved_bangs = search_util.bang_operator(bang_json) - if resolved_bangs != '': - return 
redirect(resolved_bangs) + bang = resolve_bang(query=query, bangs_dict=bang_json) + if bang != '': + return redirect(bang) # Redirect to home if invalid/blank search if not query: @@ -208,7 +203,7 @@ def search(): # Generate response and number of external elements from the page try: - response, elements = search_util.generate_response() + response = search_util.generate_response() except TorError as e: session['error_message'] = e.message + ( "\\n\\nTor config is now disabled!" if e.disable else "") @@ -216,27 +211,27 @@ def search(): 'tor'] return redirect(url_for('.index')) - if search_util.feeling_lucky or elements < 0: + if search_util.feeling_lucky: return redirect(response, code=303) - # Keep count of external elements to fetch before - # the element key can be regenerated - app.user_elements[session['uuid']] = elements + # Return 503 if temporarily blocked by captcha + resp_code = 503 if has_captcha(str(response)) else 200 return render_template( 'display.html', query=urlparse.unquote(query), search_type=search_util.search_type, - dark_mode=g.user_config.dark, + config=g.user_config, response=response, version_number=app.config['VERSION_NUMBER'], search_header=(render_template( 'header.html', - dark_mode=g.user_config.dark, + config=g.user_config, + logo=render_template('logo.html'), query=urlparse.unquote(query), search_type=search_util.search_type, mobile=g.user_request.mobile) - if 'isch' not in search_util.search_type else '')) + if 'isch' not in search_util.search_type else '')), resp_code @app.route('/config', methods=['GET', 'POST', 'PUT']) @@ -287,7 +282,9 @@ def url(): if len(q) > 0 and 'http' in q: return redirect(q) else: - return render_template('error.html', query=q) + return render_template( + 'error.html', + error_message='Unable to resolve query: ' + q) @app.route('/imgres') @@ -299,13 +296,12 @@ def imgres(): @app.route('/element') @auth_required def element(): - cipher_suite = Fernet(session['fernet_keys']['element_key']) + cipher_suite = 
Fernet(session['key']) src_url = cipher_suite.decrypt(request.args.get('url').encode()).decode() src_type = request.args.get('type') try: file_data = g.user_request.send(base_url=src_url).content - app.user_elements[session['uuid']] -= 1 tmp_mem = io.BytesIO() tmp_mem.write(file_data) tmp_mem.seek(0) @@ -336,7 +332,7 @@ def window(): return render_template('display.html', response=results) -def run_app(): +def run_app() -> None: parser = argparse.ArgumentParser( description='Whoogle Search console runner') parser.add_argument( diff --git a/app/static/css/dark-theme.css b/app/static/css/dark-theme.css index 4bd6af1..720f27a 100644 --- a/app/static/css/dark-theme.css +++ b/app/static/css/dark-theme.css @@ -1,13 +1,17 @@ html { - background-color: #222 !important; + background: var(--whoogle-dark-page-bg) !important; } body { - background-color: #222 !important; + background: var(--whoogle-dark-page-bg) !important; } div { - color: #fff !important; + color: var(--whoogle-dark-text) !important; +} + +label { + color: var(--whoogle-dark-contrast-text) !important; } li a { @@ -15,43 +19,113 @@ li a { } li { - color: #fff !important; + color: var(--whoogle-dark-text) !important; +} + +textarea { + background: var(--whoogle-dark-page-bg) !important; + color: var(--whoogle-dark-text) !important; } a:visited h3 div { - color: #bbbbff !important; + color: var(--whoogle-dark-result-visited) !important; } a:link h3 div { - color: #4b8eea !important; + color: var(--whoogle-dark-result-title) !important; } a:link div { - color: #aaffaa !important; + color: var(--whoogle-dark-result-url) !important; } div span { - color: #bbb !important; + color: var(--whoogle-dark-secondary-text) !important; } input { - background-color: #111 !important; - color: #fff !important; + background-color: var(--whoogle-dark-page-bg) !important; + color: var(--whoogle-dark-text) !important; } -#search-bar { - color: #fff !important; - background-color: #222 !important; +select { + background: 
var(--whoogle-dark-page-bg) !important; + color: var(--whoogle-dark-text) !important; } .search-container { - background-color: #222 !important; + background-color: var(--whoogle-dark-page-bg) !important; } -.ZINbbc{ - background-color: #1a1a1a !important; +.ZINbbc { + background-color: var(--whoogle-dark-result-bg) !important; } -.bRsWnc{ - background-color: #1a1a1a !important; +.bRsWnc { + background-color: var(--whoogle-dark-result-bg) !important; +} + +#search-bar { + border: 2px solid var(--whoogle-dark-element-bg) !important; + color: var(--whoogle-dark-text) !important; +} + +#search-bar:focus { + color: var(--whoogle-dark-text) !important; +} + +#search-submit { + border: 1px solid var(--whoogle-dark-element-bg) !important; + background: var(--whoogle-dark-element-bg) !important; + color: var(--whoogle-dark-contrast-text) !important; +} + +.info-text { + color: var(--whoogle-dark-contrast-text) !important; + opacity: 75%; +} + +.collapsible { + color: var(--whoogle-dark-element-bg) !important; +} + +.collapsible:after { + color: var(--whoogle-dark-element-bg) !important; +} + +.active { + background-color: var(--whoogle-dark-element-bg) !important; + color: var(--whoogle-dark-contrast-text) !important; +} + +.content { + background-color: var(--whoogle-dark-element-bg) !important; + color: var(--whoogle-contrast-text) !important; +} + +.active:after { + color: var(--whoogle-dark-contrast-text); +} + +#gh-link { + color: var(--whoogle-dark-element-bg); +} + +.autocomplete-items { + border: 1px solid #685e79; +} + +.autocomplete-items div { + color: #fff; + background-color: #222; + border-bottom: 1px solid #242424; +} + +.autocomplete-items div:hover { + background-color: #404040; +} + +.autocomplete-active { + background-color: var(--whoogle-dark-element-bg) !important; + color: var(--whoogle-dark-contrast-text) !important; } diff --git a/app/static/css/header.css b/app/static/css/header.css index 3b24fdc..a7c8461 100644 --- a/app/static/css/header.css +++ 
b/app/static/css/header.css @@ -60,3 +60,9 @@ header { margin: 15px; display: block; } + + +#main>div:focus-within { + border-radius: 8px; + box-shadow: 0 0 6px 1px #2375e8; +} diff --git a/app/static/css/light-theme.css b/app/static/css/light-theme.css new file mode 100644 index 0000000..b4da052 --- /dev/null +++ b/app/static/css/light-theme.css @@ -0,0 +1,130 @@ +html { + background: var(--whoogle-page-bg) !important; +} + +body { + background: var(--whoogle-page-bg) !important; +} + +div { + color: var(--whoogle-text) !important; +} + +label { + color: var(--whoogle-contrast-text) !important; +} + +li a { + color: #4b8eaa !important; +} + +li { + color: var(--whoogle-text) !important; +} + +textarea { + background: var(--whoogle-page-bg) !important; + color: var(--whoogle-text) !important; +} + +select { + background: var(--whoogle-page-bg) !important; + color: var(--whoogle-text) !important; +} + +.ZINbbc { + background-color: var(--whoogle-result-bg) !important; +} + +.bRsWnc { + background-color: var(--whoogle-result-bg) !important; +} + +a:visited h3 div { + color: var(--whoogle-result-visited) !important; +} + +a:link h3 div { + color: var(--whoogle-result-title) !important; +} + +a:link div { + color: var(--whoogle-result-url) !important; +} + +div span { + color: var(--whoogle-secondary-text) !important; +} + +input { + background-color: var(--whoogle-page-bg) !important; + color: var(--whoogle-text) !important; +} + +#search-bar { + color: var(--whoogle-text) !important; + background-color: var(--whoogle-page-bg); +} + +.home-search { + border: 3px solid var(--whoogle-element-bg) !important; +} + +.search-container { + background-color: var(--whoogle-page-bg) !important; +} + +#search-submit { + border: 1px solid var(--whoogle-element-bg) !important; + background: var(--whoogle-element-bg) !important; + color: var(--whoogle-contrast-text) !important; +} + +.info-text { + color: var(--whoogle-contrast-text) !important; + opacity: 75%; +} + +.collapsible { 
+ color: var(--whoogle-element-bg) !important; +} + +.collapsible:after { + color: var(--whoogle-element-bg) !important; +} + +.active { + background-color: var(--whoogle-element-bg) !important; + color: var(--whoogle-contrast-text) !important; +} + +.content { + background-color: var(--whoogle-element-bg) !important; + color: var(--whoogle-contrast-text) !important; +} + +.active:after { + color: var(--whoogle-contrast-text); +} + +#gh-link { + color: var(--whoogle-element-bg); +} + +.autocomplete-items { + border: 1px solid #d4d4d4; +} + +.autocomplete-items div { + background-color: #fff; + border-bottom: 1px solid #d4d4d4; +} + +.autocomplete-items div:hover { + background-color: #e9e9e9; +} + +.autocomplete-active { + background-color: var(--whoogle-element-bg) !important; + color: var(--whoogle-contrast-text) !important; +} diff --git a/app/static/css/logo.css b/app/static/css/logo.css new file mode 100644 index 0000000..6aebfa4 --- /dev/null +++ b/app/static/css/logo.css @@ -0,0 +1,17 @@ +.cls-1 { + fill: transparent; +} + +svg { + height: inherit; +} + +a { + height: inherit; +} + +@media (max-width: 1000px) { + svg { + margin-top: .7em; + } +} diff --git a/app/static/css/main.css b/app/static/css/main.css index 5b35bf6..0a1e9cd 100644 --- a/app/static/css/main.css +++ b/app/static/css/main.css @@ -9,7 +9,12 @@ body { padding-bottom: 10px; } +.logo-container { + max-height: 500px; +} + .search-container { + background: transparent !important; width: 80%; position: absolute; top: 50%; @@ -26,29 +31,21 @@ body { } #search-bar { + background: transparent !important; width: 100%; - border: 3px solid #685e79; padding: 5px; height: 40px; outline: none; font-size: 24px; - color: #685e79; border-radius: 10px 10px 0 0; max-width: 600px; background: rgba(0, 0, 0, 0); } -#search-bar:focus { - color: #685e79; -} - #search-submit { width: 100%; height: 40px; - border: 1px solid #685e79; - background: #685e79 !important; text-align: center; - color: #fff; cursor: 
pointer; font-size: 20px; align-content: center; @@ -70,7 +67,6 @@ button::-moz-focus-inner { .collapsible { outline: 0; background-color: rgba(0, 0, 0, 0); - color: #685e79; cursor: pointer; padding: 18px; width: 100%; @@ -81,14 +77,8 @@ button::-moz-focus-inner { border-radius: 10px 10px 0 0; } -.active { - background-color: #685e79; - color: white; -} - .collapsible:after { content: '\002B'; - color: #685e79; font-weight: bold; float: right; margin-left: 5px; @@ -96,7 +86,6 @@ button::-moz-focus-inner { .active:after { content: "\2212"; - color: white; } .content { @@ -104,8 +93,6 @@ button::-moz-focus-inner { max-height: 0; overflow: hidden; transition: max-height 0.2s ease-out; - background-color: #685e79; - color: white; border-radius: 0 0 10px 10px; } @@ -113,12 +100,6 @@ button::-moz-focus-inner { padding-bottom: 20px; } -.ua-span { - color: white; - -webkit-box-decoration-break: clone; - box-decoration-break: clone; -} - .hidden { display: none; } @@ -135,3 +116,49 @@ footer { font-style: italic; font-size: 12px; } + +#config-style { + resize: none; + overflow-y: scroll; + width: 100%; + height: 100px; +} + +.whoogle-logo { + display: none; +} + +.whoogle-svg { + width: 80%; + display: block; + margin: auto; + padding-bottom: 10px; +} + +.autocomplete { + position: relative; + display: inline-block; + width: 100%; +} + +.autocomplete-items { + position: absolute; + border-bottom: none; + border-top: none; + z-index: 99; + + /*position the autocomplete items to be the same width as the container:*/ + top: 100%; + left: 0; + right: 0; +} + +.autocomplete-items div { + padding: 10px; + cursor: pointer; +} + +details summary { + padding: 10px; + font-weight: bold; +} diff --git a/app/static/css/search-dark.css b/app/static/css/search-dark.css deleted file mode 100644 index efd923e..0000000 --- a/app/static/css/search-dark.css +++ /dev/null @@ -1,40 +0,0 @@ -.autocomplete { - position: relative; - display: inline-block; - width: 100%; -} - -.autocomplete-items 
{ - position: absolute; - border: 1px solid #685e79; - border-bottom: none; - border-top: none; - z-index: 99; - - /*position the autocomplete items to be the same width as the container:*/ - top: 100%; - left: 0; - right: 0; -} - -.autocomplete-items div { - padding: 10px; - cursor: pointer; - color: #fff; - background-color: #222; - border-bottom: 1px solid #242424; -} - -.autocomplete-items div:hover { - background-color: #404040; -} - -.autocomplete-active { - background-color: #685e79 !important; - color: #ffffff; -} - -details summary { - padding: 10px; - font-weight: bold; -} diff --git a/app/static/css/search.css b/app/static/css/search.css index 155cfcf..e456218 100644 --- a/app/static/css/search.css +++ b/app/static/css/search.css @@ -6,7 +6,6 @@ .autocomplete-items { position: absolute; - border: 1px solid #d4d4d4; border-bottom: none; border-top: none; z-index: 99; @@ -20,17 +19,6 @@ .autocomplete-items div { padding: 10px; cursor: pointer; - background-color: #fff; - border-bottom: 1px solid #d4d4d4; -} - -.autocomplete-items div:hover { - background-color: #e9e9e9; -} - -.autocomplete-active { - background-color: #685e79 !important; - color: #ffffff; } details summary { diff --git a/app/static/css/variables.css b/app/static/css/variables.css new file mode 100644 index 0000000..76f94ac --- /dev/null +++ b/app/static/css/variables.css @@ -0,0 +1,26 @@ +/* Colors */ +:root { + /* LIGHT THEME COLORS */ + --whoogle-logo: #685e79; + --whoogle-page-bg: #ffffff; + --whoogle-element-bg: #685e79; + --whoogle-text: #000000; + --whoogle-contrast-text: #ffffff; + --whoogle-secondary-text: #70757a; + --whoogle-result-bg: #ffffff; + --whoogle-result-title: #1967d2; + --whoogle-result-url: #0d652d; + --whoogle-result-visited: #4b11a8; + + /* DARK THEME COLORS */ + --whoogle-dark-logo: #685e79; + --whoogle-dark-page-bg: #222222; + --whoogle-dark-element-bg: #685e79; + --whoogle-dark-text: #ffffff; + --whoogle-dark-contrast-text: #000000; + 
--whoogle-dark-secondary-text: #bbbbbb; + --whoogle-dark-result-bg: #000000; + --whoogle-dark-result-title: #1967d2; + --whoogle-dark-result-url: #4b11a8; + --whoogle-dark-result-visited: #bbbbff; +} diff --git a/app/static/img/favicon/manifest.json b/app/static/img/favicon/manifest.json index 013d4a6..b9a8f9d 100644 --- a/app/static/img/favicon/manifest.json +++ b/app/static/img/favicon/manifest.json @@ -1,41 +1,44 @@ { - "name": "App", + "name": "Whoogle Search", + "short_name": "Whoogle", + "display": "fullscreen", + "scope": "/", "icons": [ { - "src": "\/android-icon-36x36.png", + "src": "android-icon-36x36.png", "sizes": "36x36", "type": "image\/png", "density": "0.75" }, { - "src": "\/android-icon-48x48.png", + "src": "android-icon-48x48.png", "sizes": "48x48", "type": "image\/png", "density": "1.0" }, { - "src": "\/android-icon-72x72.png", + "src": "android-icon-72x72.png", "sizes": "72x72", "type": "image\/png", "density": "1.5" }, { - "src": "\/android-icon-96x96.png", + "src": "android-icon-96x96.png", "sizes": "96x96", "type": "image\/png", "density": "2.0" }, { - "src": "\/android-icon-144x144.png", + "src": "android-icon-144x144.png", "sizes": "144x144", "type": "image\/png", "density": "3.0" }, { - "src": "\/android-icon-192x192.png", + "src": "android-icon-192x192.png", "sizes": "192x192", "type": "image\/png", "density": "4.0" } ] -} \ No newline at end of file +} diff --git a/app/static/img/whoogle.svg b/app/static/img/whoogle.svg new file mode 100644 index 0000000..41a783e --- /dev/null +++ b/app/static/img/whoogle.svg @@ -0,0 +1 @@ + diff --git a/app/static/js/controller.js b/app/static/js/controller.js index 3ab8ca7..9dc88a2 100644 --- a/app/static/js/controller.js +++ b/app/static/js/controller.js @@ -1,14 +1,3 @@ -// Whoogle configurations that use boolean values and checkboxes -CONFIG_BOOLS = [ - "nojs", "dark", "safe", "alts", "new_tab", "get_only", "tor" -]; - -// Whoogle configurations that use string values and input fields -CONFIG_STRS = 
[ - "near", "url" -]; - - const setupSearchLayout = () => { // Setup search field const searchBar = document.getElementById("search-bar"); @@ -28,33 +17,6 @@ const setupSearchLayout = () => { }); }; -const fillConfigValues = () => { - // Request existing config info - let xhrGET = new XMLHttpRequest(); - xhrGET.open("GET", "config"); - xhrGET.onload = function() { - if (xhrGET.readyState === 4 && xhrGET.status !== 200) { - alert("Error loading Whoogle config"); - return; - } - - // Allow for updating/saving config values - let configSettings = JSON.parse(xhrGET.responseText); - - CONFIG_STRS.forEach(function(item) { - let configElement = document.getElementById("config-" + item.replace("_", "-")); - configElement.value = configSettings[item] ? configSettings[item] : ""; - }); - - CONFIG_BOOLS.forEach(function(item) { - let configElement = document.getElementById("config-" + item.replace("_", "-")); - configElement.checked = !!configSettings[item]; - }); - }; - - xhrGET.send(); -}; - const setupConfigLayout = () => { // Setup whoogle config const collapsible = document.getElementById("config-collapsible"); @@ -69,8 +31,6 @@ const setupConfigLayout = () => { content.classList.toggle("open"); }); - - fillConfigValues(); }; const loadConfig = event => { @@ -116,6 +76,9 @@ document.addEventListener("DOMContentLoaded", function() { setupSearchLayout(); setupConfigLayout(); + document.getElementById("config-load").addEventListener("click", loadConfig); + document.getElementById("config-save").addEventListener("click", saveConfig); + // Focusing on the search input field requires a delay for elements to finish // loading (seemingly only on FF) setTimeout(function() { document.getElementById("search-bar").focus(); }, 250); diff --git a/app/static/js/header.js b/app/static/js/header.js new file mode 100644 index 0000000..02d8581 --- /dev/null +++ b/app/static/js/header.js @@ -0,0 +1,11 @@ +document.addEventListener("DOMContentLoaded", () => { + const searchBar = 
document.getElementById("search-bar"); + + searchBar.addEventListener("keyup", function (event) { + if (event.keyCode !== 13) { + handleUserInput(searchBar); + } else { + document.getElementById("search-form").submit(); + } + }); +}); diff --git a/app/static/js/keyboard.js b/app/static/js/keyboard.js new file mode 100644 index 0000000..7fd05ef --- /dev/null +++ b/app/static/js/keyboard.js @@ -0,0 +1,44 @@ +(function () { + let searchBar, results; + const keymap = { + ArrowUp: goUp, + ArrowDown: goDown, + k: goUp, + j: goDown, + '/': focusSearch, + }; + let activeIdx = -1; + + document.addEventListener('DOMContentLoaded', () => { + searchBar = document.querySelector('#search-bar'); + results = document.querySelectorAll('#main>div>div>div>a'); + }); + + document.addEventListener('keydown', (e) => { + if (e.target.tagName === 'INPUT') return true; + if (typeof keymap[e.key] === 'function') { + e.preventDefault(); + keymap[e.key](); + } + }); + + function goUp () { + if (activeIdx > 0) focusResult(activeIdx - 1); + else focusSearch(); + } + + function goDown () { + if (activeIdx < results.length - 1) focusResult(activeIdx + 1); + } + + function focusResult (idx) { + activeIdx = idx; + results[activeIdx].scrollIntoView({ behavior: 'smooth', block: 'center', inline: 'nearest' }); + results[activeIdx].focus(); + } + + function focusSearch () { + activeIdx = -1; + searchBar.focus(); + } +}()); diff --git a/app/static/js/utils.js b/app/static/js/utils.js index 775e072..56e052f 100644 --- a/app/static/js/utils.js +++ b/app/static/js/utils.js @@ -28,7 +28,7 @@ const checkForTracking = () => { /^[0-9]{15}$/ ] } - } + }; // Creates a link to a UPS/USPS/FedEx tracking page const createTrackingLink = href => { @@ -37,7 +37,7 @@ const checkForTracking = () => { link.innerHTML = "View Tracking Info"; link.href = href; mainDiv.prepend(link); - } + }; // Compares the query against a set of regex patterns // for tracking numbers @@ -48,12 +48,12 @@ const checkForTracking = () => { 
return true; } }); - } + }; for (const key of Object.keys(matchTracking)) { compareQuery(matchTracking[key]); } -} +}; document.addEventListener("DOMContentLoaded", function() { checkForTracking(); diff --git a/app/misc/countries.json b/app/static/settings/countries.json similarity index 100% rename from app/misc/countries.json rename to app/static/settings/countries.json diff --git a/app/misc/languages.json b/app/static/settings/languages.json similarity index 100% rename from app/misc/languages.json rename to app/static/settings/languages.json diff --git a/app/templates/display.html b/app/templates/display.html index f1dc572..4eb0512 100644 --- a/app/templates/display.html +++ b/app/templates/display.html @@ -5,13 +5,11 @@ - - - + + - {% if dark_mode %} - - {% endif %} + + {{ query }} - Whoogle Search @@ -19,9 +17,12 @@ {{ response|safe }} + + + diff --git a/app/templates/error.html b/app/templates/error.html index 9546e23..efa3f79 100644 --- a/app/templates/error.html +++ b/app/templates/error.html @@ -1,6 +1,6 @@

Error


- Error parsing "{{ query }}" + Error: "{{ error_message|safe }}"

Return Home diff --git a/app/templates/header.html b/app/templates/header.html index 7ae980f..5973274 100644 --- a/app/templates/header.html +++ b/app/templates/header.html @@ -4,15 +4,17 @@
@@ -26,7 +28,9 @@
@@ -35,9 +39,9 @@
+ style="background-color: {{ 'var(--whoogle-dark-result-bg)' if config.dark else 'var(--whoogle-result-bg)' }} !important; + color: {{ 'var(--whoogle-dark-text)' if config.dark else 'var(--whoogle-text)' }}; + border: {{ '2px solid var(--whoogle-dark-element-bg)' if config.dark else '' }}; border-radius: 8px;">
@@ -48,14 +52,4 @@
{% endif %} - + diff --git a/app/templates/index.html b/app/templates/index.html index 766fab1..c49943e 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -21,33 +21,28 @@ - + - {% if config.dark %} - - {% endif %} + + Whoogle Search -
- +
+ {{ logo|safe }} +
- +
@@ -57,7 +52,7 @@
-
+
— Note: If enabled, a website will only appear in the results if it is *hosted* in the selected country.
-
+
-
+
-
+
- +
-
+
- +
-
+
- +
-
+
- +
-
+
- -
— Replaces Twitter/YouTube/Instagram links - with Nitter/Invidious/Bibliogram links.
+ +
— Replaces Twitter/YouTube/Instagram/Reddit links + with Nitter/Invidious/Bibliogram/Libreddit links.
-
+
- +
-
+
- +
-
+
- +
-
+
- + +
+
+ +
-   +     - +
@@ -147,7 +146,7 @@ diff --git a/app/templates/logo.html b/app/templates/logo.html new file mode 100644 index 0000000..bbd99b6 --- /dev/null +++ b/app/templates/logo.html @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/app/utils/bangs.py b/app/utils/bangs.py new file mode 100644 index 0000000..56daf4f --- /dev/null +++ b/app/utils/bangs.py @@ -0,0 +1,61 @@ +import json +import requests + +DDG_BANGS = 'https://duckduckgo.com/bang.v255.js' + + +def gen_bangs_json(bangs_file: str) -> None: + """Generates a json file from the DDG bangs list + + Args: + bangs_file: The str path to the new DDG bangs json file + + Returns: + None + + """ + try: + # Request full list from DDG + r = requests.get(DDG_BANGS) + r.raise_for_status() + except requests.exceptions.HTTPError as err: + raise SystemExit(err) + + # Convert to json + data = json.loads(r.text) + + # Set up a json object (with better formatting) for all available bangs + bangs_data = {} + + for row in data: + bang_command = '!' + row['t'] + bangs_data[bang_command] = { + 'url': row['u'].replace('{{{s}}}', '{}'), + 'suggestion': bang_command + ' (' + row['s'] + ')' + } + + json.dump(bangs_data, open(bangs_file, 'w')) + + +def resolve_bang(query: str, bangs_dict: dict) -> str: + """Transform's a user's query to a bang search, if an operator is found + + Args: + query: The search query + bangs_dict: The dict of available bang operators, with corresponding + format string search URLs + (i.e. 
"!w": "https://en.wikipedia.org...?search={}") + + Returns: + str: A formatted redirect for a bang search, or an empty str if there + wasn't a match or didn't contain a bang operator + + """ + split_query = query.split(' ') + for operator in bangs_dict.keys(): + if operator not in split_query: + continue + + return bangs_dict[operator]['url'].format( + query.replace(operator, '').strip()) + return '' diff --git a/app/utils/gen_ddg_bangs.py b/app/utils/gen_ddg_bangs.py deleted file mode 100644 index 0ed3953..0000000 --- a/app/utils/gen_ddg_bangs.py +++ /dev/null @@ -1,26 +0,0 @@ -import json -import requests - - -def gen_bangs_json(bangs_file): - # Request list - try: - r = requests.get('https://duckduckgo.com/bang.v255.js') - r.raise_for_status() - except requests.exceptions.HTTPError as err: - raise SystemExit(err) - - # Convert to json - data = json.loads(r.text) - - # Set up a json object (with better formatting) for all available bangs - bangs_data = {} - - for row in data: - bang_command = '!' + row['t'] - bangs_data[bang_command] = { - 'url': row['u'].replace('{{{s}}}', '{}'), - 'suggestion': bang_command + ' (' + row['s'] + ')' - } - - json.dump(bangs_data, open(bangs_file, 'w')) diff --git a/app/utils/filter_utils.py b/app/utils/results.py similarity index 50% rename from app/utils/filter_utils.py rename to app/utils/results.py index d1a2604..7db53ed 100644 --- a/app/utils/filter_utils.py +++ b/app/utils/results.py @@ -3,14 +3,17 @@ import os import urllib.parse as urlparse from urllib.parse import parse_qs + SKIP_ARGS = ['ref_src', 'utm'] -FULL_RES_IMG = '
Full Image' +SKIP_PREFIX = ['//www.', '//mobile.', '//m.'] +GOOG_STATIC = 'www.gstatic.com' GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo' LOGO_URL = GOOG_IMG + '_desk' BLANK_B64 = ('data:image/png;base64,' 'iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkw' 'AIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC') + # Ad keywords BLACKLIST = [ 'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', @@ -22,24 +25,54 @@ BLACKLIST = [ SITE_ALTS = { 'twitter.com': os.getenv('WHOOGLE_ALT_TW', 'nitter.net'), 'youtube.com': os.getenv('WHOOGLE_ALT_YT', 'invidious.snopyta.org'), - 'instagram.com': os.getenv('WHOOGLE_ALT_IG', 'bibliogram.art/u') + 'instagram.com': os.getenv('WHOOGLE_ALT_IG', 'bibliogram.art/u'), + 'reddit.com': os.getenv('WHOOGLE_ALT_RD', 'libredd.it') } -def has_ad_content(element: str): - return element.upper() in (value.upper() for value in BLACKLIST) \ - or 'ⓘ' in element +def has_ad_content(element: str) -> bool: + """Inspects an HTML element for ad related content + + Args: + element: The HTML element to inspect + + Returns: + bool: True/False for the element containing an ad + + """ + return (element.upper() in (value.upper() for value in BLACKLIST) + or 'ⓘ' in element) -def get_first_link(soup): +def get_first_link(soup: BeautifulSoup) -> str: + """Retrieves the first result link from the query response + + Args: + soup: The BeautifulSoup response body + + Returns: + str: A str link to the first result + + """ # Replace hrefs with only the intended destination (no "utm" type tags) for a in soup.find_all('a', href=True): # Return the first search result URL if 'url?q=' in a['href']: return filter_link_args(a['href']) + return '' -def get_site_alt(link: str): +def get_site_alt(link: str) -> str: + """Returns an alternative to a particular site, if one is configured + + Args: + link: A string result URL to check against the SITE_ALTS map + + Returns: + str: An updated (or ignored) result link + + """ + for site_key in 
SITE_ALTS.keys(): if site_key not in link: continue @@ -47,16 +80,28 @@ def get_site_alt(link: str): link = link.replace(site_key, SITE_ALTS[site_key]) break - return link.replace('www.', '').replace('//m.', '//') + for prefix in SKIP_PREFIX: + link = link.replace(prefix, '//') + + return link -def filter_link_args(query_link): - parsed_link = urlparse.urlparse(query_link) +def filter_link_args(link: str) -> str: + """Filters out unnecessary URL args from a result link + + Args: + link: The string result link to check for extraneous URL params + + Returns: + str: An updated (or ignored) result link + + """ + parsed_link = urlparse.urlparse(link) link_args = parse_qs(parsed_link.query) safe_args = {} if len(link_args) == 0 and len(parsed_link) > 0: - return query_link + return link for arg in link_args.keys(): if arg in SKIP_ARGS: @@ -65,19 +110,28 @@ def filter_link_args(query_link): safe_args[arg] = link_args[arg] # Remove original link query and replace with filtered args - query_link = query_link.replace(parsed_link.query, '') + link = link.replace(parsed_link.query, '') if len(safe_args) > 0: - query_link = query_link + urlparse.urlencode(safe_args, doseq=True) + link = link + urlparse.urlencode(safe_args, doseq=True) else: - query_link = query_link.replace('?', '') + link = link.replace('?', '') - return query_link + return link -def gen_nojs(sibling): +def append_nojs(result: BeautifulSoup) -> None: + """Appends a no-Javascript alternative for a search result + + Args: + result: The search result to append a no-JS link to + + Returns: + None + + """ nojs_link = BeautifulSoup(features='html.parser').new_tag('a') - nojs_link['href'] = '/window?location=' + sibling['href'] + nojs_link['href'] = '/window?location=' + result['href'] nojs_link['style'] = 'display:block;width:100%;' nojs_link.string = 'NoJS Link: ' + nojs_link['href'] - sibling.append(BeautifulSoup('


', 'html.parser')) - sibling.append(nojs_link) + result.append(BeautifulSoup('


', 'html.parser')) + result.append(nojs_link) diff --git a/app/utils/routing_utils.py b/app/utils/search.py similarity index 51% rename from app/utils/routing_utils.py rename to app/utils/search.py index 55a6253..b71e6dd 100644 --- a/app/utils/routing_utils.py +++ b/app/utils/search.py @@ -1,5 +1,5 @@ from app.filter import Filter, get_first_link -from app.utils.session_utils import generate_user_keys +from app.utils.session import generate_user_key from app.request import gen_query from bs4 import BeautifulSoup as bsoup from cryptography.fernet import Fernet, InvalidToken @@ -8,17 +8,51 @@ from typing import Any, Tuple import os TOR_BANNER = '

You are using Tor


' +CAPTCHA = 'div class="g-recaptcha"' def needs_https(url: str) -> bool: - https_only = os.getenv('HTTPS_ONLY', False) + """Checks if the current instance needs to be upgraded to HTTPS + + Note that all Heroku instances are available by default over HTTPS, but + do not automatically set up a redirect when visited over HTTP. + + Args: + url: The instance url + + Returns: + bool: True/False representing the need to upgrade + + """ + https_only = bool(os.getenv('HTTPS_ONLY', 0)) is_heroku = url.endswith('.herokuapp.com') is_http = url.startswith('http://') return (is_heroku and is_http) or (https_only and is_http) -class RoutingUtils: +def has_captcha(results: str) -> bool: + """Checks to see if the search results are blocked by a captcha + + Args: + results: The search page html as a string + + Returns: + bool: True/False indicating if a captcha element was found + + """ + return CAPTCHA in results + + +class Search: + """Search query preprocessor - used before submitting the query or + redirecting to another site + + Attributes: + request: the incoming flask request + config: the current user config settings + session: the flask user session + """ def __init__(self, request, config, session, cookies_disabled=False): method = request.method self.request_params = request.args if method == 'GET' else request.form @@ -31,23 +65,28 @@ class RoutingUtils: self.search_type = self.request_params.get( 'tbm') if 'tbm' in self.request_params else '' - def __getitem__(self, name): + def __getitem__(self, name) -> Any: return getattr(self, name) - def __setitem__(self, name, value): + def __setitem__(self, name, value) -> None: return setattr(self, name, value) - def __delitem__(self, name): + def __delitem__(self, name) -> None: return delattr(self, name) - def __contains__(self, name): + def __contains__(self, name) -> bool: return hasattr(self, name) def new_search_query(self) -> str: - # Generate a new element key each time a new search is performed - 
self.session['fernet_keys']['element_key'] = generate_user_keys( - cookies_disabled=self.cookies_disabled)['element_key'] + """Parses a plaintext query into a valid string for submission + Also decrypts the query string, if encrypted (in the case of + paginated results). + + Returns: + str: A valid query string + + """ q = self.request_params.get('q') if q is None or len(q) == 0: @@ -55,53 +94,45 @@ class RoutingUtils: else: # Attempt to decrypt if this is an internal link try: - q = Fernet( - self.session['fernet_keys']['text_key'] - ).decrypt(q.encode()).decode() + q = Fernet(self.session['key']).decrypt(q.encode()).decode() except InvalidToken: pass - # Reset text key - self.session['fernet_keys']['text_key'] = generate_user_keys( - cookies_disabled=self.cookies_disabled)['text_key'] - # Strip leading '! ' for "feeling lucky" queries self.feeling_lucky = q.startswith('! ') self.query = q[2:] if self.feeling_lucky else q return self.query - def bang_operator(self, bangs_dict: dict) -> str: - for operator in bangs_dict.keys(): - if self.query.split(' ')[0] != operator: - continue + def generate_response(self) -> str: + """Generates a response for the user's query - return bangs_dict[operator]['url'].format( - self.query.replace(operator, '').strip()) - return '' + Returns: + str: A string response to the search query, in the form of a URL + or string representation of HTML content. 
- def generate_response(self) -> Tuple[Any, int]: + """ mobile = 'Android' in self.user_agent or 'iPhone' in self.user_agent - content_filter = Filter( - self.session['fernet_keys'], - mobile=mobile, - config=self.config) - full_query = gen_query( - self.query, - self.request_params, - self.config, - content_filter.near) + content_filter = Filter(self.session['key'], + mobile=mobile, + config=self.config) + full_query = gen_query(self.query, + self.request_params, + self.config, + content_filter.near) get_body = g.user_request.send(query=full_query) # Produce cleanable html soup from response html_soup = bsoup(content_filter.reskin(get_body.text), 'html.parser') - html_soup.insert( - 0, - bsoup(TOR_BANNER, 'html.parser') - if g.user_request.tor_valid else bsoup('', 'html.parser')) + + # Indicate whether or not a Tor connection is active + tor_banner = bsoup('', 'html.parser') + if g.user_request.tor_valid: + tor_banner = bsoup(TOR_BANNER, 'html.parser') + html_soup.insert(0, tor_banner) if self.feeling_lucky: - return get_first_link(html_soup), 1 + return get_first_link(html_soup) else: formatted_results = content_filter.clean(html_soup) @@ -116,4 +147,4 @@ class RoutingUtils: continue link['href'] += param_str - return formatted_results, content_filter.elements + return str(formatted_results) diff --git a/app/utils/session.py b/app/utils/session.py new file mode 100644 index 0000000..0dc8dc5 --- /dev/null +++ b/app/utils/session.py @@ -0,0 +1,42 @@ +from cryptography.fernet import Fernet +from flask import current_app as app + +REQUIRED_SESSION_VALUES = ['uuid', 'config', 'key'] + + +def generate_user_key(cookies_disabled=False) -> bytes: + """Generates a key for encrypting searches and element URLs + + Args: + cookies_disabled: Flag for whether or not cookies are disabled by the + user. If so, the user can only use the default key + generated on app init for queries. 
+ + Returns: + str: A unique Fernet key + + """ + if cookies_disabled: + return app.default_key + + # Generate/regenerate unique key per user + return Fernet.generate_key() + + +def valid_user_session(session: dict) -> bool: + """Validates the current user session + + Args: + session: The current Flask user session + + Returns: + bool: True/False indicating that all required session values are + available + + """ + # Generate secret key for user if unavailable + for value in REQUIRED_SESSION_VALUES: + if value not in session: + return False + + return True diff --git a/app/utils/session_utils.py b/app/utils/session_utils.py deleted file mode 100644 index f959abe..0000000 --- a/app/utils/session_utils.py +++ /dev/null @@ -1,24 +0,0 @@ -from cryptography.fernet import Fernet -from flask import current_app as app - -REQUIRED_SESSION_VALUES = ['uuid', 'config', 'fernet_keys'] - - -def generate_user_keys(cookies_disabled=False) -> dict: - if cookies_disabled: - return app.default_key_set - - # Generate/regenerate unique key per user - return { - 'element_key': Fernet.generate_key(), - 'text_key': Fernet.generate_key() - } - - -def valid_user_session(session): - # Generate secret key for user if unavailable - for value in REQUIRED_SESSION_VALUES: - if value not in session: - return False - - return True diff --git a/docker-compose.yml b/docker-compose.yml index 75ad171..f2e8797 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,9 +1,26 @@ -version: "3" +# cant use mem_limit in a 3.x docker-compose file in non swarm mode +# see https://github.com/docker/compose/issues/4513 +version: "2.4" services: whoogle-search: image: benbusby/whoogle-search container_name: whoogle-search + restart: on-failure:5 + pids_limit: 50 + mem_limit: 256mb + memswap_limit: 256mb + # user debian-tor from tor package + user: '102' + security_opt: + - no-new-privileges + cap_drop: + - ALL + read_only: true + tmpfs: + - /config/:size=10M,uid=102,gid=102,mode=1700 + - 
/var/lib/tor/:size=10M,uid=102,gid=102,mode=1700 + - /run/tor/:size=1M,uid=102,gid=102,mode=1700 #environment: # Uncomment to configure environment variables # Basic auth configuration, uncomment to enable #- WHOOGLE_USER= @@ -19,6 +36,9 @@ services: #- WHOOGLE_ALT_TW=nitter.net #- WHOOGLE_ALT_YT=invidious.snopyta.org #- WHOOGLE_ALT_IG=bibliogram.art/u + #- WHOOGLE_ALT_RD=libredd.it + # Load environment variables from whoogle.env + #- WHOOGLE_DOTENV=1 ports: - 5000:5000 restart: unless-stopped diff --git a/misc/heroku-regen.sh b/misc/heroku-regen.sh new file mode 100755 index 0000000..198edcf --- /dev/null +++ b/misc/heroku-regen.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Assumes this is being executed from a session that has already logged +# into Heroku with "heroku login -i" beforehand. +# +# You can set this up to run every night when you aren't using the +# instance with a cronjob. For example: +# 0 3 * * * /home/pi/whoogle-search/misc/heroku-regen.sh APP_NAME + +HEROKU_CLI_SITE="https://devcenter.heroku.com/articles/heroku-cli" + +if ! 
[[ -x "$(command -v heroku)" ]]; then + echo "Must have heroku cli installed: $HEROKU_CLI_SITE" + exit 1 +fi + +cd "$(builtin cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)/../" + +if [[ $# -ne 1 ]]; then + echo -e "Must provide the name of the Whoogle instance to regenerate" + exit 1 +fi + +APP_NAME="$1" + +heroku apps:destroy "$APP_NAME" --confirm "$APP_NAME" +heroku apps:create "$APP_NAME" +heroku container:login +heroku container:push web +heroku container:release web diff --git a/config/tor/start-tor.sh b/misc/tor/start-tor.sh similarity index 100% rename from config/tor/start-tor.sh rename to misc/tor/start-tor.sh diff --git a/config/tor/torrc b/misc/tor/torrc similarity index 100% rename from config/tor/torrc rename to misc/tor/torrc diff --git a/requirements.txt b/requirements.txt index c015b64..0e4cdf0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,12 +6,12 @@ certifi==2020.4.5.1 cffi==1.13.2 chardet==3.0.4 Click==7.0 -cryptography==3.2 +cryptography==3.3.2 Flask==1.1.1 Flask-Session==0.3.2 idna==2.9 itsdangerous==1.1.0 -Jinja2==2.10.3 +Jinja2==2.11.3 MarkupSafe==1.1.1 more-itertools==8.3.0 packaging==20.4 @@ -31,3 +31,4 @@ urllib3==1.25.9 waitress==1.4.3 wcwidth==0.1.9 Werkzeug==0.16.0 +python-dotenv==0.16.0 diff --git a/run b/run index b2eedbd..4ace594 100755 --- a/run +++ b/run @@ -12,12 +12,14 @@ SUBDIR="${1:-app}" export APP_ROOT="$SCRIPT_DIR/$SUBDIR" export STATIC_FOLDER="$APP_ROOT/static" -mkdir -p "$STATIC_FOLDER" - # Check for regular vs test run if [[ "$SUBDIR" == "test" ]]; then + # Set up static files for testing + rm -rf "$STATIC_FOLDER" + ln -s "$SCRIPT_DIR/app/static" "$STATIC_FOLDER" pytest -sv else + mkdir -p "$STATIC_FOLDER" python3 -um app \ --host "${ADDRESS:-0.0.0.0}" \ --port "${PORT:-"${EXPOSE_PORT:-5000}"}" diff --git a/setup.py b/setup.py index fdd7684..2bd2d4a 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setuptools.setup( author='Ben Busby', author_email='benbusby@protonmail.com', name='whoogle-search', - 
version='0.3.2', + version='0.4.0', include_package_data=True, install_requires=requirements, description='Self-hosted, ad-free, privacy-respecting metasearch engine', diff --git a/test/conftest.py b/test/conftest.py index 4b19636..34c92c4 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,5 +1,5 @@ from app import app -from app.utils.session_utils import generate_user_keys +from app.utils.session import generate_user_key import pytest import random @@ -18,6 +18,6 @@ def client(): with app.test_client() as client: with client.session_transaction() as session: session['uuid'] = 'test' - session['fernet_keys'] = generate_user_keys() + session['key'] = generate_user_key() session['config'] = {} yield client diff --git a/test/misc b/test/misc deleted file mode 120000 index 4b8163d..0000000 --- a/test/misc +++ /dev/null @@ -1 +0,0 @@ -../app/misc/ \ No newline at end of file diff --git a/test/test_misc.py b/test/test_misc.py index 92fcadb..65a4ed0 100644 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -1,20 +1,26 @@ -from app.utils.session_utils import generate_user_keys, valid_user_session +from cryptography.fernet import Fernet + +from app.utils.session import generate_user_key, valid_user_session def test_generate_user_keys(): - keys = generate_user_keys() - assert 'text_key' in keys - assert 'element_key' in keys - assert keys['text_key'] not in keys['element_key'] + key = generate_user_key() + assert Fernet(key) + assert generate_user_key() != key def test_valid_session(client): - assert not valid_user_session({'fernet_keys': '', 'config': {}}) + assert not valid_user_session({'key': '', 'config': {}}) with client.session_transaction() as session: assert valid_user_session(session) -def test_request_key_generation(client): +def test_query_decryption(client): + # FIXME: Handle decryption errors in search.py and rewrite test + # This previously was used to test swapping decryption keys between + # queries. 
While this worked in theory and usually didn't cause problems, + # they were tied to session IDs and those are really unreliable (meaning + # that occasionally page navigation would break). rv = client.get('/') cookie = rv.headers['Set-Cookie'] @@ -23,11 +29,9 @@ def test_request_key_generation(client): with client.session_transaction() as session: assert valid_user_session(session) - text_key = session['fernet_keys']['text_key'] rv = client.get('/search?q=test+2', headers={'Cookie': cookie}) assert rv._status_code == 200 with client.session_transaction() as session: assert valid_user_session(session) - assert text_key not in session['fernet_keys']['text_key'] diff --git a/test/test_results.py b/test/test_results.py index 74af29c..38b9936 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -1,13 +1,13 @@ from bs4 import BeautifulSoup from app.filter import Filter -from app.utils.session_utils import generate_user_keys +from app.utils.session import generate_user_key from datetime import datetime from dateutil.parser import * def get_search_results(data): - secret_key = generate_user_keys() - soup = Filter(user_keys=secret_key).clean( + secret_key = generate_user_key() + soup = Filter(user_key=secret_key).clean( BeautifulSoup(data, 'html.parser')) main_divs = soup.find('div', {'id': 'main'}) diff --git a/test/test_routes.py b/test/test_routes.py index e3ba084..fda189d 100644 --- a/test/test_routes.py +++ b/test/test_routes.py @@ -19,14 +19,21 @@ def test_feeling_lucky(client): def test_ddg_bang(client): + # Bang at beginning of query rv = client.get('/search?q=!gh%20whoogle') assert rv._status_code == 302 assert rv.headers.get('Location').startswith('https://github.com') - rv = client.get('/search?q=!w%20github') + # Move bang to end of query + rv = client.get('/search?q=github%20!w') assert rv._status_code == 302 assert rv.headers.get('Location').startswith('https://en.wikipedia.org') + # Move bang to middle of query + rv = 
client.get('/search?q=big%20!r%20chungus') + assert rv._status_code == 302 + assert rv.headers.get('Location').startswith('https://www.reddit.com') + def test_config(client): rv = client.post('/config', data=demo_config) diff --git a/whoogle.env b/whoogle.env new file mode 100644 index 0000000..87a77a2 --- /dev/null +++ b/whoogle.env @@ -0,0 +1,25 @@ +# You can set Whoogle environment variables here, but you must also set +# WHOOGLE_DOTENV=1 in your deployment to enable these values + +#WHOOGLE_ALT_TW=nitter.net +#WHOOGLE_ALT_YT=invidious.snopyta.org +#WHOOGLE_ALT_IG=bibliogram.art/u +#WHOOGLE_ALT_RD=libredd.it +#WHOOGLE_USER="" +#WHOOGLE_PASS="" +#WHOOGLE_PROXY_USER="" +#WHOOGLE_PROXY_PASS="" +#WHOOGLE_PROXY_TYPE="" +#WHOOGLE_PROXY_LOC="" +#HTTPS_ONLY=1 + +#WHOOGLE_CONFIG_COUNTRY=countryUK # See app/static/settings/countries.json for values +#WHOOGLE_CONFIG_LANGUAGE=lang_en # See app/static/settings/languages.json for values +#WHOOGLE_CONFIG_DARK=1 # Dark mode +#WHOOGLE_CONFIG_SAFE=1 # Safe searches +#WHOOGLE_CONFIG_ALTS=1 # Use social media site alternatives +#WHOOGLE_CONFIG_TOR=1 # Use Tor if available +#WHOOGLE_CONFIG_NEW_TAB=1 # Open results in new tab +#WHOOGLE_CONFIG_GET_ONLY=1 # Search using GET requests only +#WHOOGLE_CONFIG_URL=https://<whoogle url>/ +#WHOOGLE_CONFIG_STYLE=":root { /* LIGHT THEME COLORS */ --whoogle-background: #d8dee9; --whoogle-accent: #2e3440; --whoogle-text: #3B4252; --whoogle-contrast-text: #eceff4; --whoogle-secondary-text: #70757a; --whoogle-result-bg: #fff; --whoogle-result-title: #4c566a; --whoogle-result-url: #81a1c1; --whoogle-result-visited: #a3be8c; /* DARK THEME COLORS */ --whoogle-dark-background: #222; --whoogle-dark-accent: #685e79; --whoogle-dark-text: #fff; --whoogle-dark-contrast-text: #000; --whoogle-dark-secondary-text: #bbb; --whoogle-dark-result-bg: #000; --whoogle-dark-result-title: #1967d2; --whoogle-dark-result-url: #4b11a8; --whoogle-dark-result-visited: #bbbbff; }"