diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..e674a22 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,9 @@ +# These are supported funding model platforms +github: benbusby +ko_fi: benbusby +tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel +community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry +liberapay: # Replace with a single Liberapay username +issuehunt: # Replace with a single IssueHunt username +otechie: # Replace with a single Otechie username +custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] diff --git a/.github/workflows/buildx.yml b/.github/workflows/buildx.yml new file mode 100644 index 0000000..47fc086 --- /dev/null +++ b/.github/workflows/buildx.yml @@ -0,0 +1,26 @@ +name: buildx + +on: + push: + branches: develop + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: checkout code + uses: actions/checkout@v2 + - name: install buildx + id: buildx + uses: crazy-max/ghaction-docker-buildx@v1 + with: + version: latest + - name: log in to docker hub + run: | + echo "${{ secrets.DOCKER_PASSWORD }}" | \ + docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin + - name: build and push the image + run: | + docker buildx build --push \ + --tag benbusby/whoogle-search:buildx-experimental \ + --platform linux/amd64,linux/arm/v7,linux/arm64 . 
diff --git a/.github/workflows/pep8.yml b/.github/workflows/pep8.yml new file mode 100644 index 0000000..26bcc20 --- /dev/null +++ b/.github/workflows/pep8.yml @@ -0,0 +1,22 @@ +name: pep8 + +on: + push + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pycodestyle + - name: Run pycodestyle + run: | + pycodestyle --show-source --show-pep8 app/* + pycodestyle --show-source --show-pep8 test/* diff --git a/.gitignore b/.gitignore index bbffdb4..caa4595 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ test/static flask_session/ app/static/config app/static/custom_config +app/static/bangs # pip stuff build/ diff --git a/Dockerfile b/Dockerfile index 455169c..c69cfdf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,16 @@ FROM python:3.8-slim WORKDIR /usr/src/app -RUN apt-get update && apt-get install -y build-essential libcurl4-openssl-dev libssl-dev +RUN apt-get update && apt-get install -y \ + build-essential \ + libcurl4-openssl-dev \ + libssl-dev \ + libxml2-dev \ + libxslt-dev \ + libffi-dev \ + tor + +COPY misc/tor/torrc /etc/tor/torrc COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt @@ -17,13 +26,29 @@ ENV WHOOGLE_USER=$username ARG password='' ENV WHOOGLE_PASS=$password +ARG proxyuser='' +ENV WHOOGLE_PROXY_USER=$proxyuser +ARG proxypass='' +ENV WHOOGLE_PROXY_PASS=$proxypass +ARG proxytype='' +ENV WHOOGLE_PROXY_TYPE=$proxytype +ARG proxyloc='' +ENV WHOOGLE_PROXY_LOC=$proxyloc + ENV HTTPS_ONLY=$use_https ARG whoogle_port=5000 ENV EXPOSE_PORT=$whoogle_port +ARG twitter_alt='nitter.net' +ENV WHOOGLE_ALT_TW=$twitter_alt +ARG youtube_alt='invidious.snopyta.org' +ENV WHOOGLE_ALT_YT=$youtube_alt +ARG instagram_alt='bibliogram.art/u' +ENV WHOOGLE_ALT_IG=$instagram_alt + COPY . . 
EXPOSE $EXPOSE_PORT -CMD ["./run"] +CMD misc/tor/start-tor.sh & ./run diff --git a/README.md b/README.md index 3e9c823..e5c084a 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ -# Whoogle Search +![Whoogle Search](docs/banner.png) [![Latest Release](https://img.shields.io/github/v/release/benbusby/whoogle-search)](https://github.com/benbusby/shoogle/releases) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Build Status](https://travis-ci.com/benbusby/whoogle-search.svg?branch=master)](https://travis-ci.com/benbusby/whoogle-search) +[![pep8](https://github.com/benbusby/whoogle-search/workflows/pep8/badge.svg)](https://github.com/benbusby/whoogle-search/actions?query=workflow%3Apep8) [![codebeat badge](https://codebeat.co/badges/e96cada2-fb6f-4528-8285-7d72abd74e8d)](https://codebeat.co/projects/github-com-benbusby-shoogle-master) [![Docker Pulls](https://img.shields.io/docker/pulls/benbusby/whoogle-search)](https://hub.docker.com/r/benbusby/whoogle-search) -[![Gitter](https://img.shields.io/gitter/room/benbusby/whoogle-search)](https://gitter.im/whoogle-search/community) Get Google search results, but without any ads, javascript, AMP links, cookies, or IP address tracking. Easily deployable in one click as a Docker app, and customizable with a single config file. Quick and simple to implement as a primary search engine replacement on both desktop and mobile. @@ -13,10 +13,11 @@ Contents 1. [Features](#features) 2. [Dependencies](#dependencies) 3. [Install/Deploy](#install) -4. [Usage](#usage) -5. [Extra Steps](#extra-steps) -6. [FAQ](#faq) -7. [Screenshots](#screenshots) +4. [Environment Variables](#environment-variables) +5. [Usage](#usage) +6. [Extra Steps](#extra-steps) +7. [FAQ](#faq) +8. [Screenshots](#screenshots) ## Features - No ads or sponsored content @@ -26,16 +27,18 @@ Contents - No AMP links - No URL tracking tags (i.e. 
utm=%s) - No referrer header +- Tor and HTTP/SOCKS proxy support - Autocomplete/search suggestions - POST request search and suggestion queries (when possible) - View images at full res without site redirect (currently mobile only) - Dark mode - Randomly generated User Agent - Easy to install/deploy +- DDG-style bang (i.e. `! `) searches - Optional location-based searching (i.e. results near \) - Optional NoJS mode to disable all Javascript in results -*If deployed to a remote server +*If deployed to a remote server, or configured to send requests through a VPN, Tor, proxy, etc. ## Dependencies If using Heroku Quick Deploy, **you can skip this section**. @@ -65,7 +68,7 @@ Provides: [![Run on Repl.it](https://repl.it/badge/github/benbusby/whoogle-search)](https://repl.it/github/benbusby/whoogle-search) Provides: -- Free deployment of app (can be ran without account) +- Free deployment of app - Free HTTPS url (https://\.\\.repl\.co) - Supports custom domains - Downtime after periods of inactivity \([solution 1](https://repl.it/talk/ask/use-this-pingmat1replco-just-enter/28821/101298), [solution 2](https://repl.it/talk/learn/How-to-use-and-setup-UptimeRobot/9003)\) @@ -96,6 +99,7 @@ optional arguments: --debug Activates debug mode for the server (default False) --https-only Enforces HTTPS redirects for all requests (default False) ``` +See the [available environment variables](#environment-variables) for additional configuration. ### E) Manual Clone the repo and run the following commands to start the app in a local-only environment: @@ -108,6 +112,7 @@ source venv/bin/activate pip install -r requirements.txt ./run ``` +See the [available environment variables](#environment-variables) for additional configuration. 
#### systemd Configuration After building the virtual environment, you can add the following to `/lib/systemd/system/whoogle.service` to set up a Whoogle Search systemd service: @@ -117,6 +122,20 @@ After building the virtual environment, you can add the following to `/lib/syste Description=Whoogle [Service] +# Basic auth configuration, uncomment to enable +#Environment=WHOOGLE_USER= +#Environment=WHOOGLE_PASS= +# Proxy configuration, uncomment to enable +#Environment=WHOOGLE_PROXY_USER= +#Environment=WHOOGLE_PROXY_PASS= +#Environment=WHOOGLE_PROXY_TYPE= +# Site alternative configurations, uncomment to enable +# Note: If not set, the feature will still be available +# with default values. +#Environment=WHOOGLE_ALT_TW=nitter.net +#Environment=WHOOGLE_ALT_YT=invidious.snopyta.org +#Environment=WHOOGLE_ALT_IG=bibliogram.art/u Type=simple User=root WorkingDirectory= @@ -143,6 +162,9 @@ sudo systemctl start whoogle 2. Clone and deploy the docker app using a method below: #### Docker CLI + +***Note:** For ARM machines, use the `buildx-experimental` Docker tag.* + Through Docker Hub: ```bash docker pull benbusby/whoogle-search @@ -166,6 +188,19 @@ docker build --tag whoogle-search:1.0 . docker run --publish 5000:5000 --detach --name whoogle-search whoogle-search:1.0 ``` +Optionally, you can also enable some of the following environment variables to further customize your instance: + +```bash +docker run --publish 5000:5000 --detach --name whoogle-search \ + -e WHOOGLE_USER=username \ + -e WHOOGLE_PASS=password \ + -e WHOOGLE_PROXY_USER=username \ + -e WHOOGLE_PROXY_PASS=password \ + -e WHOOGLE_PROXY_TYPE=socks5 \ + -e WHOOGLE_PROXY_LOC=ip \ + whoogle-search:1.0 +``` + And kill with: `docker rm --force whoogle-search` #### Using [Heroku CLI](https://devcenter.heroku.com/articles/heroku-cli) @@ -181,6 +216,7 @@ heroku open ``` This series of commands can take a while, but once you run it once, you shouldn't have to run it again. 
The final command, `heroku open` will launch a tab in your web browser, where you can test out Whoogle and even [set it as your primary search engine](https://github.com/benbusby/whoogle#set-whoogle-as-your-primary-search-engine). +You may also edit environment variables from your app’s Settings tab in the Heroku Dashboard. #### Using your own server, or alternative container deployment There are other methods for deploying docker containers that are well outlined in [this article](https://rollout.io/blog/the-shortlist-of-docker-hosting/), but there are too many to describe set up for each here. Generally it should be about the same amount of effort as the Heroku deployment. @@ -191,6 +227,23 @@ Depending on your preferences, you can also deploy the app yourself on your own - SSL certificates (free through [Let's Encrypt](https://letsencrypt.org/getting-started/)) - A bit more experience or willingness to work through issues +## Environment Variables +There are a few optional environment variables available for customizing a Whoogle instance: + +| Variable | Description | +| ------------------ | -------------------------------------------------------------- | +| WHOOGLE_USER | The username for basic auth. WHOOGLE_PASS must also be set if used. | +| WHOOGLE_PASS | The password for basic auth. WHOOGLE_USER must also be set if used. | +| WHOOGLE_PROXY_USER | The username of the proxy server. | +| WHOOGLE_PROXY_PASS | The password of the proxy server. | +| WHOOGLE_PROXY_TYPE | The type of the proxy server. Can be "socks5", "socks4", or "http". | +| WHOOGLE_PROXY_LOC | The location of the proxy server (host or ip). | +| EXPOSE_PORT | The port where Whoogle will be exposed. | +| HTTPS_ONLY | Enforce HTTPS. (See [here](https://github.com/benbusby/whoogle-search#https-enforcement)) | +| WHOOGLE_ALT_TW | The twitter.com alternative to use when site alternatives are enabled in the config. 
| +| WHOOGLE_ALT_YT | The youtube.com alternative to use when site alternatives are enabled in the config. | +| WHOOGLE_ALT_IG | The instagram.com alternative to use when site alternatives are enabled in the config. | + ## Usage Same as most search engines, with the exception of filtering by time range. @@ -256,7 +309,8 @@ Only needed if your setup requires Flask to redirect to HTTPS on its own -- gene Note: You should have your own domain name and [an https certificate](https://letsencrypt.org/getting-started/) in order for this to work properly. - Heroku: Ensure that the `Root URL` configuration on the home page begins with `https://` and not `http://` -- Docker: Add `--build-arg use_https=1` to your run command +- Docker build: Add `--build-arg use_https=1` to your build command +- Docker image: Set the environment variable HTTPS_ONLY=1 - Pip/Pipx: Add the `--https-only` flag to the end of the `whoogle-search` command - Default `run` script: Modify the script locally to include the `--https-only` flag at the end of the python run command @@ -277,7 +331,7 @@ A lot of the app currently piggybacks on Google's existing support for fetching ## Screenshots #### Desktop -![Whoogle Desktop](app/static/img/docs/screenshot_desktop.jpg) +![Whoogle Desktop](docs/screenshot_desktop.jpg) #### Mobile -![Whoogle Mobile](app/static/img/docs/screenshot_mobile.jpg) +![Whoogle Mobile](docs/screenshot_mobile.jpg) diff --git a/app.json b/app.json index 76b58a1..2e70f01 100644 --- a/app.json +++ b/app.json @@ -1,8 +1,64 @@ { - "name": "Whoogle Search", - "description": "A lightweight, privacy-oriented, containerized Google search proxy for desktop/mobile that removes Javascript, AMP links, tracking, and ads/sponsored content", - "repository": "https://github.com/benbusby/whoogle-search", - "logo": "https://raw.githubusercontent.com/benbusby/whoogle-search/master/app/static/img/favicon/ms-icon-150x150.png", - "keywords": ["search", "metasearch", "flask", "docker", "heroku", "adblock", 
"degoogle", "privacy"], - "stack": "container" + "name": "Whoogle Search", + "description": "A lightweight, privacy-oriented, containerized Google search proxy for desktop/mobile that removes Javascript, AMP links, tracking, and ads/sponsored content", + "repository": "https://github.com/benbusby/whoogle-search", + "logo": "https://raw.githubusercontent.com/benbusby/whoogle-search/master/app/static/img/favicon/ms-icon-150x150.png", + "keywords": [ + "search", + "metasearch", + "flask", + "docker", + "heroku", + "adblock", + "degoogle", + "privacy" + ], + "stack": "container", + "env": { + "WHOOGLE_USER": { + "description": "The username for basic auth. WHOOGLE_PASS must also be set if used. Leave empty to disable.", + "value": "", + "required": false + }, + "WHOOGLE_PASS": { + "description": "The password for basic auth. WHOOGLE_USER must also be set if used. Leave empty to disable.", + "value": "", + "required": false + }, + "WHOOGLE_PROXY_USER": { + "description": "The username of the proxy server. Leave empty to disable.", + "value": "", + "required": false + }, + "WHOOGLE_PROXY_PASS": { + "description": "The password of the proxy server. Leave empty to disable.", + "value": "", + "required": false + }, + "WHOOGLE_PROXY_TYPE": { + "description": "The type of the proxy server. For example \"socks5\". Leave empty to disable.", + "value": "", + "required": false + }, + "WHOOGLE_PROXY_LOC": { + "description": "The location of the proxy server (host or ip). 
Leave empty to disable.", + "value": "", + "required": false + }, + "WHOOGLE_ALT_TW": { + "description": "The site to use as a replacement for twitter.com when site alternatives are enabled in the config.", + "value": "", + "required": false + }, + "WHOOGLE_ALT_YT": { + "description": "The site to use as a replacement for youtube.com when site alternatives are enabled in the config.", + "value": "", + "required": false + }, + "WHOOGLE_ALT_IG": { + "description": "The site to use as a replacement for instagram.com when site alternatives are enabled in the config.", + "value": "", + "required": false + } + } } diff --git a/app/__init__.py b/app/__init__.py index 8293c44..3d6d6db 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -1,20 +1,45 @@ +from app.request import send_tor_signal from app.utils.session_utils import generate_user_keys +from app.utils.gen_ddg_bangs import gen_bangs_json from flask import Flask from flask_session import Session +import json import os +from stem import Signal -app = Flask(__name__, static_folder=os.path.dirname(os.path.abspath(__file__)) + '/static') +app = Flask(__name__, static_folder=os.path.dirname( + os.path.abspath(__file__)) + '/static') app.user_elements = {} app.default_key_set = generate_user_keys() app.no_cookie_ips = [] app.config['SECRET_KEY'] = os.urandom(32) app.config['SESSION_TYPE'] = 'filesystem' -app.config['VERSION_NUMBER'] = '0.2.1' -app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__))) -app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static')) -app.config['CONFIG_PATH'] = os.getenv('CONFIG_VOLUME', os.path.join(app.config['STATIC_FOLDER'], 'config')) -app.config['DEFAULT_CONFIG'] = os.path.join(app.config['CONFIG_PATH'], 'config.json') -app.config['SESSION_FILE_DIR'] = os.path.join(app.config['CONFIG_PATH'], 'session') +app.config['VERSION_NUMBER'] = '0.3.0' +app.config['APP_ROOT'] = os.getenv( + 'APP_ROOT', + 
os.path.dirname(os.path.abspath(__file__))) +app.config['LANGUAGES'] = json.load(open( + os.path.join(app.config['APP_ROOT'], '../misc/languages.json'))) +app.config['COUNTRIES'] = json.load(open( + os.path.join(app.config['APP_ROOT'], '../misc/countries.json'))) +app.config['STATIC_FOLDER'] = os.getenv( + 'STATIC_FOLDER', + os.path.join(app.config['APP_ROOT'], 'static')) +app.config['CONFIG_PATH'] = os.getenv( + 'CONFIG_VOLUME', + os.path.join(app.config['STATIC_FOLDER'], 'config')) +app.config['DEFAULT_CONFIG'] = os.path.join( + app.config['CONFIG_PATH'], + 'config.json') +app.config['SESSION_FILE_DIR'] = os.path.join( + app.config['CONFIG_PATH'], + 'session') +app.config['BANG_PATH'] = os.getenv( + 'CONFIG_VOLUME', + os.path.join(app.config['STATIC_FOLDER'], 'bangs')) +app.config['BANG_FILE'] = os.path.join( + app.config['BANG_PATH'], + 'bangs.json') if not os.path.exists(app.config['CONFIG_PATH']): os.makedirs(app.config['CONFIG_PATH']) @@ -22,6 +47,15 @@ if not os.path.exists(app.config['CONFIG_PATH']): if not os.path.exists(app.config['SESSION_FILE_DIR']): os.makedirs(app.config['SESSION_FILE_DIR']) +# Generate DDG bang filter, and create path if it doesn't exist yet +if not os.path.exists(app.config['BANG_PATH']): + os.makedirs(app.config['BANG_PATH']) +if not os.path.exists(app.config['BANG_FILE']): + gen_bangs_json(app.config['BANG_FILE']) + Session(app) -from app import routes +# Attempt to acquire tor identity, to determine if Tor config is available +send_tor_signal(Signal.HEARTBEAT) + +from app import routes # noqa diff --git a/app/filter.py b/app/filter.py index 71ac763..d3b69bc 100644 --- a/app/filter.py +++ b/app/filter.py @@ -32,20 +32,27 @@ class Filter: def reskin(self, page): # Aesthetic only re-skinning if self.dark: - page = page.replace('fff', '000').replace('202124', 'ddd').replace('1967D2', '3b85ea') + page = page.replace( + 'fff', '000').replace( + '202124', 'ddd').replace( + '1967D2', '3b85ea') return page def encrypt_path(self, msg, 
is_element=False): # Encrypts path to avoid plaintext results in logs if is_element: - # Element paths are tracked differently in order for the element key to be regenerated - # once all elements have been loaded - enc_path = Fernet(self.user_keys['element_key']).encrypt(msg.encode()).decode() + # Element paths are encrypted separately from text, to allow key + # regeneration once all items have been served to the user + enc_path = Fernet( + self.user_keys['element_key'] + ).encrypt(msg.encode()).decode() self._elements += 1 return enc_path - return Fernet(self.user_keys['text_key']).encrypt(msg.encode()).decode() + return Fernet( + self.user_keys['text_key'] + ).encrypt(msg.encode()).decode() def clean(self, soup): self.main_divs = soup.find('div', {'id': 'main'}) @@ -75,7 +82,8 @@ class Filter: footer = soup.find('footer') if footer: # Remove divs that have multiple links beyond just page navigation - [_.decompose() for _ in footer.find_all('div', recursive=False) if len(_.find_all('a', href=True)) > 2] + [_.decompose() for _ in footer.find_all('div', recursive=False) + if len(_.find_all('a', href=True)) > 3] header = soup.find('header') if header: @@ -88,16 +96,34 @@ class Filter: return for div in [_ for _ in self.main_divs.find_all('div', recursive=True)]: - has_ad = len([_ for _ in div.find_all('span', recursive=True) if has_ad_content(_.text)]) - _ = div.decompose() if has_ad else None + div_ads = [_ for _ in div.find_all('span', recursive=True) + if has_ad_content(_.text)] + _ = div.decompose() if len(div_ads) else None def fix_question_section(self): if not self.main_divs: return - question_divs = [_ for _ in self.main_divs.find_all('div', recursive=False) if len(_.find_all('h2')) > 0] + question_divs = [_ for _ in self.main_divs.find_all( + 'div', recursive=False + ) if len(_.find_all('h2')) > 0] + + if len(question_divs) == 0: + return + + # Wrap section in details element to allow collapse/expand + details = 
BeautifulSoup(features='html.parser').new_tag('details') + summary = BeautifulSoup(features='html.parser').new_tag('summary') + summary.string = question_divs[0].find('h2').text + question_divs[0].find('h2').decompose() + details.append(summary) + question_divs[0].wrap(details) + for question_div in question_divs: - questions = [_ for _ in question_div.find_all('div', recursive=True) if _.text.endswith('?')] + questions = [_ for _ in question_div.find_all( + 'div', recursive=True + ) if _.text.endswith('?')] + for question in questions: question['style'] = 'padding: 10px; font-style: italic;' @@ -107,18 +133,22 @@ class Filter: element_src = 'https:' + element_src elif element_src.startswith(LOGO_URL): # Re-brand with Whoogle logo - element['src'] = '/static/img/logo.png' + element['src'] = 'static/img/logo.png' element['style'] = 'height:40px;width:162px' return elif element_src.startswith(GOOG_IMG): element['src'] = BLANK_B64 return - element['src'] = '/element?url=' + self.encrypt_path(element_src, is_element=True) + \ - '&type=' + urlparse.quote(mime) - # TODO: Non-mobile image results link to website instead of image + element['src'] = 'element?url=' + self.encrypt_path( + element_src, + is_element=True) + '&type=' + urlparse.quote(mime) + + # FIXME: Non-mobile image results link to website instead of image # if not self.mobile: - # img.append(BeautifulSoup(FULL_RES_IMG.format(element_src), 'html.parser')) + # img.append( + # BeautifulSoup(FULL_RES_IMG.format(element_src), + # 'html.parser')) def update_styling(self, soup): # Remove unnecessary button(s) @@ -132,8 +162,9 @@ class Filter: # Update logo logo = soup.find('a', {'class': 'l'}) if logo and self.mobile: - logo['style'] = 'display:flex; justify-content:center; align-items:center; color:#685e79; ' \ - 'font-size:18px; ' + logo['style'] = ('display:flex; justify-content:center; ' + 'align-items:center; color:#685e79; ' + 'font-size:18px; ') # Fix search bar length on mobile try: @@ -145,8 +176,8 @@ 
class Filter: def update_link(self, link): # Replace href with only the intended destination (no "utm" type tags) href = link['href'].replace('https://www.google.com', '') - if '/advanced_search' in href or 'tbm=shop' in href: - # TODO: The "Shopping" tab requires further filtering (see #136) + if 'advanced_search' in href or 'tbm=shop' in href: + # FIXME: The "Shopping" tab requires further filtering (see #136) # Temporarily removing all links to that tab for now. link.decompose() return @@ -154,20 +185,26 @@ class Filter: link['target'] = '_blank' result_link = urlparse.urlparse(href) - query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else '' + query_link = parse_qs( + result_link.query + )['q'][0] if '?q=' in href else '' if query_link.startswith('/'): - # Internal google links (i.e. mail, maps, etc) should still be forwarded to Google + # Internal google links (i.e. mail, maps, etc) should still + # be forwarded to Google link['href'] = 'https://google.com' + query_link elif '/search?q=' in href: - # "li:1" implies the query should be interpreted verbatim, so we wrap it in double quotes + # "li:1" implies the query should be interpreted verbatim, + # which is accomplished by wrapping the query in double quotes if 'li:1' in href: query_link = '"' + query_link + '"' - new_search = '/search?q=' + self.encrypt_path(query_link) + new_search = 'search?q=' + self.encrypt_path(query_link) query_params = parse_qs(urlparse.urlparse(href).query) for param in VALID_PARAMS: - param_val = query_params[param][0] if param in query_params else '' + if param not in query_params: + continue + param_val = query_params[param][0] new_search += '&' + param + '=' + param_val link['href'] = new_search elif 'url?q=' in href: @@ -182,9 +219,11 @@ class Filter: # Replace link location if "alts" config is enabled if self.alt_redirect: - # Search and replace all link descriptions with alternative location + # Search and replace all link descriptions + # with alternative 
location link['href'] = get_site_alt(link['href']) - link_desc = link.find_all(text=re.compile('|'.join(SITE_ALTS.keys()))) + link_desc = link.find_all( + text=re.compile('|'.join(SITE_ALTS.keys()))) if len(link_desc) == 0: return diff --git a/app/models/config.py b/app/models/config.py index 2fb4088..3916be2 100644 --- a/app/models/config.py +++ b/app/models/config.py @@ -1,302 +1,4 @@ class Config: - # Derived from here: - # https://sites.google.com/site/tomihasa/google-language-codes#searchlanguage - LANGUAGES = [ - {'name': 'Default (none specified)', 'value': ''}, - {'name': 'English', 'value': 'lang_en'}, - {'name': 'Afrikaans', 'value': 'lang_af'}, - {'name': 'Arabic', 'value': 'lang_ar'}, - {'name': 'Armenian', 'value': 'lang_hy'}, - {'name': 'Belarusian', 'value': 'lang_be'}, - {'name': 'Bulgarian', 'value': 'lang_bg'}, - {'name': 'Catalan', 'value': 'lang_ca'}, - {'name': 'Chinese (Simplified)', 'value': 'lang_zh-CN'}, - {'name': 'Chinese (Traditional)', 'value': 'lang_zh-TW'}, - {'name': 'Croatian', 'value': 'lang_hr'}, - {'name': 'Czech', 'value': 'lang_cs'}, - {'name': 'Danish', 'value': 'lang_da'}, - {'name': 'Dutch', 'value': 'lang_nl'}, - {'name': 'Esperanto', 'value': 'lang_eo'}, - {'name': 'Estonian', 'value': 'lang_et'}, - {'name': 'Filipino', 'value': 'lang_tl'}, - {'name': 'Finnish', 'value': 'lang_fi'}, - {'name': 'French', 'value': 'lang_fr'}, - {'name': 'German', 'value': 'lang_de'}, - {'name': 'Greek', 'value': 'lang_el'}, - {'name': 'Hebrew', 'value': 'lang_iw'}, - {'name': 'Hindi', 'value': 'lang_hi'}, - {'name': 'Hungarian', 'value': 'lang_hu'}, - {'name': 'Icelandic', 'value': 'lang_is'}, - {'name': 'Indonesian', 'value': 'lang_id'}, - {'name': 'Italian', 'value': 'lang_it'}, - {'name': 'Japanese', 'value': 'lang_ja'}, - {'name': 'Korean', 'value': 'lang_ko'}, - {'name': 'Latvian', 'value': 'lang_lv'}, - {'name': 'Lithuanian', 'value': 'lang_lt'}, - {'name': 'Norwegian', 'value': 'lang_no'}, - {'name': 'Persian', 'value': 'lang_fa'}, - 
{'name': 'Polish', 'value': 'lang_pl'}, - {'name': 'Portuguese', 'value': 'lang_pt'}, - {'name': 'Romanian', 'value': 'lang_ro'}, - {'name': 'Russian', 'value': 'lang_ru'}, - {'name': 'Serbian', 'value': 'lang_sr'}, - {'name': 'Slovak', 'value': 'lang_sk'}, - {'name': 'Slovenian', 'value': 'lang_sl'}, - {'name': 'Spanish', 'value': 'lang_es'}, - {'name': 'Swahili', 'value': 'lang_sw'}, - {'name': 'Swedish', 'value': 'lang_sv'}, - {'name': 'Thai', 'value': 'lang_th'}, - {'name': 'Turkish', 'value': 'lang_tr'}, - {'name': 'Ukrainian', 'value': 'lang_uk'}, - {'name': 'Vietnamese', 'value': 'lang_vi'}, - ] - - COUNTRIES = [ - {'name': 'Default (none)', 'value': ''}, - {'name': 'Afghanistan', 'value': 'countryAF'}, - {'name': 'Albania', 'value': 'countryAL'}, - {'name': 'Algeria', 'value': 'countryDZ'}, - {'name': 'American Samoa', 'value': 'countryAS'}, - {'name': 'Andorra', 'value': 'countryAD'}, - {'name': 'Angola', 'value': 'countryAO'}, - {'name': 'Anguilla', 'value': 'countryAI'}, - {'name': 'Antarctica', 'value': 'countryAQ'}, - {'name': 'Antigua and Barbuda', 'value': 'countryAG'}, - {'name': 'Argentina', 'value': 'countryAR'}, - {'name': 'Armenia', 'value': 'countryAM'}, - {'name': 'Aruba', 'value': 'countryAW'}, - {'name': 'Australia', 'value': 'countryAU'}, - {'name': 'Austria', 'value': 'countryAT'}, - {'name': 'Azerbaijan', 'value': 'countryAZ'}, - {'name': 'Bahamas', 'value': 'countryBS'}, - {'name': 'Bahrain', 'value': 'countryBH'}, - {'name': 'Bangladesh', 'value': 'countryBD'}, - {'name': 'Barbados', 'value': 'countryBB'}, - {'name': 'Belarus', 'value': 'countryBY'}, - {'name': 'Belgium', 'value': 'countryBE'}, - {'name': 'Belize', 'value': 'countryBZ'}, - {'name': 'Benin', 'value': 'countryBJ'}, - {'name': 'Bermuda', 'value': 'countryBM'}, - {'name': 'Bhutan', 'value': 'countryBT'}, - {'name': 'Bolivia', 'value': 'countryBO'}, - {'name': 'Bosnia and Herzegovina', 'value': 'countryBA'}, - {'name': 'Botswana', 'value': 'countryBW'}, - {'name': 'Bouvet 
Island', 'value': 'countryBV'}, - {'name': 'Brazil', 'value': 'countryBR'}, - {'name': 'British Indian Ocean Territory', 'value': 'countryIO'}, - {'name': 'Brunei Darussalam', 'value': 'countryBN'}, - {'name': 'Bulgaria', 'value': 'countryBG'}, - {'name': 'Burkina Faso', 'value': 'countryBF'}, - {'name': 'Burundi', 'value': 'countryBI'}, - {'name': 'Cambodia', 'value': 'countryKH'}, - {'name': 'Cameroon', 'value': 'countryCM'}, - {'name': 'Canada', 'value': 'countryCA'}, - {'name': 'Cape Verde', 'value': 'countryCV'}, - {'name': 'Cayman Islands', 'value': 'countryKY'}, - {'name': 'Central African Republic', 'value': 'countryCF'}, - {'name': 'Chad', 'value': 'countryTD'}, - {'name': 'Chile', 'value': 'countryCL'}, - {'name': 'China', 'value': 'countryCN'}, - {'name': 'Christmas Island', 'value': 'countryCX'}, - {'name': 'Cocos (Keeling) Islands', 'value': 'countryCC'}, - {'name': 'Colombia', 'value': 'countryCO'}, - {'name': 'Comoros', 'value': 'countryKM'}, - {'name': 'Congo', 'value': 'countryCG'}, - {'name': 'Congo, Democratic Republic of the', 'value': 'countryCD'}, - {'name': 'Cook Islands', 'value': 'countryCK'}, - {'name': 'Costa Rica', 'value': 'countryCR'}, - {'name': 'Cote D\'ivoire', 'value': 'countryCI'}, - {'name': 'Croatia (Hrvatska)', 'value': 'countryHR'}, - {'name': 'Cuba', 'value': 'countryCU'}, - {'name': 'Cyprus', 'value': 'countryCY'}, - {'name': 'Czech Republic', 'value': 'countryCZ'}, - {'name': 'Denmark', 'value': 'countryDK'}, - {'name': 'Djibouti', 'value': 'countryDJ'}, - {'name': 'Dominica', 'value': 'countryDM'}, - {'name': 'Dominican Republic', 'value': 'countryDO'}, - {'name': 'East Timor', 'value': 'countryTP'}, - {'name': 'Ecuador', 'value': 'countryEC'}, - {'name': 'Egypt', 'value': 'countryEG'}, - {'name': 'El Salvador', 'value': 'countrySV'}, - {'name': 'Equatorial Guinea', 'value': 'countryGQ'}, - {'name': 'Eritrea', 'value': 'countryER'}, - {'name': 'Estonia', 'value': 'countryEE'}, - {'name': 'Ethiopia', 'value': 'countryET'}, 
- {'name': 'European Union', 'value': 'countryEU'}, - {'name': 'Falkland Islands (Malvinas)', 'value': 'countryFK'}, - {'name': 'Faroe Islands', 'value': 'countryFO'}, - {'name': 'Fiji', 'value': 'countryFJ'}, - {'name': 'Finland', 'value': 'countryFI'}, - {'name': 'France', 'value': 'countryFR'}, - {'name': 'France\, Metropolitan', 'value': 'countryFX'}, - {'name': 'French Guiana', 'value': 'countryGF'}, - {'name': 'French Polynesia', 'value': 'countryPF'}, - {'name': 'French Southern Territories', 'value': 'countryTF'}, - {'name': 'Gabon', 'value': 'countryGA'}, - {'name': 'Gambia', 'value': 'countryGM'}, - {'name': 'Georgia', 'value': 'countryGE'}, - {'name': 'Germany', 'value': 'countryDE'}, - {'name': 'Ghana', 'value': 'countryGH'}, - {'name': 'Gibraltar', 'value': 'countryGI'}, - {'name': 'Greece', 'value': 'countryGR'}, - {'name': 'Greenland', 'value': 'countryGL'}, - {'name': 'Grenada', 'value': 'countryGD'}, - {'name': 'Guadeloupe', 'value': 'countryGP'}, - {'name': 'Guam', 'value': 'countryGU'}, - {'name': 'Guatemala', 'value': 'countryGT'}, - {'name': 'Guinea', 'value': 'countryGN'}, - {'name': 'Guinea-Bissau', 'value': 'countryGW'}, - {'name': 'Guyana', 'value': 'countryGY'}, - {'name': 'Haiti', 'value': 'countryHT'}, - {'name': 'Heard Island and Mcdonald Islands', 'value': 'countryHM'}, - {'name': 'Holy See (Vatican City State)', 'value': 'countryVA'}, - {'name': 'Honduras', 'value': 'countryHN'}, - {'name': 'Hong Kong', 'value': 'countryHK'}, - {'name': 'Hungary', 'value': 'countryHU'}, - {'name': 'Iceland', 'value': 'countryIS'}, - {'name': 'India', 'value': 'countryIN'}, - {'name': 'Indonesia', 'value': 'countryID'}, - {'name': 'Iran, Islamic Republic of', 'value': 'countryIR'}, - {'name': 'Iraq', 'value': 'countryIQ'}, - {'name': 'Ireland', 'value': 'countryIE'}, - {'name': 'Israel', 'value': 'countryIL'}, - {'name': 'Italy', 'value': 'countryIT'}, - {'name': 'Jamaica', 'value': 'countryJM'}, - {'name': 'Japan', 'value': 'countryJP'}, - {'name': 
'Jordan', 'value': 'countryJO'}, - {'name': 'Kazakhstan', 'value': 'countryKZ'}, - {'name': 'Kenya', 'value': 'countryKE'}, - {'name': 'Kiribati', 'value': 'countryKI'}, - {'name': 'Korea, Democratic People\'s Republic of', 'value': 'countryKP'}, - {'name': 'Korea, Republic of', 'value': 'countryKR'}, - {'name': 'Kuwait', 'value': 'countryKW'}, - {'name': 'Kyrgyzstan', 'value': 'countryKG'}, - {'name': 'Lao People\'s Democratic Republic', 'value': 'countryLA'}, - {'name': 'Latvia', 'value': 'countryLV'}, - {'name': 'Lebanon', 'value': 'countryLB'}, - {'name': 'Lesotho', 'value': 'countryLS'}, - {'name': 'Liberia', 'value': 'countryLR'}, - {'name': 'Libyan Arab Jamahiriya', 'value': 'countryLY'}, - {'name': 'Liechtenstein', 'value': 'countryLI'}, - {'name': 'Lithuania', 'value': 'countryLT'}, - {'name': 'Luxembourg', 'value': 'countryLU'}, - {'name': 'Macao', 'value': 'countryMO'}, - {'name': 'Macedonia, the Former Yugosalv Republic of', 'value': 'countryMK'}, - {'name': 'Madagascar', 'value': 'countryMG'}, - {'name': 'Malawi', 'value': 'countryMW'}, - {'name': 'Malaysia', 'value': 'countryMY'}, - {'name': 'Maldives', 'value': 'countryMV'}, - {'name': 'Mali', 'value': 'countryML'}, - {'name': 'Malta', 'value': 'countryMT'}, - {'name': 'Marshall Islands', 'value': 'countryMH'}, - {'name': 'Martinique', 'value': 'countryMQ'}, - {'name': 'Mauritania', 'value': 'countryMR'}, - {'name': 'Mauritius', 'value': 'countryMU'}, - {'name': 'Mayotte', 'value': 'countryYT'}, - {'name': 'Mexico', 'value': 'countryMX'}, - {'name': 'Micronesia, Federated States of', 'value': 'countryFM'}, - {'name': 'Moldova, Republic of', 'value': 'countryMD'}, - {'name': 'Monaco', 'value': 'countryMC'}, - {'name': 'Mongolia', 'value': 'countryMN'}, - {'name': 'Montserrat', 'value': 'countryMS'}, - {'name': 'Morocco', 'value': 'countryMA'}, - {'name': 'Mozambique', 'value': 'countryMZ'}, - {'name': 'Myanmar', 'value': 'countryMM'}, - {'name': 'Namibia', 'value': 'countryNA'}, - {'name': 'Nauru', 
'value': 'countryNR'}, - {'name': 'Nepal', 'value': 'countryNP'}, - {'name': 'Netherlands', 'value': 'countryNL'}, - {'name': 'Netherlands Antilles', 'value': 'countryAN'}, - {'name': 'New Caledonia', 'value': 'countryNC'}, - {'name': 'New Zealand', 'value': 'countryNZ'}, - {'name': 'Nicaragua', 'value': 'countryNI'}, - {'name': 'Niger', 'value': 'countryNE'}, - {'name': 'Nigeria', 'value': 'countryNG'}, - {'name': 'Niue', 'value': 'countryNU'}, - {'name': 'Norfolk Island', 'value': 'countryNF'}, - {'name': 'Northern Mariana Islands', 'value': 'countryMP'}, - {'name': 'Norway', 'value': 'countryNO'}, - {'name': 'Oman', 'value': 'countryOM'}, - {'name': 'Pakistan', 'value': 'countryPK'}, - {'name': 'Palau', 'value': 'countryPW'}, - {'name': 'Palestinian Territory', 'value': 'countryPS'}, - {'name': 'Panama', 'value': 'countryPA'}, - {'name': 'Papua New Guinea', 'value': 'countryPG'}, - {'name': 'Paraguay', 'value': 'countryPY'}, - {'name': 'Peru', 'value': 'countryPE'}, - {'name': 'Philippines', 'value': 'countryPH'}, - {'name': 'Pitcairn', 'value': 'countryPN'}, - {'name': 'Poland', 'value': 'countryPL'}, - {'name': 'Portugal', 'value': 'countryPT'}, - {'name': 'Puerto Rico', 'value': 'countryPR'}, - {'name': 'Qatar', 'value': 'countryQA'}, - {'name': 'Reunion', 'value': 'countryRE'}, - {'name': 'Romania', 'value': 'countryRO'}, - {'name': 'Russian Federation', 'value': 'countryRU'}, - {'name': 'Rwanda', 'value': 'countryRW'}, - {'name': 'Saint Helena', 'value': 'countrySH'}, - {'name': 'Saint Kitts and Nevis', 'value': 'countryKN'}, - {'name': 'Saint Lucia', 'value': 'countryLC'}, - {'name': 'Saint Pierre and Miquelon', 'value': 'countryPM'}, - {'name': 'Saint Vincent and the Grenadines', 'value': 'countryVC'}, - {'name': 'Samoa', 'value': 'countryWS'}, - {'name': 'San Marino', 'value': 'countrySM'}, - {'name': 'Sao Tome and Principe', 'value': 'countryST'}, - {'name': 'Saudi Arabia', 'value': 'countrySA'}, - {'name': 'Senegal', 'value': 'countrySN'}, - {'name': 
'Serbia and Montenegro', 'value': 'countryCS'}, - {'name': 'Seychelles', 'value': 'countrySC'}, - {'name': 'Sierra Leone', 'value': 'countrySL'}, - {'name': 'Singapore', 'value': 'countrySG'}, - {'name': 'Slovakia', 'value': 'countrySK'}, - {'name': 'Slovenia', 'value': 'countrySI'}, - {'name': 'Solomon Islands', 'value': 'countrySB'}, - {'name': 'Somalia', 'value': 'countrySO'}, - {'name': 'South Africa', 'value': 'countryZA'}, - {'name': 'South Georgia and the South Sandwich Islands', 'value': 'countryGS'}, - {'name': 'Spain', 'value': 'countryES'}, - {'name': 'Sri Lanka', 'value': 'countryLK'}, - {'name': 'Sudan', 'value': 'countrySD'}, - {'name': 'Suriname', 'value': 'countrySR'}, - {'name': 'Svalbard and Jan Mayen', 'value': 'countrySJ'}, - {'name': 'Swaziland', 'value': 'countrySZ'}, - {'name': 'Sweden', 'value': 'countrySE'}, - {'name': 'Switzerland', 'value': 'countryCH'}, - {'name': 'Syrian Arab Republic', 'value': 'countrySY'}, - {'name': 'Taiwan, Province of China', 'value': 'countryTW'}, - {'name': 'Tajikistan', 'value': 'countryTJ'}, - {'name': 'Tanzania, United Republic of', 'value': 'countryTZ'}, - {'name': 'Thailand', 'value': 'countryTH'}, - {'name': 'Togo', 'value': 'countryTG'}, - {'name': 'Tokelau', 'value': 'countryTK'}, - {'name': 'Tonga', 'value': 'countryTO'}, - {'name': 'Trinidad and Tobago', 'value': 'countryTT'}, - {'name': 'Tunisia', 'value': 'countryTN'}, - {'name': 'Turkey', 'value': 'countryTR'}, - {'name': 'Turkmenistan', 'value': 'countryTM'}, - {'name': 'Turks and Caicos Islands', 'value': 'countryTC'}, - {'name': 'Tuvalu', 'value': 'countryTV'}, - {'name': 'Uganda', 'value': 'countryUG'}, - {'name': 'Ukraine', 'value': 'countryUA'}, - {'name': 'United Arab Emirates', 'value': 'countryAE'}, - {'name': 'United Kingdom', 'value': 'countryUK'}, - {'name': 'United States', 'value': 'countryUS'}, - {'name': 'United States Minor Outlying Islands', 'value': 'countryUM'}, - {'name': 'Uruguay', 'value': 'countryUY'}, - {'name': 
'Uzbekistan', 'value': 'countryUZ'}, - {'name': 'Vanuatu', 'value': 'countryVU'}, - {'name': 'Venezuela', 'value': 'countryVE'}, - {'name': 'Vietnam', 'value': 'countryVN'}, - {'name': 'Virgin Islands, British', 'value': 'countryVG'}, - {'name': 'Virgin Islands, U.S.', 'value': 'countryVI'}, - {'name': 'Wallis and Futuna', 'value': 'countryWF'}, - {'name': 'Western Sahara', 'value': 'countryEH'}, - {'name': 'Yemen', 'value': 'countryYE'}, - {'name': 'Yugoslavia', 'value': 'countryYU'}, - {'name': 'Zambia', 'value': 'countryZM'}, - {'name': 'Zimbabwe', 'value': 'countryZW'} - ] - def __init__(self, **kwargs): self.url = '' self.lang_search = '' @@ -305,10 +7,17 @@ class Config: self.safe = False self.dark = False self.nojs = False + self.tor = False self.near = '' self.alts = False self.new_tab = False self.get_only = False + self.safe_keys = [ + 'lang_search', + 'lang_interface', + 'ctry', + 'dark' + ] for key, value in kwargs.items(): setattr(self, key, value) @@ -324,3 +33,34 @@ class Config: def __contains__(self, name): return hasattr(self, name) + + def is_safe_key(self, key) -> bool: + """Establishes a group of config options that are safe to set + in the url. + + Args: + key (str) -- the key to check against + + Returns: + bool -- True/False depending on if the key is in the "safe" + array + """ + + return key in self.safe_keys + + def from_params(self, params) -> 'Config': + """Modify user config with search parameters. This is primarily + used for specifying configuration on a search-by-search basis on + public instances. 
+ + Args: + params -- the url arguments (can be any deemed safe by is_safe()) + + Returns: + Config -- a modified config object + """ + for param_key in params.keys(): + if not self.is_safe_key(param_key): + continue + self[param_key] = params.get(param_key) + return self diff --git a/app/request.py b/app/request.py index 4abb9b3..00e2ce0 100644 --- a/app/request.py +++ b/app/request.py @@ -1,12 +1,16 @@ -from lxml import etree +from app.models.config import Config +import xml.etree.ElementTree as ET import random import requests -from requests import Response +from requests import Response, ConnectionError import urllib.parse as urlparse +import os +from stem import Signal, SocketError +from stem.control import Controller -# Core Google search URLs SEARCH_URL = 'https://www.google.com/search?gbv=1&q=' -AUTOCOMPLETE_URL = 'https://suggestqueries.google.com/complete/search?client=toolbar&' +AUTOCOMPLETE_URL = ('https://suggestqueries.google.com/' + 'complete/search?client=toolbar&') MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0' DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0' @@ -15,7 +19,36 @@ DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0' VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source', 'nfpr'] -def gen_user_agent(is_mobile): +class TorError(Exception): + """Exception raised for errors in Tor requests. + + Attributes: + message -- a message describing the error that occurred + disable -- optionally disables Tor in the user config (note: + this should only happen if the connection has been dropped + altogether). 
+ """ + + def __init__(self, message, disable=False): + self.message = message + self.disable = disable + super().__init__(self.message) + + +def send_tor_signal(signal: Signal) -> bool: + try: + with Controller.from_port(port=9051) as c: + c.authenticate() + c.signal(signal) + os.environ['TOR_AVAILABLE'] = '1' + return True + except (SocketError, ConnectionRefusedError, ConnectionError): + os.environ['TOR_AVAILABLE'] = '0' + + return False + + +def gen_user_agent(is_mobile) -> str: mozilla = random.choice(['Moo', 'Woah', 'Bro', 'Slow']) + 'zilla' firefox = random.choice(['Choir', 'Squier', 'Higher', 'Wire']) + 'fox' linux = random.choice(['Win', 'Sin', 'Gin', 'Fin', 'Kin']) + 'ux' @@ -26,7 +59,7 @@ def gen_user_agent(is_mobile): return DESKTOP_UA.format(mozilla, linux, firefox) -def gen_query(query, args, config, near_city=None): +def gen_query(query, args, config, near_city=None) -> str: param_dict = {key: '' for key in VALID_PARAMS} # Use :past(hour/day/week/month/year) if available @@ -39,11 +72,16 @@ def gen_query(query, args, config, near_city=None): result_tbs = args.get('tbs') param_dict['tbs'] = '&tbs=' + result_tbs - # Occasionally the 'tbs' param provided by google also contains a field for 'lr', but formatted - # strangely. This is a (admittedly not very elegant) solution for this. - # Ex/ &tbs=qdr:h,lr:lang_1pl --> the lr param needs to be extracted and have the "1" digit removed in this case + # Occasionally the 'tbs' param provided by google also contains a + # field for 'lr', but formatted strangely. This is a rough solution + # for this. 
+ # + # Example: + # &tbs=qdr:h,lr:lang_1pl + # -- the lr param needs to be extracted and remove the leading '1' sub_lang = [_ for _ in result_tbs.split(',') if 'lr:' in _] - sub_lang = sub_lang[0][sub_lang[0].find('lr:') + 3:len(sub_lang[0])] if len(sub_lang) > 0 else '' + sub_lang = sub_lang[0][sub_lang[0].find('lr:') + + 3:len(sub_lang[0])] if len(sub_lang) > 0 else '' # Ensure search query is parsable query = urlparse.quote(query) @@ -60,20 +98,26 @@ def gen_query(query, args, config, near_city=None): if near_city: param_dict['near'] = '&near=' + urlparse.quote(near_city) - # Set language for results (lr) if source isn't set, otherwise use the result - # language param provided by google (but with the strange digit(s) removed) + # Set language for results (lr) if source isn't set, otherwise use the + # result language param provided in the results if 'source' in args: param_dict['source'] = '&source=' + args.get('source') - param_dict['lr'] = ('&lr=' + ''.join([_ for _ in sub_lang if not _.isdigit()])) if sub_lang else '' + param_dict['lr'] = ('&lr=' + ''.join( + [_ for _ in sub_lang if not _.isdigit()] + )) if sub_lang else '' else: - param_dict['lr'] = ('&lr=' + config.lang_search) if config.lang_search else '' + param_dict['lr'] = ( + '&lr=' + config.lang_search + ) if config.lang_search else '' - # Set autocorrected search ignore + # 'nfpr' defines the exclusion of results from an auto-corrected query if 'nfpr' in args: param_dict['nfpr'] = '&nfpr=' + args.get('nfpr') param_dict['cr'] = ('&cr=' + config.ctry) if config.ctry else '' - param_dict['hl'] = ('&hl=' + config.lang_interface.replace('lang_', '')) if config.lang_interface else '' + param_dict['hl'] = ( + '&hl=' + config.lang_interface.replace('lang_', '') + ) if config.lang_interface else '' param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off') for val in param_dict.values(): @@ -85,27 +129,117 @@ def gen_query(query, args, config, near_city=None): class Request: - def __init__(self, 
normal_ua, language='lang_en'): - self.language = language + """Class used for handling all outbound requests, including search queries, + search suggestions, and loading of external content (images, audio, etc). + + Attributes: + normal_ua -- the user's current user agent + root_path -- the root path of the whoogle instance + config -- the user's current whoogle configuration + """ + + def __init__(self, normal_ua, root_path, config: Config): + # Send heartbeat to Tor, used in determining if the user can or cannot + # enable Tor for future requests + send_tor_signal(Signal.HEARTBEAT) + + self.language = config.lang_search self.mobile = 'Android' in normal_ua or 'iPhone' in normal_ua self.modified_user_agent = gen_user_agent(self.mobile) + # Set up proxy, if previously configured + if os.environ.get('WHOOGLE_PROXY_LOC'): + auth_str = '' + if os.environ.get('WHOOGLE_PROXY_USER'): + auth_str = os.environ.get('WHOOGLE_PROXY_USER') + \ + ':' + os.environ.get('WHOOGLE_PROXY_PASS') + self.proxies = { + 'http': os.environ.get('WHOOGLE_PROXY_TYPE') + '://' + + auth_str + '@' + os.environ.get('WHOOGLE_PROXY_LOC'), + } + self.proxies['https'] = self.proxies['http'].replace('http', + 'https') + else: + self.proxies = { + 'http': 'socks5://127.0.0.1:9050', + 'https': 'socks5://127.0.0.1:9050' + } if config.tor else {} + self.tor = config.tor + self.tor_valid = False + self.root_path = root_path + def __getitem__(self, name): return getattr(self, name) - def autocomplete(self, query): + def autocomplete(self, query) -> list: + """Sends a query to Google's search suggestion service + + Args: + query: The in-progress query to send + + Returns: + list: The list of matches for possible search suggestions + + """ ac_query = dict(hl=self.language, q=query) - response = self.send(base_url=AUTOCOMPLETE_URL, query=urlparse.urlencode(ac_query)).text + response = self.send(base_url=AUTOCOMPLETE_URL, + query=urlparse.urlencode(ac_query)).text - if response: - dom = 
etree.fromstring(response) - return dom.xpath('//suggestion/@data') + if not response: + return [] - return [] + root = ET.fromstring(response) + return [_.attrib['data'] for _ in + root.findall('.//suggestion/[@data]')] - def send(self, base_url=SEARCH_URL, query='') -> Response: + def send(self, base_url=SEARCH_URL, query='', attempt=0) -> Response: + """Sends an outbound request to a URL. Optionally sends the request + using Tor, if enabled by the user. + + Args: + base_url: The URL to use in the request + query: The optional query string for the request + attempt: The number of attempts made for the request + (used for cycling through Tor identities, if enabled) + + Returns: + Response: The Response object returned by the requests call + + """ headers = { 'User-Agent': self.modified_user_agent } - return requests.get(base_url + query, headers=headers) + # Validate Tor conn and request new identity if the last one failed + if self.tor and not send_tor_signal( + Signal.NEWNYM if attempt > 0 else Signal.HEARTBEAT): + raise TorError( + "Tor was previously enabled, but the connection has been " + "dropped. 
Please check your Tor configuration and try again.", + disable=True) + + # Make sure that the tor connection is valid, if enabled + if self.tor: + tor_check = requests.get('https://check.torproject.org/', + proxies=self.proxies, headers=headers) + self.tor_valid = 'Congratulations' in tor_check.text + + if not self.tor_valid: + raise TorError( + "Tor connection succeeded, but the connection could not " + "be validated by torproject.org", + disable=True) + + response = requests.get( + base_url + query, + proxies=self.proxies, + headers=headers) + + # Retry query with new identity if using Tor (max 10 attempts) + if 'form id="captcha-form"' in response.text and self.tor: + attempt += 1 + if attempt > 10: + raise TorError("Tor query failed -- max attempts exceeded 10") + return self.send(base_url, query, attempt) + + return response diff --git a/app/routes.py b/app/routes.py index 56bc6de..d2d446c 100644 --- a/app/routes.py +++ b/app/routes.py @@ -9,15 +9,19 @@ import uuid from functools import wraps import waitress -from flask import jsonify, make_response, request, redirect, render_template, send_file, session +from flask import jsonify, make_response, request, redirect, render_template, \ + send_file, session, url_for from requests import exceptions from app import app from app.models.config import Config -from app.request import Request +from app.request import Request, TorError from app.utils.session_utils import valid_user_session from app.utils.routing_utils import * +# Load DDG bang json files only on init +bang_json = json.load(open(app.config['BANG_FILE'])) + def auth_required(f): @wraps(f) @@ -27,23 +31,30 @@ def auth_required(f): # Skip if username/password not set whoogle_user = os.getenv('WHOOGLE_USER', '') whoogle_pass = os.getenv('WHOOGLE_PASS', '') - if (not whoogle_user or not whoogle_pass) or \ - (auth and whoogle_user == auth.username and whoogle_pass == auth.password): + if (not whoogle_user or not whoogle_pass) or ( + auth + and whoogle_user == 
auth.username + and whoogle_pass == auth.password): return f(*args, **kwargs) else: - return make_response('Not logged in', 401, {'WWW-Authenticate': 'Basic realm="Login Required"'}) + return make_response('Not logged in', 401, { + 'WWW-Authenticate': 'Basic realm="Login Required"'}) + return decorated @app.before_request def before_request_func(): - g.request_params = request.args if request.method == 'GET' else request.form + g.request_params = ( + request.args if request.method == 'GET' else request.form + ) g.cookies_disabled = False # Generate session values for user if unavailable if not valid_user_session(session): session['config'] = json.load(open(app.config['DEFAULT_CONFIG'])) \ - if os.path.exists(app.config['DEFAULT_CONFIG']) else {'url': request.url_root} + if os.path.exists(app.config['DEFAULT_CONFIG']) else { + 'url': request.url_root} session['uuid'] = str(uuid.uuid4()) session['fernet_keys'] = generate_user_keys(True) @@ -54,18 +65,28 @@ def before_request_func(): if session['uuid'] not in app.user_elements: app.user_elements.update({session['uuid']: 0}) - # Always redirect to https if HTTPS_ONLY is set (otherwise default to False) + # Handle https upgrade https_only = os.getenv('HTTPS_ONLY', False) + is_heroku = request.url.endswith('.herokuapp.com') + is_http = request.url.startswith('http://') + + if (is_heroku and is_http) or (https_only and is_http): + return redirect( + request.url.replace('http://', 'https://', 1), + code=308) - if https_only and request.url.startswith('http://'): - return redirect(request.url.replace('http://', 'https://', 1), code=308) - g.user_config = Config(**session['config']) if not g.user_config.url: - g.user_config.url = request.url_root.replace('http://', 'https://') if https_only else request.url_root + g.user_config.url = request.url_root.replace( + 'http://', + 'https://') if https_only else request.url_root + + g.user_request = Request( + request.headers.get('User-Agent'), + request.url_root, + 
config=g.user_config) - g.user_request = Request(request.headers.get('User-Agent'), language=g.user_config.lang_search) g.app_location = g.user_config.url @@ -73,13 +94,17 @@ def before_request_func(): def after_request_func(response): if app.user_elements[session['uuid']] <= 0 and '/element' in request.url: # Regenerate element key if all elements have been served to user - session['fernet_keys']['element_key'] = '' if not g.cookies_disabled else app.default_key_set['element_key'] + session['fernet_keys'][ + 'element_key'] = '' if not g.cookies_disabled else \ + app.default_key_set['element_key'] app.user_elements[session['uuid']] = 0 - # Check if address consistently has cookies blocked, in which case start removing session - # files after creation. - # Note: This is primarily done to prevent overpopulation of session directories, since browsers that - # block cookies will still trigger Flask's session creation routine with every request. + # Check if address consistently has cookies blocked, + # in which case start removing session files after creation. + # + # Note: This is primarily done to prevent overpopulation of session + # directories, since browsers that block cookies will still trigger + # Flask's session creation routine with every request. 
if g.cookies_disabled and request.remote_addr not in app.no_cookie_ips: app.no_cookie_ips.append(request.remote_addr) elif g.cookies_disabled and request.remote_addr in app.no_cookie_ips: @@ -92,6 +117,7 @@ def after_request_func(response): @app.errorhandler(404) def unknown_page(e): + app.logger.warn(e) return redirect(g.app_location) @@ -100,11 +126,16 @@ def unknown_page(e): def index(): # Reset keys session['fernet_keys'] = generate_user_keys(g.cookies_disabled) + error_message = session[ + 'error_message'] if 'error_message' in session else '' + session['error_message'] = '' return render_template('index.html', - languages=Config.LANGUAGES, - countries=Config.COUNTRIES, + languages=app.config['LANGUAGES'], + countries=app.config['COUNTRIES'], config=g.user_config, + error_message=error_message, + tor_available=int(os.environ.get('TOR_AVAILABLE')), version_number=app.config['VERSION_NUMBER']) @@ -115,23 +146,43 @@ def opensearch(): if opensearch_url.endswith('/'): opensearch_url = opensearch_url[:-1] + get_only = g.user_config.get_only or 'Chrome' in request.headers.get( + 'User-Agent') + return render_template( 'opensearch.xml', main_url=opensearch_url, - request_type='' if g.user_config.get_only else 'method="post"' + request_type='' if get_only else 'method="post"' ), 200, {'Content-Disposition': 'attachment; filename="opensearch.xml"'} @app.route('/autocomplete', methods=['GET', 'POST']) def autocomplete(): q = g.request_params.get('q') + if not q: + # FF will occasionally (incorrectly) send the q field without a + # mimetype in the format "b'q='" through the request.data field + q = str(request.data).replace('q=', '') + + # Search bangs if the query begins with "!", but not "! " (feeling lucky) + if q.startswith('!') and len(q) > 1 and not q.startswith('! 
'): + return jsonify([q, [bang_json[_]['suggestion'] for _ in bang_json if + _.startswith(q)]]) if not q and not request.data: return jsonify({'?': []}) elif request.data: - q = urlparse.unquote_plus(request.data.decode('utf-8').replace('q=', '')) + q = urlparse.unquote_plus( + request.data.decode('utf-8').replace('q=', '')) - return jsonify([q, g.user_request.autocomplete(q)]) + # Return a list of suggestions for the query + # + # Note: If Tor is enabled, this returns nothing, as the request is + # almost always rejected + return jsonify([ + q, + g.user_request.autocomplete(q) if not g.user_config.tor else [] + ]) @app.route('/search', methods=['GET', 'POST']) @@ -140,19 +191,36 @@ def search(): # Reset element counter app.user_elements[session['uuid']] = 0 - search_util = RoutingUtils(request, g.user_config, session, cookies_disabled=g.cookies_disabled) + # Update user config if specified in search args + g.user_config = g.user_config.from_params(g.request_params) + + search_util = RoutingUtils(request, g.user_config, session, + cookies_disabled=g.cookies_disabled) query = search_util.new_search_query() + resolved_bangs = search_util.bang_operator(bang_json) + if resolved_bangs != '': + return redirect(resolved_bangs) + # Redirect to home if invalid/blank search if not query: return redirect('/') # Generate response and number of external elements from the page - response, elements = search_util.generate_response() - if search_util.feeling_lucky: + try: + response, elements = search_util.generate_response() + except TorError as e: + session['error_message'] = e.message + ( + "\\n\\nTor config is now disabled!" 
if e.disable else "") + session['config']['tor'] = False if e.disable else session['config'][ + 'tor'] + return redirect(url_for('.index')) + + if search_util.feeling_lucky or elements < 0: return redirect(response, code=303) - # Keep count of external elements to fetch before element key can be regenerated + # Keep count of external elements to fetch before + # the element key can be regenerated app.user_elements[session['uuid']] = elements return render_template( @@ -162,12 +230,13 @@ def search(): dark_mode=g.user_config.dark, response=response, version_number=app.config['VERSION_NUMBER'], - search_header=render_template( + search_header=(render_template( 'header.html', dark_mode=g.user_config.dark, query=urlparse.unquote(query), search_type=search_util.search_type, - mobile=g.user_request.mobile) if 'isch' not in search_util.search_type else '') + mobile=g.user_request.mobile) + if 'isch' not in search_util.search_type else '')) @app.route('/config', methods=['GET', 'POST', 'PUT']) @@ -177,8 +246,12 @@ def config(): return json.dumps(g.user_config.__dict__) elif request.method == 'PUT': if 'name' in request.args: - config_pkl = os.path.join(app.config['CONFIG_PATH'], request.args.get('name')) - session['config'] = pickle.load(open(config_pkl, 'rb')) if os.path.exists(config_pkl) else session['config'] + config_pkl = os.path.join( + app.config['CONFIG_PATH'], + request.args.get('name')) + session['config'] = (pickle.load(open(config_pkl, 'rb')) + if os.path.exists(config_pkl) + else session['config']) return json.dumps(session['config']) else: return json.dumps({}) @@ -189,11 +262,16 @@ def config(): # Save config by name to allow a user to easily load later if 'name' in request.args: - pickle.dump(config_data, open(os.path.join(app.config['CONFIG_PATH'], request.args.get('name')), 'wb')) + pickle.dump( + config_data, + open(os.path.join( + app.config['CONFIG_PATH'], + request.args.get('name')), 'wb')) # Overwrite default config if user has cookies disabled if 
g.cookies_disabled: - open(app.config['DEFAULT_CONFIG'], 'w').write(json.dumps(config_data, indent=4)) + open(app.config['DEFAULT_CONFIG'], 'w').write( + json.dumps(config_data, indent=4)) session['config'] = config_data return redirect(config_data['url']) @@ -236,7 +314,8 @@ def element(): except exceptions.RequestException: pass - empty_gif = base64.b64decode('R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==') + empty_gif = base64.b64decode( + 'R0lGODlhAQABAIAAAP///////yH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==') return send_file(io.BytesIO(empty_gif), mimetype='image/gif') @@ -244,32 +323,62 @@ def element(): @auth_required def window(): get_body = g.user_request.send(base_url=request.args.get('location')).text - get_body = get_body.replace('src="/', 'src="' + request.args.get('location') + '"') - get_body = get_body.replace('href="/', 'href="' + request.args.get('location') + '"') + get_body = get_body.replace('src="/', + 'src="' + request.args.get('location') + '"') + get_body = get_body.replace('href="/', + 'href="' + request.args.get('location') + '"') - results = BeautifulSoup(get_body, 'html.parser') + results = bsoup(get_body, 'html.parser') - try: - for script in results('script'): - script.decompose() - except Exception: - pass + for script in results('script'): + script.decompose() return render_template('display.html', response=results) def run_app(): - parser = argparse.ArgumentParser(description='Whoogle Search console runner') - parser.add_argument('--port', default=5000, metavar='', - help='Specifies a port to run on (default 5000)') - parser.add_argument('--host', default='127.0.0.1', metavar='', - help='Specifies the host address to use (default 127.0.0.1)') - parser.add_argument('--debug', default=False, action='store_true', - help='Activates debug mode for the server (default False)') - parser.add_argument('--https-only', default=False, action='store_true', - help='Enforces HTTPS redirects for all requests') - 
parser.add_argument('--userpass', default='', metavar='', - help='Sets a username/password basic auth combo (default None)') + parser = argparse.ArgumentParser( + description='Whoogle Search console runner') + parser.add_argument( + '--port', + default=5000, + metavar='', + help='Specifies a port to run on (default 5000)') + parser.add_argument( + '--host', + default='127.0.0.1', + metavar='', + help='Specifies the host address to use (default 127.0.0.1)') + parser.add_argument( + '--debug', + default=False, + action='store_true', + help='Activates debug mode for the server (default False)') + parser.add_argument( + '--https-only', + default=False, + action='store_true', + help='Enforces HTTPS redirects for all requests') + parser.add_argument( + '--userpass', + default='', + metavar='', + help='Sets a username/password basic auth combo (default None)') + parser.add_argument( + '--proxyauth', + default='', + metavar='', + help='Sets a username/password for a HTTP/SOCKS proxy (default None)') + parser.add_argument( + '--proxytype', + default='', + metavar='', + help='Sets a proxy type for all connections (default None)') + parser.add_argument( + '--proxyloc', + default='', + metavar='', + help='Sets a proxy location for all connections (default None)') args = parser.parse_args() if args.userpass: @@ -277,6 +386,14 @@ def run_app(): os.environ['WHOOGLE_USER'] = user_pass[0] os.environ['WHOOGLE_PASS'] = user_pass[1] + if args.proxytype and args.proxyloc: + if args.proxyauth: + proxy_user_pass = args.proxyauth.split(':') + os.environ['WHOOGLE_PROXY_USER'] = proxy_user_pass[0] + os.environ['WHOOGLE_PROXY_PASS'] = proxy_user_pass[1] + os.environ['WHOOGLE_PROXY_TYPE'] = args.proxytype + os.environ['WHOOGLE_PROXY_LOC'] = args.proxyloc + os.environ['HTTPS_ONLY'] = '1' if args.https_only else '' if args.debug: diff --git a/app/static/css/dark-theme.css b/app/static/css/dark-theme.css index 36cfada..4bd6af1 100644 --- a/app/static/css/dark-theme.css +++ 
b/app/static/css/dark-theme.css @@ -1,5 +1,5 @@ html { - background-color: #000 !important; + background-color: #222 !important; } body { @@ -7,7 +7,14 @@ body { } div { - /*background-color: #111 !important;*/ + color: #fff !important; +} + +li a { + color: #4b8eaa !important; +} + +li { color: #fff !important; } @@ -34,9 +41,17 @@ input { #search-bar { color: #fff !important; - background-color: #000 !important; + background-color: #222 !important; } .search-container { - background-color: #000 !important; + background-color: #222 !important; +} + +.ZINbbc{ + background-color: #1a1a1a !important; +} + +.bRsWnc{ + background-color: #1a1a1a !important; } diff --git a/app/static/css/header.css b/app/static/css/header.css index c3eebf8..3b24fdc 100644 --- a/app/static/css/header.css +++ b/app/static/css/header.css @@ -52,4 +52,11 @@ header { width: 100%; -webkit-tap-highlight-color: rgba(0,0,0,0); overflow: hidden; -} \ No newline at end of file +} + +.tracking-link { + font-size: large; + text-align: center; + margin: 15px; + display: block; +} diff --git a/app/static/css/search-dark.css b/app/static/css/search-dark.css index 2ac5b47..efd923e 100644 --- a/app/static/css/search-dark.css +++ b/app/static/css/search-dark.css @@ -21,7 +21,7 @@ padding: 10px; cursor: pointer; color: #fff; - background-color: #000; + background-color: #222; border-bottom: 1px solid #242424; } @@ -32,4 +32,9 @@ .autocomplete-active { background-color: #685e79 !important; color: #ffffff; -} \ No newline at end of file +} + +details summary { + padding: 10px; + font-weight: bold; +} diff --git a/app/static/css/search.css b/app/static/css/search.css index a79522b..155cfcf 100644 --- a/app/static/css/search.css +++ b/app/static/css/search.css @@ -31,4 +31,9 @@ .autocomplete-active { background-color: #685e79 !important; color: #ffffff; -} \ No newline at end of file +} + +details summary { + padding: 10px; + font-weight: bold; +} diff --git a/app/static/js/autocomplete.js 
b/app/static/js/autocomplete.js index 3d179ca..702ebc4 100644 --- a/app/static/js/autocomplete.js +++ b/app/static/js/autocomplete.js @@ -1,6 +1,6 @@ const handleUserInput = searchBar => { let xhrRequest = new XMLHttpRequest(); - xhrRequest.open("POST", "/autocomplete"); + xhrRequest.open("POST", "autocomplete"); xhrRequest.setRequestHeader("Content-type", "application/x-www-form-urlencoded"); xhrRequest.onload = function () { if (xhrRequest.readyState === 4 && xhrRequest.status !== 200) { @@ -93,8 +93,14 @@ const autocomplete = (searchInput, autocompleteResults) => { removeActive(suggestion); suggestion[currentFocus].classList.add("autocomplete-active"); - // Autofill search bar with suggestion content - searchBar.value = suggestion[currentFocus].textContent; + // Autofill search bar with suggestion content (minus the "bang name" if using a bang operator) + let searchContent = suggestion[currentFocus].textContent; + if (searchContent.indexOf('(') > 0) { + searchBar.value = searchContent.substring(0, searchContent.indexOf('(')); + } else { + searchBar.value = searchContent; + } + searchBar.focus(); }; @@ -117,4 +123,4 @@ const autocomplete = (searchInput, autocompleteResults) => { document.addEventListener("click", function (e) { closeAllLists(e.target); }); -}; \ No newline at end of file +}; diff --git a/app/static/js/controller.js b/app/static/js/controller.js index 156a84d..3ab8ca7 100644 --- a/app/static/js/controller.js +++ b/app/static/js/controller.js @@ -1,6 +1,6 @@ // Whoogle configurations that use boolean values and checkboxes CONFIG_BOOLS = [ - "nojs", "dark", "safe", "alts", "new_tab", "get_only" + "nojs", "dark", "safe", "alts", "new_tab", "get_only", "tor" ]; // Whoogle configurations that use string values and input fields @@ -31,7 +31,7 @@ const setupSearchLayout = () => { const fillConfigValues = () => { // Request existing config info let xhrGET = new XMLHttpRequest(); - xhrGET.open("GET", "/config"); + xhrGET.open("GET", "config"); 
xhrGET.onload = function() { if (xhrGET.readyState === 4 && xhrGET.status !== 200) { alert("Error loading Whoogle config"); @@ -82,7 +82,7 @@ const loadConfig = event => { } let xhrPUT = new XMLHttpRequest(); - xhrPUT.open("PUT", "/config?name=" + config + ".conf"); + xhrPUT.open("PUT", "config?name=" + config + ".conf"); xhrPUT.onload = function() { if (xhrPUT.readyState === 4 && xhrPUT.status !== 200) { alert("Error loading Whoogle config"); @@ -104,7 +104,7 @@ const saveConfig = event => { } let configForm = document.getElementById("config-form"); - configForm.action = '/config?name=' + config + ".conf"; + configForm.action = 'config?name=' + config + ".conf"; configForm.submit(); }; diff --git a/app/static/js/utils.js b/app/static/js/utils.js new file mode 100644 index 0000000..775e072 --- /dev/null +++ b/app/static/js/utils.js @@ -0,0 +1,60 @@ +const checkForTracking = () => { + const mainDiv = document.getElementById("main"); + const query = document.getElementById("search-bar").value.replace(/\s+/g, ''); + + // Note: regex functions for checking for tracking queries were derived + // from here -- https://stackoverflow.com/questions/619977 + const matchTracking = { + "ups": { + "link": `https://www.ups.com/track?tracknum=${query}`, + "expr": [ + /\b(1Z ?[0-9A-Z]{3} ?[0-9A-Z]{3} ?[0-9A-Z]{2} ?[0-9A-Z]{4} ?[0-9A-Z]{3} ?[0-9A-Z]|[\dT]\d\d\d ?\d\d\d\d ?\d\d\d)\b/ + ] + }, + "usps": { + "link": `https://tools.usps.com/go/TrackConfirmAction?tLabels=${query}`, + "expr": [ + /(\b\d{30}\b)|(\b91\d+\b)|(\b\d{20}\b)/, + /^E\D{1}\d{9}\D{2}$|^9\d{15,21}$/, + /^91[0-9]+$/, + /^[A-Za-z]{2}[0-9]+US$/ + ] + }, + "fedex": { + "link": `https://www.fedex.com/apps/fedextrack/?tracknumbers=${query}`, + "expr": [ + /(\b96\d{20}\b)|(\b\d{15}\b)|(\b\d{12}\b)/, + /\b((98\d\d\d\d\d?\d\d\d\d|98\d\d) ?\d\d\d\d ?\d\d\d\d( ?\d\d\d)?)\b/, + /^[0-9]{15}$/ + ] + } + } + + // Creates a link to a UPS/USPS/FedEx tracking page + const createTrackingLink = href => { + let link = 
document.createElement("a"); + link.className = "tracking-link"; + link.innerHTML = "View Tracking Info"; + link.href = href; + mainDiv.prepend(link); + } + + // Compares the query against a set of regex patterns + // for tracking numbers + const compareQuery = provider => { + provider.expr.some(regex => { + if (query.match(regex)) { + createTrackingLink(provider.link); + return true; + } + }); + } + + for (const key of Object.keys(matchTracking)) { + compareQuery(matchTracking[key]); + } +} + +document.addEventListener("DOMContentLoaded", function() { + checkForTracking(); +}); diff --git a/app/templates/display.html b/app/templates/display.html index 6a8a609..f1dc572 100644 --- a/app/templates/display.html +++ b/app/templates/display.html @@ -1,15 +1,16 @@ - - - + + + - - - + + + + {% if dark_mode %} - + {% endif %} {{ query }} - Whoogle Search diff --git a/app/templates/error.html b/app/templates/error.html index 003623d..9546e23 100644 --- a/app/templates/error.html +++ b/app/templates/error.html @@ -3,3 +3,4 @@

Error parsing "{{ query }}"

+Return Home diff --git a/app/templates/header.html b/app/templates/header.html index 113cfa4..7ae980f 100644 --- a/app/templates/header.html +++ b/app/templates/header.html @@ -15,6 +15,7 @@ border: {{ '1px solid #685e79' if dark_mode else '' }}" spellcheck="false" type="text" value="{{ query }}"> +
@@ -38,6 +39,7 @@ color: {{ '#685e79' if dark_mode else '#000' }}; border: {{ '1px solid #685e79' if dark_mode else '' }}"> +
@@ -56,4 +58,4 @@ document.getElementById("search-form").submit(); } }); - \ No newline at end of file + diff --git a/app/templates/index.html b/app/templates/index.html index 8d50f76..766fab1 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -1,30 +1,30 @@ - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - - - + + + + - - + + {% if config.dark %} - + {% endif %}