Merge remote-tracking branch 'origin/main' into feature/public-instance-sessions

This commit is contained in:
Ben Busby 2021-10-26 21:49:46 -06:00
commit 548dc418bf
No known key found for this signature in database
GPG Key ID: 339B7B7EB5333D14
9 changed files with 70 additions and 20 deletions

View File

@ -1,4 +1,4 @@
![Whoogle Search](https://raw.githubusercontent.com/benbusby/whoogle-search/main/docs/banner.png)
![Whoogle Search](docs/banner.png)
[![Latest Release](https://img.shields.io/github/v/release/benbusby/whoogle-search)](https://github.com/benbusby/whoogle-search/releases)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
@ -321,6 +321,7 @@ There are a few optional environment variables available for customizing a Whoog
| WHOOGLE_ALT_TL | The Google Translate alternative to use. This is used for all "translate ____" searches. |
| WHOOGLE_ALT_MD | The medium.com alternative to use when site alternatives are enabled in the config. |
| WHOOGLE_AUTOCOMPLETE | Controls visibility of autocomplete/search suggestions. Default on -- use '0' to disable |
| WHOOGLE_MINIMAL | Remove everything except basic result cards from all search queries. |
### Config Environment Variables
These environment variables set default config values, which can still be overridden manually using the home page config menu. They provide a shortcut for destroying and rebuilding an instance with the same config state every time.
@ -505,7 +506,7 @@ A lot of the app currently piggybacks on Google's existing support for fetching
## Screenshots
#### Desktop
![Whoogle Desktop](https://raw.githubusercontent.com/benbusby/whoogle-search/main/docs/screenshot_desktop.jpg)
![Whoogle Desktop](docs/screenshot_desktop.jpg)
#### Mobile
![Whoogle Mobile](https://raw.githubusercontent.com/benbusby/whoogle-search/main/docs/screenshot_mobile.jpg)
![Whoogle Mobile](docs/screenshot_mobile.jpg)

View File

@ -75,6 +75,11 @@
"value": "scribe.rip",
"required": false
},
"WHOOGLE_MINIMAL": {
"description": "Remove everything except basic result cards from all search queries (set to 1 or leave blank)",
"value": "",
"required": false
},
"WHOOGLE_CONFIG_COUNTRY": {
"description": "[CONFIG] The country to use for restricting search results (use values from https://raw.githubusercontent.com/benbusby/whoogle-search/develop/app/static/settings/countries.json)",
"value": "",

View File

@ -1,4 +1,5 @@
from app.request import VALID_PARAMS, MAPS_URL
from app.utils.misc import read_config_bool
from app.utils.results import *
from bs4 import BeautifulSoup
from bs4.element import ResultSet, Tag
@ -7,6 +8,7 @@ from flask import render_template
import re
import urllib.parse as urlparse
from urllib.parse import parse_qs
import os
def extract_q(q_str: str, href: str) -> str:
@ -171,6 +173,8 @@ class Filter:
Returns:
None (The soup object is modified directly)
"""
minimal_mode = read_config_bool('WHOOGLE_MINIMAL')
def pull_child_divs(result_div: BeautifulSoup):
try:
return result_div.findChildren(
@ -186,6 +190,10 @@ class Filter:
# Loop through results and check for the number of child divs in each
for result in self.main_divs:
result_children = pull_child_divs(result)
if minimal_mode:
if len(result_children) in (1, 3):
continue
else:
if len(result_children) < self.RESULT_CHILD_LIMIT:
continue
@ -206,13 +214,18 @@ class Filter:
while not parent and idx < len(result_children):
parent = result_children[idx].parent
idx += 1
details = BeautifulSoup(features='html.parser').new_tag('details')
summary = BeautifulSoup(features='html.parser').new_tag('summary')
summary.string = label
details.append(summary)
if parent:
if parent and not minimal_mode:
parent.wrap(details)
elif parent and minimal_mode:
# Remove parent element from document if "minimal mode" is
# enabled
parent.decompose()
def update_element_src(self, element: Tag, mime: str) -> None:
"""Encrypts the original src of an element and rewrites the element src

View File

@ -9,7 +9,8 @@ import os
from stem import Signal, SocketError
from stem.control import Controller
SEARCH_URL = 'https://www.google.com/search?gbv=1&q='
# Base search URL. The `num` query parameter is taken from the
# WHOOGLE_RESULTS_PER_PAGE environment variable, falling back to 10 when the
# variable is unset. The value is inserted as-is (no validation is done here).
SEARCH_URL = 'https://www.google.com/search?gbv=1&num=' + str(
    os.getenv('WHOOGLE_RESULTS_PER_PAGE', 10)) + '&q='
MAPS_URL = 'https://maps.google.com/maps'
AUTOCOMPLETE_URL = ('https://suggestqueries.google.com/'
'complete/search?client=toolbar&')

View File

@ -1,5 +1,6 @@
import argparse
import base64
import html
import io
import json
import pickle
@ -15,6 +16,7 @@ from app.request import Request, TorError
from app.utils.bangs import resolve_bang
from app.utils.misc import read_config_bool
from app.utils.results import add_ip_card
from app.utils.results import bold_search_terms
from app.utils.search import *
from app.utils.session import generate_user_key, valid_user_session
from bs4 import BeautifulSoup as bsoup
@ -298,7 +300,7 @@ def search():
# Return 503 if temporarily blocked by captcha
resp_code = 503 if has_captcha(str(response)) else 200
response = bold_search_terms(response, query)
# Feature to display IP address
if search_util.check_kw_ip():
html_soup = bsoup(response, "html.parser")
@ -320,7 +322,7 @@ def search():
is_translation=any(
_ in query.lower() for _ in [translation['translate'], 'translate']
) and not search_util.search_type, # Standard search queries only
response=response,
response=html.unescape(str(response)),
version_number=app.config['VERSION_NUMBER'],
search_header=(render_template(
'header.html',

View File

@ -7,7 +7,6 @@
<meta name="referrer" content="no-referrer">
<link rel="stylesheet" href="{{ cb_url('input.css') }}">
<link rel="stylesheet" href="{{ cb_url('search.css') }}">
<link rel="stylesheet" href="{{ cb_url('variables.css') }}">
<link rel="stylesheet" href="{{ cb_url('header.css') }}">
{% if config.theme %}
{% if config.theme == 'system' %}

View File

@ -21,7 +21,6 @@
<script type="text/javascript" src="{{ cb_url('controller.js') }}"></script>
<link rel="search" href="opensearch.xml" type="application/opensearchdescription+xml" title="Whoogle Search">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="stylesheet" href="{{ cb_url('variables.css') }}">
{% if config.theme %}
{% if config.theme == 'system' %}
<style>

View File

@ -1,8 +1,8 @@
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup, NavigableString
import os
import urllib.parse as urlparse
from urllib.parse import parse_qs
import re
SKIP_ARGS = ['ref_src', 'utm']
SKIP_PREFIX = ['//www.', '//mobile.', '//m.']
@ -13,7 +13,6 @@ BLANK_B64 = ('data:image/png;base64,'
'iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkw'
'AIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC')
# Ad keywords
BLACKLIST = [
'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama',
@ -34,6 +33,43 @@ SITE_ALTS = {
}
def bold_search_terms(response: str, query: str) -> BeautifulSoup:
    """Wraps all search terms in bold tags (<b>). If any terms are wrapped
    in quotes, only that exact phrase will be made bold.

    Query terms that are empty (or only whitespace) once non-alphanumeric
    characters have been stripped are skipped: an empty term would match
    every text node and insert empty bold tags at every word boundary.

    Args:
        response: The initial response body for the query
        query: The original search query

    Returns:
        BeautifulSoup: modified soup object with bold items
    """
    response = BeautifulSoup(response, 'html.parser')

    def replace_any_case(element: NavigableString, target_word: str) -> None:
        # Replace all instances of the word, but maintaining the same case in
        # the replacement
        if len(element) == len(target_word):
            # The text node has the same length as the term; leave it alone
            return

        # The term is escaped so it is always treated as literal text, and
        # the lookaheads avoid matching next to braces, angle brackets or
        # hyphens.
        escaped = re.escape(target_word)
        pattern = fr'\b((?![{{}}<>-]){escaped}(?![{{}}<>-]))\b'

        # NOTE(review): the "<b>" markup is inserted as plain text, so it is
        # presumably escaped when the soup is serialized and must be
        # unescaped by the caller -- confirm against the calling route.
        element.replace_with(
            re.sub(pattern, r'<b>\1</b>', element, flags=re.I)
        )

    # Split all words out of query, grouping the ones wrapped in quotes
    for word in re.split(r'\s+(?=[^"]*(?:"[^"]*"[^"]*)*$)', query):
        word = re.sub(r'[^A-Za-z0-9 ]+', '', word)
        if not word.strip():
            # Nothing searchable is left (e.g. the term was only symbols)
            continue

        target = response.find_all(
            text=re.compile(r'' + re.escape(word), re.I))
        for nav_str in target:
            replace_any_case(nav_str, word)

    return response
def has_ad_content(element: str) -> bool:
"""Inspects an HTML element for ad related content

View File

@ -53,12 +53,6 @@ def test_config(client):
for key in demo_config.keys():
assert config[key] == demo_config[key]
# Test setting config via search
custom_config = '&dark=1&lang_interface=lang_en'
rv = client.get('/search?q=test' + custom_config)
assert rv._status_code == 200
assert custom_config.replace('&', '&amp;') in str(rv.data)
# Test disabling changing config from client
app.config['CONFIG_DISABLE'] = 1
dark_mod = not demo_config['dark']