Merge remote-tracking branch 'origin/main' into feature/public-instance-sessions
This commit is contained in:
commit
548dc418bf
|
@ -1,4 +1,4 @@
|
|||

|
||||

|
||||
|
||||
[Latest release](https://github.com/benbusby/shoogle/releases)
|
||||
[License: MIT](https://opensource.org/licenses/MIT)
|
||||
|
@ -321,6 +321,7 @@ There are a few optional environment variables available for customizing a Whoog
|
|||
| WHOOGLE_ALT_TL | The Google Translate alternative to use. This is used for all "translate ____" searches. |
|
||||
| WHOOGLE_ALT_MD | The medium.com alternative to use when site alternatives are enabled in the config. |
|
||||
| WHOOGLE_AUTOCOMPLETE | Controls visibility of autocomplete/search suggestions. Default on -- use '0' to disable |
|
||||
| WHOOGLE_MINIMAL | Remove everything except basic result cards from all search queries. |
|
||||
|
||||
### Config Environment Variables
|
||||
These environment variables set default config values, which can still be overridden manually from the home page config menu. They provide a shortcut for destroying and rebuilding an instance with the same config state every time.
|
||||
|
@ -505,7 +506,7 @@ A lot of the app currently piggybacks on Google's existing support for fetching
|
|||
|
||||
## Screenshots
|
||||
#### Desktop
|
||||

|
||||

|
||||
|
||||
#### Mobile
|
||||

|
||||

|
||||
|
|
5
app.json
5
app.json
|
@ -75,6 +75,11 @@
|
|||
"value": "scribe.rip",
|
||||
"required": false
|
||||
},
|
||||
"WHOOGLE_MINIMAL": {
|
||||
"description": "Remove everything except basic result cards from all search queries (set to 1 or leave blank)",
|
||||
"value": "",
|
||||
"required": false
|
||||
},
|
||||
"WHOOGLE_CONFIG_COUNTRY": {
|
||||
"description": "[CONFIG] The country to use for restricting search results (use values from https://raw.githubusercontent.com/benbusby/whoogle-search/develop/app/static/settings/countries.json)",
|
||||
"value": "",
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
from app.request import VALID_PARAMS, MAPS_URL
|
||||
from app.utils.misc import read_config_bool
|
||||
from app.utils.results import *
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.element import ResultSet, Tag
|
||||
|
@ -7,6 +8,7 @@ from flask import render_template
|
|||
import re
|
||||
import urllib.parse as urlparse
|
||||
from urllib.parse import parse_qs
|
||||
import os
|
||||
|
||||
|
||||
def extract_q(q_str: str, href: str) -> str:
|
||||
|
@ -171,6 +173,8 @@ class Filter:
|
|||
Returns:
|
||||
None (The soup object is modified directly)
|
||||
"""
|
||||
minimal_mode = read_config_bool('WHOOGLE_MINIMAL')
|
||||
|
||||
def pull_child_divs(result_div: BeautifulSoup):
|
||||
try:
|
||||
return result_div.findChildren(
|
||||
|
@ -186,8 +190,12 @@ class Filter:
|
|||
# Loop through results and check for the number of child divs in each
|
||||
for result in self.main_divs:
|
||||
result_children = pull_child_divs(result)
|
||||
if len(result_children) < self.RESULT_CHILD_LIMIT:
|
||||
continue
|
||||
if minimal_mode:
|
||||
if len(result_children) in (1, 3):
|
||||
continue
|
||||
else:
|
||||
if len(result_children) < self.RESULT_CHILD_LIMIT:
|
||||
continue
|
||||
|
||||
# Find and decompose the first element with an inner HTML text val.
|
||||
# This typically extracts the title of the section (i.e. "Related
|
||||
|
@ -206,13 +214,18 @@ class Filter:
|
|||
while not parent and idx < len(result_children):
|
||||
parent = result_children[idx].parent
|
||||
idx += 1
|
||||
|
||||
details = BeautifulSoup(features='html.parser').new_tag('details')
|
||||
summary = BeautifulSoup(features='html.parser').new_tag('summary')
|
||||
summary.string = label
|
||||
details.append(summary)
|
||||
|
||||
if parent:
|
||||
if parent and not minimal_mode:
|
||||
parent.wrap(details)
|
||||
elif parent and minimal_mode:
|
||||
# Remove parent element from document if "minimal mode" is
|
||||
# enabled
|
||||
parent.decompose()
|
||||
|
||||
def update_element_src(self, element: Tag, mime: str) -> None:
|
||||
"""Encrypts the original src of an element and rewrites the element src
|
||||
|
|
|
@ -9,7 +9,8 @@ import os
|
|||
from stem import Signal, SocketError
|
||||
from stem.control import Controller
|
||||
|
||||
SEARCH_URL = 'https://www.google.com/search?gbv=1&q='
|
||||
SEARCH_URL = 'https://www.google.com/search?gbv=1&num=' + str(
|
||||
os.getenv('WHOOGLE_RESULTS_PER_PAGE', 10)) + '&q='
|
||||
MAPS_URL = 'https://maps.google.com/maps'
|
||||
AUTOCOMPLETE_URL = ('https://suggestqueries.google.com/'
|
||||
'complete/search?client=toolbar&')
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import argparse
|
||||
import base64
|
||||
import html
|
||||
import io
|
||||
import json
|
||||
import pickle
|
||||
|
@ -15,6 +16,7 @@ from app.request import Request, TorError
|
|||
from app.utils.bangs import resolve_bang
|
||||
from app.utils.misc import read_config_bool
|
||||
from app.utils.results import add_ip_card
|
||||
from app.utils.results import bold_search_terms
|
||||
from app.utils.search import *
|
||||
from app.utils.session import generate_user_key, valid_user_session
|
||||
from bs4 import BeautifulSoup as bsoup
|
||||
|
@ -298,7 +300,7 @@ def search():
|
|||
|
||||
# Return 503 if temporarily blocked by captcha
|
||||
resp_code = 503 if has_captcha(str(response)) else 200
|
||||
|
||||
response = bold_search_terms(response, query)
|
||||
# Feature to display IP address
|
||||
if search_util.check_kw_ip():
|
||||
html_soup = bsoup(response, "html.parser")
|
||||
|
@ -320,7 +322,7 @@ def search():
|
|||
is_translation=any(
|
||||
_ in query.lower() for _ in [translation['translate'], 'translate']
|
||||
) and not search_util.search_type, # Standard search queries only
|
||||
response=response,
|
||||
response=html.unescape(str(response)),
|
||||
version_number=app.config['VERSION_NUMBER'],
|
||||
search_header=(render_template(
|
||||
'header.html',
|
||||
|
|
|
@ -7,7 +7,6 @@
|
|||
<meta name="referrer" content="no-referrer">
|
||||
<link rel="stylesheet" href="{{ cb_url('input.css') }}">
|
||||
<link rel="stylesheet" href="{{ cb_url('search.css') }}">
|
||||
<link rel="stylesheet" href="{{ cb_url('variables.css') }}">
|
||||
<link rel="stylesheet" href="{{ cb_url('header.css') }}">
|
||||
{% if config.theme %}
|
||||
{% if config.theme == 'system' %}
|
||||
|
|
|
@ -21,7 +21,6 @@
|
|||
<script type="text/javascript" src="{{ cb_url('controller.js') }}"></script>
|
||||
<link rel="search" href="opensearch.xml" type="application/opensearchdescription+xml" title="Whoogle Search">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<link rel="stylesheet" href="{{ cb_url('variables.css') }}">
|
||||
{% if config.theme %}
|
||||
{% if config.theme == 'system' %}
|
||||
<style>
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
from bs4 import BeautifulSoup
|
||||
from bs4 import BeautifulSoup, NavigableString
|
||||
import os
|
||||
import urllib.parse as urlparse
|
||||
from urllib.parse import parse_qs
|
||||
|
||||
import re
|
||||
|
||||
SKIP_ARGS = ['ref_src', 'utm']
|
||||
SKIP_PREFIX = ['//www.', '//mobile.', '//m.']
|
||||
|
@ -13,7 +13,6 @@ BLANK_B64 = ('data:image/png;base64,'
|
|||
'iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkw'
|
||||
'AIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC')
|
||||
|
||||
|
||||
# Ad keywords
|
||||
BLACKLIST = [
|
||||
'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama',
|
||||
|
@ -34,6 +33,43 @@ SITE_ALTS = {
|
|||
}
|
||||
|
||||
|
||||
def bold_search_terms(response: str, query: str) -> BeautifulSoup:
    """Wraps all search terms in bold tags (<b>). If any terms are wrapped
    in quotes, only that exact phrase will be made bold.

    Terms that contain nothing but whitespace once non-alphanumeric
    characters have been stripped (e.g. a query made only of punctuation)
    are skipped, since an empty pattern would match every text node.

    Args:
        response: The initial response body for the query
        query: The original search query

    Returns:
        BeautifulSoup: modified soup object with bold items
    """
    response = BeautifulSoup(response, 'html.parser')

    def replace_any_case(element: NavigableString, target_word: str) -> None:
        # Replace all instances of the word, but maintaining the same case in
        # the replacement
        if len(element) == len(target_word):
            # The text node has the same length as the term, so it is left
            # untouched rather than wrapped
            return

        # NOTE(review): the result of re.sub is a plain string, so the <b>
        # markup is inserted as text rather than as Tag objects. It is
        # presumably entity-escaped when the soup is serialized -- confirm
        # that callers account for this.
        element.replace_with(
            re.sub(fr'\b((?![{{}}<>-]){target_word}(?![{{}}<>-]))\b',
                   r'<b>\1</b>',
                   element,
                   flags=re.I)
        )

    # Split all words out of query, grouping the ones wrapped in quotes
    for word in re.split(r'\s+(?=[^"]*(?:"[^"]*"[^"]*)*$)', query):
        # Keep only alphanumerics and spaces so the term is safe to embed
        # in the regular expressions used here and in replace_any_case
        word = re.sub(r'[^A-Za-z0-9 ]+', '', word)

        # An empty/blank term would match every text node and insert empty
        # <b></b> pairs at each word boundary, so ignore it
        if not word.strip():
            continue

        target = response.find_all(
            text=re.compile(r'' + re.escape(word), re.I))
        for nav_str in target:
            replace_any_case(nav_str, word)

    return response
|
||||
|
||||
|
||||
def has_ad_content(element: str) -> bool:
|
||||
"""Inspects an HTML element for ad related content
|
||||
|
||||
|
|
|
@ -53,12 +53,6 @@ def test_config(client):
|
|||
for key in demo_config.keys():
|
||||
assert config[key] == demo_config[key]
|
||||
|
||||
# Test setting config via search
|
||||
custom_config = '&dark=1&lang_interface=lang_en'
|
||||
rv = client.get('/search?q=test' + custom_config)
|
||||
assert rv._status_code == 200
|
||||
assert custom_config.replace('&', '&') in str(rv.data)
|
||||
|
||||
# Test disabling changing config from client
|
||||
app.config['CONFIG_DISABLE'] = 1
|
||||
dark_mod = not demo_config['dark']
|
||||
|
|
Loading…
Reference in New Issue
Block a user