Expand `/window` endpoint to behave like a proxy
The `/window` endpoint is currently used as a proxy, but only for removing JavaScript from the result page. This commit expands the existing functionality to allow users to proxy search result pages through their Whoogle instance. It temporarily overwrites the nojs feature until I can decide how I want to approach separating (or not separating) these two features. Note that this feature will likely be merged in without being 100% finished, since I'm unsure of how users intend to use it. With some community testing, I'll get a better idea of A) how many people are actually going to use this feature, and B) which aspects of the feature are useful and which are not.
This commit is contained in:
parent
2a0ad8796c
commit
7b8a8525d5
|
@ -264,7 +264,7 @@ class Filter:
|
|||
# enabled
|
||||
parent.decompose()
|
||||
|
||||
def update_element_src(self, element: Tag, mime: str) -> None:
|
||||
def update_element_src(self, element: Tag, mime: str, attr='src') -> None:
|
||||
"""Encrypts the original src of an element and rewrites the element src
|
||||
to use the "/element?src=" pass-through.
|
||||
|
||||
|
@ -272,10 +272,12 @@ class Filter:
|
|||
None (The soup element is modified directly)
|
||||
|
||||
"""
|
||||
src = element['src']
|
||||
src = element[attr].split(' ')[0]
|
||||
|
||||
if src.startswith('//'):
|
||||
src = 'https:' + src
|
||||
elif src.startswith('data:'):
|
||||
return
|
||||
|
||||
if src.startswith(LOGO_URL):
|
||||
# Re-brand with Whoogle logo
|
||||
|
@ -287,7 +289,7 @@ class Filter:
|
|||
element['src'] = BLANK_B64
|
||||
return
|
||||
|
||||
element['src'] = f'{Endpoint.element}?url=' + self.encrypt_path(
|
||||
element[attr] = f'{Endpoint.element}?url=' + self.encrypt_path(
|
||||
src,
|
||||
is_element=True) + '&type=' + urlparse.quote(mime)
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@ from app.models.config import Config
|
|||
from app.models.endpoint import Endpoint
|
||||
from app.request import Request, TorError
|
||||
from app.utils.bangs import resolve_bang
|
||||
from app.filter import Filter
|
||||
from app.utils.misc import read_config_bool, get_client_ip, get_request_url, \
|
||||
check_for_update
|
||||
from app.utils.results import add_ip_card, bold_search_terms,\
|
||||
|
@ -457,8 +458,11 @@ def imgres():
|
|||
@session_required
|
||||
@auth_required
|
||||
def element():
|
||||
cipher_suite = Fernet(g.session_key)
|
||||
src_url = cipher_suite.decrypt(request.args.get('url').encode()).decode()
|
||||
element_url = src_url = request.args.get('url')
|
||||
if element_url.startswith('gAAAAA'):
|
||||
cipher_suite = Fernet(g.session_key)
|
||||
src_url = cipher_suite.decrypt(element_url.encode()).decode()
|
||||
|
||||
src_type = request.args.get('type')
|
||||
|
||||
try:
|
||||
|
@ -477,18 +481,58 @@ def element():
|
|||
|
||||
|
||||
@app.route(f'/{Endpoint.window}')
|
||||
@session_required
|
||||
@auth_required
|
||||
def window():
|
||||
get_body = g.user_request.send(base_url=request.args.get('location')).text
|
||||
get_body = get_body.replace('src="/',
|
||||
'src="' + request.args.get('location') + '"')
|
||||
get_body = get_body.replace('href="/',
|
||||
'href="' + request.args.get('location') + '"')
|
||||
target_url = request.args.get('location')
|
||||
if target_url.startswith('gAAAAA'):
|
||||
cipher_suite = Fernet(g.session_key)
|
||||
target_url = cipher_suite.decrypt(target_url.encode()).decode()
|
||||
|
||||
content_filter = Filter(g.session_key, config=g.user_config)
|
||||
target = urlparse.urlparse(target_url)
|
||||
host_url = f'{target.scheme}://{target.netloc}'
|
||||
|
||||
get_body = g.user_request.send(base_url=target_url).text
|
||||
|
||||
results = bsoup(get_body, 'html.parser')
|
||||
src_attrs = ['src', 'href', 'srcset', 'data-srcset', 'data-src']
|
||||
|
||||
for script in results('script'):
|
||||
script.decompose()
|
||||
# Parse HTML response and replace relative links w/ absolute
|
||||
for element in results.find_all():
|
||||
for attr in src_attrs:
|
||||
if not element.has_attr(attr) or not element[attr].startswith('/'):
|
||||
continue
|
||||
|
||||
element[attr] = host_url + element[attr]
|
||||
|
||||
# Replace or remove javascript sources
|
||||
for script in results.find_all('script', {'src': True}):
|
||||
if 'nojs' in request.args:
|
||||
script.decompose()
|
||||
else:
|
||||
content_filter.update_element_src(script, 'application/javascript')
|
||||
|
||||
# Replace all possible image attributes
|
||||
for img in results.find_all('img'):
|
||||
_ = [
|
||||
content_filter.update_element_src(img, 'image/png', attr=_)
|
||||
for _ in ['src', 'data-src', 'data-srcset', 'srcset'] if img.has_attr(_)
|
||||
]
|
||||
|
||||
# Replace all stylesheet sources
|
||||
for link in results.find_all('link', {'href': True}):
|
||||
content_filter.update_element_src(link, 'text/css', attr='href')
|
||||
|
||||
# Use anonymous view for all links on page
|
||||
for a in results.find_all('a', {'href': True}):
|
||||
a['href'] = '/window?location=' + a['href'] + (
|
||||
'&nojs=1' if 'nojs' in request.args else '')
|
||||
|
||||
# Remove all iframes -- these are commonly used inside of <noscript> tags
|
||||
# to enforce loading Google Analytics
|
||||
for iframe in results.find_all('iframe'):
|
||||
iframe.decompose()
|
||||
|
||||
return render_template(
|
||||
'display.html',
|
||||
|
|
Loading…
Reference in New Issue
Block a user