diff --git a/app/filter.py b/app/filter.py index 731b6aa..bc778a5 100644 --- a/app/filter.py +++ b/app/filter.py @@ -264,7 +264,7 @@ class Filter: # enabled parent.decompose() - def update_element_src(self, element: Tag, mime: str) -> None: + def update_element_src(self, element: Tag, mime: str, attr='src') -> None: """Encrypts the original src of an element and rewrites the element src to use the "/element?src=" pass-through. @@ -272,10 +272,12 @@ class Filter: None (The soup element is modified directly) """ - src = element['src'] + src = element[attr].split(' ')[0] if src.startswith('//'): src = 'https:' + src + elif src.startswith('data:'): + return if src.startswith(LOGO_URL): # Re-brand with Whoogle logo @@ -287,7 +289,7 @@ class Filter: element['src'] = BLANK_B64 return - element['src'] = f'{Endpoint.element}?url=' + self.encrypt_path( + element[attr] = f'{Endpoint.element}?url=' + self.encrypt_path( src, is_element=True) + '&type=' + urlparse.quote(mime) diff --git a/app/routes.py b/app/routes.py index f4308e1..1e8bab3 100644 --- a/app/routes.py +++ b/app/routes.py @@ -16,6 +16,7 @@ from app.models.config import Config from app.models.endpoint import Endpoint from app.request import Request, TorError from app.utils.bangs import resolve_bang +from app.filter import Filter from app.utils.misc import read_config_bool, get_client_ip, get_request_url, \ check_for_update from app.utils.results import add_ip_card, bold_search_terms,\ @@ -457,8 +458,11 @@ def imgres(): @session_required @auth_required def element(): - cipher_suite = Fernet(g.session_key) - src_url = cipher_suite.decrypt(request.args.get('url').encode()).decode() + element_url = src_url = request.args.get('url') + if element_url.startswith('gAAAAA'): + cipher_suite = Fernet(g.session_key) + src_url = cipher_suite.decrypt(element_url.encode()).decode() + src_type = request.args.get('type') try: @@ -477,18 +481,58 @@ def element(): @app.route(f'/{Endpoint.window}') +@session_required @auth_required def window(): - get_body = g.user_request.send(base_url=request.args.get('location')).text - get_body = get_body.replace('src="/', - 'src="' + request.args.get('location') + '"') - get_body = get_body.replace('href="/', - 'href="' + request.args.get('location') + '"') + target_url = request.args.get('location') + if target_url.startswith('gAAAAA'): + cipher_suite = Fernet(g.session_key) + target_url = cipher_suite.decrypt(target_url.encode()).decode() + + content_filter = Filter(g.session_key, config=g.user_config) + target = urlparse.urlparse(target_url) + host_url = f'{target.scheme}://{target.netloc}' + + get_body = g.user_request.send(base_url=target_url).text results = bsoup(get_body, 'html.parser') + src_attrs = ['src', 'href', 'srcset', 'data-srcset', 'data-src'] - for script in results('script'): - script.decompose() + # Parse HTML response and replace relative links w/ absolute + for element in results.find_all(): + for attr in src_attrs: + if not element.has_attr(attr) or not element[attr].startswith('/'): + continue + + element[attr] = host_url + element[attr] + + # Replace or remove javascript sources + for script in results.find_all('script', {'src': True}): + if 'nojs' in request.args: + script.decompose() + else: + content_filter.update_element_src(script, 'application/javascript') + + # Replace all possible image attributes + for img in results.find_all('img'): + _ = [ + content_filter.update_element_src(img, 'image/png', attr=_) + for _ in ['src', 'data-src', 'data-srcset', 'srcset'] if img.has_attr(_) + ] + + # Replace all stylesheet sources + for link in results.find_all('link', {'href': True}): + content_filter.update_element_src(link, 'text/css', attr='href') + + # Use anonymous view for all links on page + for a in results.find_all('a', {'href': True}): + a['href'] = '/window?location=' + a['href'] + ( + '&nojs=1' if 'nojs' in request.args else '') + + # Remove all iframes -- these are commonly used inside of