From 82f58778c0835aa8109fc237758bb68fcfd77a7d Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Mon, 7 Feb 2022 09:45:33 -0700 Subject: [PATCH] Expand removal of unsupported G pages The images page still had a number of pages linked that forwarded to non-Whoogle pages. Also fixed footer styling on image results page. --- app/filter.py | 15 ++++++++++++--- app/static/css/search.css | 4 ++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/app/filter.py b/app/filter.py index fa7e604..88af0fb 100644 --- a/app/filter.py +++ b/app/filter.py @@ -13,6 +13,14 @@ from urllib.parse import parse_qs import os minimal_mode_sections = ['Top stories', 'Images', 'People also ask'] +unsupported_g_pages = [ + 'support.google.com', + 'accounts.google.com', + 'google.com/preferences', + 'google.com/intl', + 'advanced_search', + 'tbm=shop' +] def extract_q(q_str: str, href: str) -> str: @@ -315,14 +323,15 @@ class Filter: None (the tag is updated directly) """ - # Replace href with only the intended destination (no "utm" type tags) - href = link['href'].replace('https://www.google.com', '') - if 'advanced_search' in href or 'tbm=shop' in href: + # Remove any elements that direct to unsupported Google pages + if any(url in link['href'] for url in unsupported_g_pages): # FIXME: The "Shopping" tab requires further filtering (see #136) # Temporarily removing all links to that tab for now. link.decompose() return + # Replace href with only the intended destination (no "utm" type tags) + href = link['href'].replace('https://www.google.com', '') result_link = urlparse.urlparse(href) q = extract_q(result_link.query, href) diff --git a/app/static/css/search.css b/app/static/css/search.css index 23484a2..bba5054 100644 --- a/app/static/css/search.css +++ b/app/static/css/search.css @@ -53,6 +53,10 @@ details summary span { padding-top: 0 !important; } +.footer { + text-align: center; +} + @media (min-width: 801px) { body { min-width: 736px !important;