Added backend site alternative redirects/text replacement

By default, twitter/instagram/youtube will redirect to
nitter/bibliogram/invidious respectively. Currently missing a front end
for enabling/disabling this feature, but may want to set this as enabled
until a user manually disables it.

Also refactored util naming a bit, added filter utils to hold all
non-class methods from filter.py
This commit is contained in:
Ben Busby 2020-07-18 11:32:52 -06:00
parent 3d7456f37b
commit 4c8ffaa3ba
9 changed files with 94 additions and 64 deletions

View File

@ -1,4 +1,4 @@
from app.utils.misc import generate_user_keys from app.utils.session_utils import generate_user_keys
from flask import Flask from flask import Flask
from flask_session import Session from flask_session import Session
import os import os

View File

@ -1,56 +1,11 @@
from app.request import VALID_PARAMS from app.request import VALID_PARAMS
from app.utils.misc import BLACKLIST from app.utils.filter_utils import *
from bs4 import BeautifulSoup
from bs4.element import ResultSet from bs4.element import ResultSet
from cryptography.fernet import Fernet from cryptography.fernet import Fernet
import re import re
import urllib.parse as urlparse import urllib.parse as urlparse
from urllib.parse import parse_qs from urllib.parse import parse_qs
SKIP_ARGS = ['ref_src', 'utm']
FULL_RES_IMG = '<br/><a href="{}">Full Image</a>'
GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo'
LOGO_URL = GOOG_IMG + '_desk'
BLANK_B64 = '''

'''
def get_first_link(soup):
# Replace hrefs with only the intended destination (no "utm" type tags)
for a in soup.find_all('a', href=True):
# Return the first search result URL
if 'url?q=' in a['href']:
return filter_link_args(a['href'])
def filter_link_args(query_link):
parsed_link = urlparse.urlparse(query_link)
link_args = parse_qs(parsed_link.query)
safe_args = {}
if len(link_args) == 0 and len(parsed_link) > 0:
return query_link
for arg in link_args.keys():
if arg in SKIP_ARGS:
continue
safe_args[arg] = link_args[arg]
# Remove original link query and replace with filtered args
query_link = query_link.replace(parsed_link.query, '')
if len(safe_args) > 0:
query_link = query_link + urlparse.urlencode(safe_args, doseq=True)
else:
query_link = query_link.replace('?', '')
return query_link
def has_ad_content(element: str):
return element.upper() in (value.upper() for value in BLACKLIST) or '' in element
class Filter: class Filter:
def __init__(self, user_keys: dict, mobile=False, config=None): def __init__(self, user_keys: dict, mobile=False, config=None):
@ -61,6 +16,7 @@ class Filter:
self.dark = config['dark'] if 'dark' in config else False self.dark = config['dark'] if 'dark' in config else False
self.nojs = config['nojs'] if 'nojs' in config else False self.nojs = config['nojs'] if 'nojs' in config else False
self.new_tab = config['new_tab'] if 'new_tab' in config else False self.new_tab = config['new_tab'] if 'new_tab' in config else False
self.alt_redirect = config['alts'] if 'alts' in config else True
self.mobile = mobile self.mobile = mobile
self.user_keys = user_keys self.user_keys = user_keys
self.main_divs = ResultSet('') self.main_divs = ResultSet('')
@ -232,11 +188,11 @@ class Filter:
else: else:
link['href'] = href link['href'] = href
# Replace link location
if self.alt_redirect:
link['href'] = get_site_alt(link['href'])
link_desc = link.find_all(text=re.compile('|'.join(SITE_ALTS.keys())))
if len(link_desc) == 0:
return
def gen_nojs(sibling): link_desc[0].replace_with(get_site_alt(link_desc[0]))
nojs_link = BeautifulSoup().new_tag('a')
nojs_link['href'] = '/window?location=' + sibling['href']
nojs_link['style'] = 'display:block;width:100%;'
nojs_link.string = 'NoJS Link: ' + nojs_link['href']
sibling.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
sibling.append(nojs_link)

View File

@ -15,7 +15,7 @@ from requests import exceptions
from app import app from app import app
from app.models.config import Config from app.models.config import Config
from app.request import Request from app.request import Request
from app.utils.misc import valid_user_session from app.utils.session_utils import valid_user_session
from app.utils.routing_utils import * from app.utils.routing_utils import *

79
app/utils/filter_utils.py Normal file
View File

@ -0,0 +1,79 @@
from bs4 import BeautifulSoup
import urllib.parse as urlparse
from urllib.parse import parse_qs
SKIP_ARGS = ['ref_src', 'utm']
FULL_RES_IMG = '<br/><a href="{}">Full Image</a>'
GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo'
LOGO_URL = GOOG_IMG + '_desk'
BLANK_B64 = '''

'''
BLACKLIST = [
'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', 'Реклама', 'Anunț', '광고',
'annons', 'Annonse', 'Iklan', '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam',
'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés'
]
SITE_ALTS = {
'twitter.com': 'nitter.net',
'youtube.com': 'invidio.us',
'instagram.com': 'bibliogram.art/u'
}
def has_ad_content(element: str):
return element.upper() in (value.upper() for value in BLACKLIST) or '' in element
def get_first_link(soup):
# Replace hrefs with only the intended destination (no "utm" type tags)
for a in soup.find_all('a', href=True):
# Return the first search result URL
if 'url?q=' in a['href']:
return filter_link_args(a['href'])
def get_site_alt(link: str):
for site_key in SITE_ALTS.keys():
if site_key not in link:
continue
link = link.replace(site_key, SITE_ALTS[site_key])
break
return link
def filter_link_args(query_link):
parsed_link = urlparse.urlparse(query_link)
link_args = parse_qs(parsed_link.query)
safe_args = {}
if len(link_args) == 0 and len(parsed_link) > 0:
return query_link
for arg in link_args.keys():
if arg in SKIP_ARGS:
continue
safe_args[arg] = link_args[arg]
# Remove original link query and replace with filtered args
query_link = query_link.replace(parsed_link.query, '')
if len(safe_args) > 0:
query_link = query_link + urlparse.urlencode(safe_args, doseq=True)
else:
query_link = query_link.replace('?', '')
return query_link
def gen_nojs(sibling):
nojs_link = BeautifulSoup().new_tag('a')
nojs_link['href'] = '/window?location=' + sibling['href']
nojs_link['style'] = 'display:block;width:100%;'
nojs_link.string = 'NoJS Link: ' + nojs_link['href']
sibling.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
sibling.append(nojs_link)

View File

@ -1,5 +1,5 @@
from app.filter import Filter, get_first_link from app.filter import Filter, get_first_link
from app.utils.misc import generate_user_keys from app.utils.session_utils import generate_user_keys
from app.request import gen_query from app.request import gen_query
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from cryptography.fernet import Fernet, InvalidToken from cryptography.fernet import Fernet, InvalidToken

View File

@ -2,11 +2,6 @@ from cryptography.fernet import Fernet
from flask import current_app as app from flask import current_app as app
REQUIRED_SESSION_VALUES = ['uuid', 'config', 'fernet_keys'] REQUIRED_SESSION_VALUES = ['uuid', 'config', 'fernet_keys']
BLACKLIST = [
'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', 'Реклама', 'Anunț', '광고',
'annons', 'Annonse', 'Iklan', '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam',
'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés'
]
def generate_user_keys(cookies_disabled=False) -> dict: def generate_user_keys(cookies_disabled=False) -> dict:

View File

@ -1,5 +1,5 @@
from app import app from app import app
from app.utils.misc import generate_user_keys from app.utils.session_utils import generate_user_keys
import pytest import pytest

View File

@ -1,4 +1,4 @@
from app.utils.misc import generate_user_keys, valid_user_session from app.utils.session_utils import generate_user_keys, valid_user_session
def test_generate_user_keys(): def test_generate_user_keys():

View File

@ -1,6 +1,6 @@
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from app.filter import Filter from app.filter import Filter
from app.utils.misc import generate_user_keys from app.utils.session_utils import generate_user_keys
from datetime import datetime from datetime import datetime
from dateutil.parser import * from dateutil.parser import *