Added backend site alternative redirects/text replacement

By default, twitter/instagram/youtube links are redirected to
nitter/bibliogram/invidious respectively. There is currently no front end
for enabling/disabling this feature, but it may be best to leave it enabled
until a user manually disables it.

Also refactored util naming a bit and added filter_utils to hold all
non-class functions from filter.py
This commit is contained in:
Ben Busby 2020-07-18 11:32:52 -06:00
parent 3d7456f37b
commit 4c8ffaa3ba
9 changed files with 94 additions and 64 deletions

View File

@ -1,4 +1,4 @@
from app.utils.misc import generate_user_keys
from app.utils.session_utils import generate_user_keys
from flask import Flask
from flask_session import Session
import os

View File

@ -1,56 +1,11 @@
from app.request import VALID_PARAMS
from app.utils.misc import BLACKLIST
from bs4 import BeautifulSoup
from app.utils.filter_utils import *
from bs4.element import ResultSet
from cryptography.fernet import Fernet
import re
import urllib.parse as urlparse
from urllib.parse import parse_qs
# Query-argument names that filter_link_args() leaves out of the links it
# returns (compared by exact name).
SKIP_ARGS = ['ref_src', 'utm']
# HTML snippet for a "Full Image" link; the "{}" in href is left to be filled
# in by whoever uses the template (usage not shown in this view).
FULL_RES_IMG = '<br/><a href="{}">Full Image</a>'
# Path fragment of a Google logo image -- presumably matched against image
# sources elsewhere; verify against the Filter class.
GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo'
# GOOG_IMG with the '_desk' suffix appended.
LOGO_URL = GOOG_IMG + '_desk'
# Base64 PNG data URI (a blank image, judging by the name). The triple-quoted
# literal keeps the newline before and after the URI.
BLANK_B64 = '''
data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkwAIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC
'''
def get_first_link(soup):
    """Return the first search result URL found in ``soup``.

    Scans every anchor that has an ``href`` attribute and picks the first
    one whose href contains ``'url?q='``. That href is passed through
    ``filter_link_args`` before being returned.

    Returns ``None`` when no anchor matches.
    """
    anchors = soup.find_all('a', href=True)
    first_result = next(
        (anchor['href'] for anchor in anchors if 'url?q=' in anchor['href']),
        None,
    )
    if first_result is None:
        return None
    return filter_link_args(first_result)
def filter_link_args(query_link):
    """Return ``query_link`` without the query arguments named in SKIP_ARGS.

    Args:
        query_link: The link (absolute or relative URL) as a string.

    Returns:
        ``query_link`` unchanged when its query string yields no parsed
        arguments. Otherwise the link rebuilt with only the arguments whose
        names are not in SKIP_ARGS, re-encoded with
        ``urlencode(..., doseq=True)``. If every argument is skipped the
        result has no query string at all.
    """
    parsed_link = urlparse.urlparse(query_link)
    link_args = parse_qs(parsed_link.query)

    # Nothing to filter: no query string, or only blank-valued arguments
    # (parse_qs drops those by default).
    if not link_args:
        return query_link

    # NOTE(review): names are compared exactly, so e.g. "utm_source" is kept.
    # Confirm whether prefix matching was intended for "utm".
    safe_args = {
        arg: values
        for arg, values in link_args.items()
        if arg not in SKIP_ARGS
    }

    # Swap out only the query component. Editing the raw string with
    # str.replace() could also hit identical text elsewhere in the URL, and
    # appending the new query at the end would place it after a "#fragment".
    filtered_query = urlparse.urlencode(safe_args, doseq=True)
    return parsed_link._replace(query=filtered_query).geturl()
def has_ad_content(element: str):
    """Report whether ``element`` looks like an advertisement label.

    Args:
        element: Text to inspect.

    Returns:
        True when the text contains the circled-"i" info marker, or equals
        one of the BLACKLIST entries (compared case-insensitively via
        ``upper()``); False otherwise.
    """
    # An empty literal here matches every string and would flag all content
    # as an ad. The marker is written as an escape so tooling that strips
    # non-ASCII characters cannot silently empty it again.
    # NOTE(review): restored as U+24D8 (circled small "i") -- confirm that
    # this is the marker the result pages actually use.
    info_marker = '\u24d8'
    if info_marker in element:
        return True

    element_upper = element.upper()
    return any(element_upper == value.upper() for value in BLACKLIST)
class Filter:
def __init__(self, user_keys: dict, mobile=False, config=None):
@ -61,6 +16,7 @@ class Filter:
self.dark = config['dark'] if 'dark' in config else False
self.nojs = config['nojs'] if 'nojs' in config else False
self.new_tab = config['new_tab'] if 'new_tab' in config else False
self.alt_redirect = config['alts'] if 'alts' in config else True
self.mobile = mobile
self.user_keys = user_keys
self.main_divs = ResultSet('')
@ -232,11 +188,11 @@ class Filter:
else:
link['href'] = href
# Replace link location
if self.alt_redirect:
link['href'] = get_site_alt(link['href'])
link_desc = link.find_all(text=re.compile('|'.join(SITE_ALTS.keys())))
if len(link_desc) == 0:
return
def gen_nojs(sibling):
    """Append a "NoJS Link" anchor to ``sibling``.

    The new anchor points at ``/window?location=`` followed by the href of
    ``sibling`` and shows that same target in its text. A ``<br><hr><br>``
    separator is appended to ``sibling`` first, then the anchor.
    """
    anchor = BeautifulSoup().new_tag('a')
    target = '/window?location=' + sibling['href']
    anchor['href'] = target
    anchor['style'] = 'display:block;width:100%;'
    anchor.string = 'NoJS Link: ' + target

    separator = BeautifulSoup('<br><hr><br>', 'html.parser')
    for node in (separator, anchor):
        sibling.append(node)
link_desc[0].replace_with(get_site_alt(link_desc[0]))

View File

@ -15,7 +15,7 @@ from requests import exceptions
from app import app
from app.models.config import Config
from app.request import Request
from app.utils.misc import valid_user_session
from app.utils.session_utils import valid_user_session
from app.utils.routing_utils import *

79
app/utils/filter_utils.py Normal file
View File

@ -0,0 +1,79 @@
from bs4 import BeautifulSoup
import urllib.parse as urlparse
from urllib.parse import parse_qs
# Query-argument names that filter_link_args() leaves out of the links it
# returns (compared by exact name).
SKIP_ARGS = ['ref_src', 'utm']
# HTML snippet for a "Full Image" link; the "{}" in href is left to be filled
# in by whoever uses the template (usage not shown in this file).
FULL_RES_IMG = '<br/><a href="{}">Full Image</a>'
# Path fragment of a Google logo image -- presumably matched against image
# sources by the Filter class; verify against the caller.
GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo'
# GOOG_IMG with the '_desk' suffix appended.
LOGO_URL = GOOG_IMG + '_desk'
# Base64 PNG data URI (a blank image, judging by the name). The triple-quoted
# literal keeps the newline before and after the URI.
BLANK_B64 = '''
data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkwAIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC
'''
# Labels that has_ad_content() compares against, ignoring case. They appear
# to be the word for "ad" in a range of languages.
BLACKLIST = [
'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', 'Реклама', 'Anunț', '광고',
'annons', 'Annonse', 'Iklan', '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam',
'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés'
]
# Maps a site's domain to the text get_site_alt() substitutes for it.
# Note the instagram entry also carries a '/u' path suffix.
SITE_ALTS = {
'twitter.com': 'nitter.net',
'youtube.com': 'invidio.us',
'instagram.com': 'bibliogram.art/u'
}
def has_ad_content(element: str):
    """Report whether ``element`` looks like an advertisement label.

    Args:
        element: Text to inspect.

    Returns:
        True when the text contains the circled-"i" info marker, or equals
        one of the BLACKLIST entries (compared case-insensitively via
        ``upper()``); False otherwise.
    """
    # An empty literal here matches every string and would flag all content
    # as an ad. The marker is written as an escape so tooling that strips
    # non-ASCII characters cannot silently empty it again.
    # NOTE(review): restored as U+24D8 (circled small "i") -- confirm that
    # this is the marker the result pages actually use.
    info_marker = '\u24d8'
    if info_marker in element:
        return True

    element_upper = element.upper()
    return any(element_upper == value.upper() for value in BLACKLIST)
def get_first_link(soup):
    """Return the first search result URL found in ``soup``.

    Scans every anchor that has an ``href`` attribute and picks the first
    one whose href contains ``'url?q='``. That href is passed through
    ``filter_link_args`` before being returned.

    Returns ``None`` when no anchor matches.
    """
    anchors = soup.find_all('a', href=True)
    first_result = next(
        (anchor['href'] for anchor in anchors if 'url?q=' in anchor['href']),
        None,
    )
    if first_result is None:
        return None
    return filter_link_args(first_result)
def get_site_alt(link: str):
    """Swap a known site domain in ``link`` for its configured alternative.

    The keys of SITE_ALTS are checked in order. The first key found anywhere
    in ``link`` has all of its occurrences replaced by the mapped value, and
    no further keys are considered. When no key is found, ``link`` is
    returned as given.
    """
    matched_site = next((site for site in SITE_ALTS if site in link), None)
    if matched_site is None:
        return link
    return link.replace(matched_site, SITE_ALTS[matched_site])
def filter_link_args(query_link):
    """Return ``query_link`` without the query arguments named in SKIP_ARGS.

    Args:
        query_link: The link (absolute or relative URL) as a string.

    Returns:
        ``query_link`` unchanged when its query string yields no parsed
        arguments. Otherwise the link rebuilt with only the arguments whose
        names are not in SKIP_ARGS, re-encoded with
        ``urlencode(..., doseq=True)``. If every argument is skipped the
        result has no query string at all.
    """
    parsed_link = urlparse.urlparse(query_link)
    link_args = parse_qs(parsed_link.query)

    # Nothing to filter: no query string, or only blank-valued arguments
    # (parse_qs drops those by default).
    if not link_args:
        return query_link

    # NOTE(review): names are compared exactly, so e.g. "utm_source" is kept.
    # Confirm whether prefix matching was intended for "utm".
    safe_args = {
        arg: values
        for arg, values in link_args.items()
        if arg not in SKIP_ARGS
    }

    # Swap out only the query component. Editing the raw string with
    # str.replace() could also hit identical text elsewhere in the URL, and
    # appending the new query at the end would place it after a "#fragment".
    filtered_query = urlparse.urlencode(safe_args, doseq=True)
    return parsed_link._replace(query=filtered_query).geturl()
def gen_nojs(sibling):
    """Append a "NoJS Link" anchor to ``sibling``.

    The new anchor points at ``/window?location=`` followed by the href of
    ``sibling`` and shows that same target in its text. A ``<br><hr><br>``
    separator is appended to ``sibling`` first, then the anchor.
    """
    anchor = BeautifulSoup().new_tag('a')
    target = '/window?location=' + sibling['href']
    anchor['href'] = target
    anchor['style'] = 'display:block;width:100%;'
    anchor.string = 'NoJS Link: ' + target

    separator = BeautifulSoup('<br><hr><br>', 'html.parser')
    for node in (separator, anchor):
        sibling.append(node)

View File

@ -1,5 +1,5 @@
from app.filter import Filter, get_first_link
from app.utils.misc import generate_user_keys
from app.utils.session_utils import generate_user_keys
from app.request import gen_query
from bs4 import BeautifulSoup
from cryptography.fernet import Fernet, InvalidToken

View File

@ -2,11 +2,6 @@ from cryptography.fernet import Fernet
from flask import current_app as app
# Names of session entries -- presumably the keys a session must contain to
# be considered valid; verify against valid_user_session().
REQUIRED_SESSION_VALUES = ['uuid', 'config', 'fernet_keys']
# Labels that appear to be the word for "ad" in a range of languages.
BLACKLIST = [
'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', 'Реклама', 'Anunț', '광고',
'annons', 'Annonse', 'Iklan', '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam',
'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés'
]
def generate_user_keys(cookies_disabled=False) -> dict:

View File

@ -1,5 +1,5 @@
from app import app
from app.utils.misc import generate_user_keys
from app.utils.session_utils import generate_user_keys
import pytest

View File

@ -1,4 +1,4 @@
from app.utils.misc import generate_user_keys, valid_user_session
from app.utils.session_utils import generate_user_keys, valid_user_session
def test_generate_user_keys():

View File

@ -1,6 +1,6 @@
from bs4 import BeautifulSoup
from app.filter import Filter
from app.utils.misc import generate_user_keys
from app.utils.session_utils import generate_user_keys
from datetime import datetime
from dateutil.parser import *