Added backend site alternative redirects/text replacement
By default, twitter/instagram/youtube links will redirect to nitter/bibliogram/invidious respectively. A front end for enabling/disabling this feature is currently missing, but it may be worth keeping the feature enabled by default until a user manually disables it. Also refactored util naming a bit, and added filter utils to hold all non-class methods from filter.py.
This commit is contained in:
parent
3d7456f37b
commit
4c8ffaa3ba
|
@ -1,4 +1,4 @@
|
|||
from app.utils.misc import generate_user_keys
|
||||
from app.utils.session_utils import generate_user_keys
|
||||
from flask import Flask
|
||||
from flask_session import Session
|
||||
import os
|
||||
|
|
|
@ -1,56 +1,11 @@
|
|||
from app.request import VALID_PARAMS
|
||||
from app.utils.misc import BLACKLIST
|
||||
from bs4 import BeautifulSoup
|
||||
from app.utils.filter_utils import *
|
||||
from bs4.element import ResultSet
|
||||
from cryptography.fernet import Fernet
|
||||
import re
|
||||
import urllib.parse as urlparse
|
||||
from urllib.parse import parse_qs
|
||||
|
||||
SKIP_ARGS = ['ref_src', 'utm']
|
||||
FULL_RES_IMG = '<br/><a href="{}">Full Image</a>'
|
||||
GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo'
|
||||
LOGO_URL = GOOG_IMG + '_desk'
|
||||
BLANK_B64 = '''
|
||||

|
||||
'''
|
||||
|
||||
|
||||
def get_first_link(soup):
|
||||
# Replace hrefs with only the intended destination (no "utm" type tags)
|
||||
for a in soup.find_all('a', href=True):
|
||||
# Return the first search result URL
|
||||
if 'url?q=' in a['href']:
|
||||
return filter_link_args(a['href'])
|
||||
|
||||
|
||||
def filter_link_args(query_link):
|
||||
parsed_link = urlparse.urlparse(query_link)
|
||||
link_args = parse_qs(parsed_link.query)
|
||||
safe_args = {}
|
||||
|
||||
if len(link_args) == 0 and len(parsed_link) > 0:
|
||||
return query_link
|
||||
|
||||
for arg in link_args.keys():
|
||||
if arg in SKIP_ARGS:
|
||||
continue
|
||||
|
||||
safe_args[arg] = link_args[arg]
|
||||
|
||||
# Remove original link query and replace with filtered args
|
||||
query_link = query_link.replace(parsed_link.query, '')
|
||||
if len(safe_args) > 0:
|
||||
query_link = query_link + urlparse.urlencode(safe_args, doseq=True)
|
||||
else:
|
||||
query_link = query_link.replace('?', '')
|
||||
|
||||
return query_link
|
||||
|
||||
|
||||
def has_ad_content(element: str):
|
||||
return element.upper() in (value.upper() for value in BLACKLIST) or 'ⓘ' in element
|
||||
|
||||
|
||||
class Filter:
|
||||
def __init__(self, user_keys: dict, mobile=False, config=None):
|
||||
|
@ -61,6 +16,7 @@ class Filter:
|
|||
self.dark = config['dark'] if 'dark' in config else False
|
||||
self.nojs = config['nojs'] if 'nojs' in config else False
|
||||
self.new_tab = config['new_tab'] if 'new_tab' in config else False
|
||||
self.alt_redirect = config['alts'] if 'alts' in config else True
|
||||
self.mobile = mobile
|
||||
self.user_keys = user_keys
|
||||
self.main_divs = ResultSet('')
|
||||
|
@ -232,11 +188,11 @@ class Filter:
|
|||
else:
|
||||
link['href'] = href
|
||||
|
||||
# Replace link location
|
||||
if self.alt_redirect:
|
||||
link['href'] = get_site_alt(link['href'])
|
||||
link_desc = link.find_all(text=re.compile('|'.join(SITE_ALTS.keys())))
|
||||
if len(link_desc) == 0:
|
||||
return
|
||||
|
||||
def gen_nojs(sibling):
|
||||
nojs_link = BeautifulSoup().new_tag('a')
|
||||
nojs_link['href'] = '/window?location=' + sibling['href']
|
||||
nojs_link['style'] = 'display:block;width:100%;'
|
||||
nojs_link.string = 'NoJS Link: ' + nojs_link['href']
|
||||
sibling.append(BeautifulSoup('<br><hr><br>', 'html.parser'))
|
||||
sibling.append(nojs_link)
|
||||
link_desc[0].replace_with(get_site_alt(link_desc[0]))
|
||||
|
|
|
@ -15,7 +15,7 @@ from requests import exceptions
|
|||
from app import app
|
||||
from app.models.config import Config
|
||||
from app.request import Request
|
||||
from app.utils.misc import valid_user_session
|
||||
from app.utils.session_utils import valid_user_session
|
||||
from app.utils.routing_utils import *
|
||||
|
||||
|
||||
|
|
79
app/utils/filter_utils.py
Normal file
79
app/utils/filter_utils.py
Normal file
|
@ -0,0 +1,79 @@
|
|||
from bs4 import BeautifulSoup
|
||||
import urllib.parse as urlparse
|
||||
from urllib.parse import parse_qs
|
||||
|
||||
# Query-string argument names that filter_link_args drops from result links
SKIP_ARGS = [
    'ref_src',
    'utm',
]

# HTML snippet with a single placeholder for the link target
FULL_RES_IMG = '<br/><a href="{}">Full Image</a>'

# Path prefix of the Google logo image, and the "_desk" variant built from it
GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo'
LOGO_URL = f'{GOOG_IMG}_desk'
|
||||
BLANK_B64 = '''
|
||||

|
||||
'''
|
||||
|
||||
# Labels that has_ad_content compares (case-insensitively) against an
# element's text to recognise advertisement markers in several languages
BLACKLIST = [
    'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告',
    'Reklama', 'Реклама', 'Anunț', '광고', 'annons', 'Annonse', 'Iklan',
    '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन',
    'Reklam', 'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה',
    'Hirdetés',
]

# Site substring -> replacement used by get_site_alt when rewriting links
SITE_ALTS = {
    'twitter.com': 'nitter.net',
    'youtube.com': 'invidio.us',
    'instagram.com': 'bibliogram.art/u',
}
|
||||
|
||||
|
||||
def has_ad_content(element: str):
    """Tell whether a piece of text looks like an advertisement marker.

    Returns True when ``element`` equals one of the BLACKLIST labels
    (compared after upper-casing both sides) or when it contains the
    'ⓘ' symbol anywhere; otherwise False.
    """
    shouted = element.upper()
    for label in BLACKLIST:
        if shouted == label.upper():
            return True

    # No exact label match: fall back to the info-symbol check
    return 'ⓘ' in element
|
||||
|
||||
|
||||
def get_first_link(soup):
    """Return the first search result URL found in ``soup``.

    Scans the anchors that carry an ``href`` attribute, in document order,
    and picks the first one whose href contains 'url?q='. That href is
    passed through filter_link_args before being returned. Returns None
    when no anchor matches.
    """
    result_hrefs = (
        anchor['href']
        for anchor in soup.find_all('a', href=True)
        if 'url?q=' in anchor['href']
    )
    first_href = next(result_hrefs, None)
    if first_href is None:
        return None

    return filter_link_args(first_href)
|
||||
|
||||
|
||||
def get_site_alt(link: str):
    """Swap a known site name inside ``link`` for its configured alternative.

    The SITE_ALTS keys are checked in order; only the first key found in
    ``link`` is applied, and every occurrence of that key in the string is
    replaced. When no key is present the link is returned unchanged.
    """
    matched_site = next((site for site in SITE_ALTS if site in link), None)
    if matched_site is not None:
        link = link.replace(matched_site, SITE_ALTS[matched_site])

    return link
|
||||
|
||||
|
||||
def filter_link_args(query_link):
    """Return ``query_link`` with the arguments named in SKIP_ARGS removed.

    The link is split into its URL components, the query string is parsed,
    every argument whose name is listed in SKIP_ARGS is dropped, and the
    URL is reassembled around the remaining (re-encoded) arguments. A link
    whose query yields no parsed arguments is returned exactly as given.

    Only the query component is rewritten: the path is left alone even if
    it happens to contain the same text as the query, and a ``#fragment``
    stays at the end of the URL instead of ending up in front of the
    rebuilt query.

    :param query_link: absolute or relative URL string
    :return: the URL string with the filtered query
    """
    parsed_link = urlparse.urlparse(query_link)
    link_args = parse_qs(parsed_link.query)

    # Nothing to filter: hand the link back untouched
    if not link_args:
        return query_link

    # NOTE(review): membership is an exact name match, so 'utm' does not
    # cover 'utm_source' and similar -- confirm whether a prefix match
    # was intended.
    safe_args = {
        arg: values
        for arg, values in link_args.items()
        if arg not in SKIP_ARGS
    }

    # An empty query makes urlunparse omit the '?' separator altogether
    filtered_query = urlparse.urlencode(safe_args, doseq=True)
    return urlparse.urlunparse(parsed_link._replace(query=filtered_query))
|
||||
|
||||
|
||||
def gen_nojs(sibling):
    """Append a "NoJS Link" anchor to ``sibling``.

    Builds a new ``<a>`` tag pointing at '/window?location=' followed by
    the ``href`` of ``sibling``, then appends a ``<br><hr><br>`` separator
    and the new anchor to ``sibling`` in that order. Returns nothing; the
    element is modified in place.
    """
    nojs_link = BeautifulSoup().new_tag('a')
    target = '/window?location=' + sibling['href']

    nojs_link['href'] = target
    nojs_link['style'] = 'display:block;width:100%;'
    nojs_link.string = 'NoJS Link: ' + target

    separator = BeautifulSoup('<br><hr><br>', 'html.parser')
    for node in (separator, nojs_link):
        sibling.append(node)
|
|
@ -1,5 +1,5 @@
|
|||
from app.filter import Filter, get_first_link
|
||||
from app.utils.misc import generate_user_keys
|
||||
from app.utils.session_utils import generate_user_keys
|
||||
from app.request import gen_query
|
||||
from bs4 import BeautifulSoup
|
||||
from cryptography.fernet import Fernet, InvalidToken
|
||||
|
|
|
@ -2,11 +2,6 @@ from cryptography.fernet import Fernet
|
|||
from flask import current_app as app
|
||||
|
||||
REQUIRED_SESSION_VALUES = ['uuid', 'config', 'fernet_keys']
|
||||
BLACKLIST = [
|
||||
'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', 'Реклама', 'Anunț', '광고',
|
||||
'annons', 'Annonse', 'Iklan', '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam',
|
||||
'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés'
|
||||
]
|
||||
|
||||
|
||||
def generate_user_keys(cookies_disabled=False) -> dict:
|
|
@ -1,5 +1,5 @@
|
|||
from app import app
|
||||
from app.utils.misc import generate_user_keys
|
||||
from app.utils.session_utils import generate_user_keys
|
||||
import pytest
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from app.utils.misc import generate_user_keys, valid_user_session
|
||||
from app.utils.session_utils import generate_user_keys, valid_user_session
|
||||
|
||||
|
||||
def test_generate_user_keys():
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from bs4 import BeautifulSoup
|
||||
from app.filter import Filter
|
||||
from app.utils.misc import generate_user_keys
|
||||
from app.utils.session_utils import generate_user_keys
|
||||
from datetime import datetime
|
||||
from dateutil.parser import *
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user