feat: block title or url by regex
This commit is contained in:
parent
ca782875c2
commit
8bc88fad18
|
@ -45,7 +45,8 @@ class Filter:
|
||||||
def __init__(self, user_key: str, mobile=False, config=None) -> None:
|
def __init__(self, user_key: str, mobile=False, config=None) -> None:
|
||||||
if config is None:
|
if config is None:
|
||||||
config = {}
|
config = {}
|
||||||
|
else:
|
||||||
|
self.config = config
|
||||||
self.near = config['near'] if 'near' in config else ''
|
self.near = config['near'] if 'near' in config else ''
|
||||||
self.dark = config['dark'] if 'dark' in config else False
|
self.dark = config['dark'] if 'dark' in config else False
|
||||||
self.nojs = config['nojs'] if 'nojs' in config else False
|
self.nojs = config['nojs'] if 'nojs' in config else False
|
||||||
|
@ -87,6 +88,8 @@ class Filter:
|
||||||
def clean(self, soup) -> BeautifulSoup:
|
def clean(self, soup) -> BeautifulSoup:
|
||||||
self.main_divs = soup.find('div', {'id': 'main'})
|
self.main_divs = soup.find('div', {'id': 'main'})
|
||||||
self.remove_ads()
|
self.remove_ads()
|
||||||
|
self.remove_block_titles()
|
||||||
|
self.remove_block_url()
|
||||||
self.collapse_sections()
|
self.collapse_sections()
|
||||||
self.update_styling(soup)
|
self.update_styling(soup)
|
||||||
|
|
||||||
|
@ -134,6 +137,23 @@ class Filter:
|
||||||
if has_ad_content(_.text)]
|
if has_ad_content(_.text)]
|
||||||
_ = div.decompose() if len(div_ads) else None
|
_ = div.decompose() if len(div_ads) else None
|
||||||
|
|
||||||
|
def remove_block_titles(self) -> None:
    """Removes results whose title matches the configured block pattern

    Every div under the main results container that holds an h3 whose
    text matches config.block_title is decomposed. Nothing happens when
    there is no main container, or when this filter has no config object
    carrying a block_title pattern.

    Returns:
        None (the parsed results are modified in place)

    """
    if not self.main_divs:
        return

    # clean() calls this unconditionally, but the filter may have been
    # built without a config (or with one lacking the pattern), in which
    # case there is nothing to block.
    pattern = getattr(getattr(self, 'config', None), 'block_title', None)
    if pattern is None:
        return

    for div in self.main_divs.find_all('div', recursive=True):
        # A single matching title anywhere below the div is enough
        if any(pattern.search(h3.text) is not None
               for h3 in div.find_all('h3', recursive=True)):
            div.decompose()
|
||||||
|
|
||||||
|
def remove_block_url(self) -> None:
    """Removes results that link to a URL matching the block pattern

    Every div under the main results container that holds an anchor
    whose href matches config.block_url is decomposed. Nothing happens
    when there is no main container, or when this filter has no config
    object carrying a block_url pattern.

    Returns:
        None (the parsed results are modified in place)

    """
    if not self.main_divs:
        return

    # clean() calls this unconditionally, but the filter may have been
    # built without a config (or with one lacking the pattern), in which
    # case there is nothing to block.
    pattern = getattr(getattr(self, 'config', None), 'block_url', None)
    if pattern is None:
        return

    for div in self.main_divs.find_all('div', recursive=True):
        for link in div.find_all('a', recursive=True):
            # Anchors without an href are skipped instead of being
            # treated as '' -- an empty string would match a '^$'
            # pattern and block the div by accident.
            href = link.attrs.get('href')
            if href is not None and pattern.search(href) is not None:
                div.decompose()
                break
|
||||||
|
|
||||||
def collapse_sections(self) -> None:
|
def collapse_sections(self) -> None:
|
||||||
"""Collapses long result sections ("people also asked", "related
|
"""Collapses long result sections ("people also asked", "related
|
||||||
searches", etc) into "details" elements
|
searches", etc) into "details" elements
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
from app.utils.misc import read_config_bool
|
from app.utils.misc import read_config_bool
|
||||||
from flask import current_app
|
from flask import current_app
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
|
@ -14,6 +15,8 @@ class Config:
|
||||||
open(os.path.join(app_config['STATIC_FOLDER'],
|
open(os.path.join(app_config['STATIC_FOLDER'],
|
||||||
'css/variables.css')).read())
|
'css/variables.css')).read())
|
||||||
self.block = os.getenv('WHOOGLE_CONFIG_BLOCK', '')
|
self.block = os.getenv('WHOOGLE_CONFIG_BLOCK', '')
|
||||||
|
self.block_title = re.compile(os.getenv('WHOOGLE_CONFIG_BLOCK_TITLE', '^$'))
|
||||||
|
self.block_url = re.compile(os.getenv('WHOOGLE_CONFIG_BLOCK_URL', '^$'))
|
||||||
self.ctry = os.getenv('WHOOGLE_CONFIG_COUNTRY', '')
|
self.ctry = os.getenv('WHOOGLE_CONFIG_COUNTRY', '')
|
||||||
self.theme = os.getenv('WHOOGLE_CONFIG_THEME', '')
|
self.theme = os.getenv('WHOOGLE_CONFIG_THEME', '')
|
||||||
self.safe = read_config_bool('WHOOGLE_CONFIG_SAFE')
|
self.safe = read_config_bool('WHOOGLE_CONFIG_SAFE')
|
||||||
|
|
Loading…
Reference in New Issue
Block a user