feat: block title or url by regex

This commit is contained in:
YadominJinta 2021-10-19 18:07:36 +08:00
parent ca782875c2
commit 8bc88fad18
2 changed files with 24 additions and 1 deletions

View File

@ -45,7 +45,8 @@ class Filter:
def __init__(self, user_key: str, mobile=False, config=None) -> None:
if config is None:
config = {}
else:
self.config = config
self.near = config['near'] if 'near' in config else ''
self.dark = config['dark'] if 'dark' in config else False
self.nojs = config['nojs'] if 'nojs' in config else False
@ -87,6 +88,8 @@ class Filter:
def clean(self, soup) -> BeautifulSoup:
self.main_divs = soup.find('div', {'id': 'main'})
self.remove_ads()
self.remove_block_titles()
self.remove_block_url()
self.collapse_sections()
self.update_styling(soup)
@ -134,6 +137,23 @@ class Filter:
if has_ad_content(_.text)]
_ = div.decompose() if len(div_ads) else None
def remove_block_titles(self) -> None:
    """Remove result divs whose heading matches the blocked-title regex.

    Every ``div`` found under ``self.main_divs`` is inspected; if any
    ``h3`` nested inside it has text matched by the compiled pattern in
    ``self.config.block_title`` (via ``Pattern.search``), that whole div
    is decomposed.

    The method is a no-op when there is no main container, or when no
    ``block_title`` pattern is available on the filter's config.
    """
    if not self.main_divs:
        return

    # ``self.config`` is only assigned when a config object was handed to
    # the constructor, so look the pattern up defensively instead of
    # raising AttributeError in the middle of cleaning a page.
    pattern = getattr(getattr(self, 'config', None), 'block_title', None)
    if pattern is None:
        return

    # Snapshot the divs first: decompose() mutates the tree being walked.
    for div in list(self.main_divs.find_all('div', recursive=True)):
        headings = div.find_all('h3', recursive=True)
        if any(pattern.search(h3.text) is not None for h3 in headings):
            div.decompose()
def remove_block_url(self) -> None:
    """Remove result divs that link to a URL matching the blocked-url regex.

    Every ``div`` found under ``self.main_divs`` is inspected; if any
    ``a`` nested inside it has an ``href`` matched by the compiled pattern
    in ``self.config.block_url`` (via ``Pattern.search``), that whole div
    is decomposed.

    The method is a no-op when there is no main container, or when no
    ``block_url`` pattern is available on the filter's config.
    """
    if not self.main_divs:
        return

    # ``self.config`` is only assigned when a config object was handed to
    # the constructor, so look the pattern up defensively instead of
    # raising AttributeError in the middle of cleaning a page.
    pattern = getattr(getattr(self, 'config', None), 'block_url', None)
    if pattern is None:
        return

    # Snapshot the divs first: decompose() mutates the tree being walked.
    for div in list(self.main_divs.find_all('div', recursive=True)):
        for anchor in div.find_all('a', recursive=True):
            # Anchors without an href (named anchors, JS-only links) have
            # nothing to match against; skip them rather than KeyError.
            href = anchor.attrs.get('href')
            if href and pattern.search(href) is not None:
                div.decompose()
                break
def collapse_sections(self) -> None:
"""Collapses long result sections ("people also asked", "related
searches", etc) into "details" elements

View File

@ -1,6 +1,7 @@
from app.utils.misc import read_config_bool
from flask import current_app
import os
import re
class Config:
@ -14,6 +15,8 @@ class Config:
open(os.path.join(app_config['STATIC_FOLDER'],
'css/variables.css')).read())
self.block = os.getenv('WHOOGLE_CONFIG_BLOCK', '')
self.block_title = re.compile(os.getenv('WHOOGLE_CONFIG_BLOCK_TITLE', '^$'))
self.block_url = re.compile(os.getenv('WHOOGLE_CONFIG_BLOCK_URL', '^$'))
self.ctry = os.getenv('WHOOGLE_CONFIG_COUNTRY', '')
self.theme = os.getenv('WHOOGLE_CONFIG_THEME', '')
self.safe = read_config_bool('WHOOGLE_CONFIG_SAFE')