feat: block title or url by regex
This commit is contained in:
parent
ca782875c2
commit
8bc88fad18
|
@ -45,7 +45,8 @@ class Filter:
|
|||
def __init__(self, user_key: str, mobile=False, config=None) -> None:
|
||||
if config is None:
|
||||
config = {}
|
||||
|
||||
else:
|
||||
self.config = config
|
||||
self.near = config['near'] if 'near' in config else ''
|
||||
self.dark = config['dark'] if 'dark' in config else False
|
||||
self.nojs = config['nojs'] if 'nojs' in config else False
|
||||
|
@ -87,6 +88,8 @@ class Filter:
|
|||
def clean(self, soup) -> BeautifulSoup:
|
||||
self.main_divs = soup.find('div', {'id': 'main'})
|
||||
self.remove_ads()
|
||||
self.remove_block_titles()
|
||||
self.remove_block_url()
|
||||
self.collapse_sections()
|
||||
self.update_styling(soup)
|
||||
|
||||
|
@ -134,6 +137,23 @@ class Filter:
|
|||
if has_ad_content(_.text)]
|
||||
_ = div.decompose() if len(div_ads) else None
|
||||
|
||||
def remove_block_titles(self) -> None:
    """Remove result divs whose title matches the configured title regex.

    Every ``div`` below ``self.main_divs`` is inspected; a div is decomposed
    when any ``h3`` it contains has text matching
    ``self.config.block_title`` (a compiled regular expression).

    The method is a no-op when there is no main div, or when no
    ``block_title`` pattern can be found on the filter's config.
    """
    if not self.main_divs:
        return

    # The constructor only assigns ``self.config`` when a config is passed
    # in, and otherwise treats config as a dict-like object, so look the
    # pattern up defensively instead of assuming the attribute exists.
    config = getattr(self, 'config', None)
    pattern = getattr(config, 'block_title', None)
    if pattern is None:
        return

    # find_all() hands back an already-built result list, so decomposing
    # divs inside the loop does not disturb the iteration itself.
    for div in self.main_divs.find_all('div', recursive=True):
        has_blocked_title = any(
            pattern.search(heading.text) is not None
            for heading in div.find_all('h3', recursive=True)
        )
        if has_blocked_title:
            div.decompose()
|
||||
|
||||
def remove_block_url(self) -> None:
    """Remove result divs that link to a URL matching the configured regex.

    Every ``div`` below ``self.main_divs`` is inspected; a div is decomposed
    when any ``a`` element it contains has an ``href`` matching
    ``self.config.block_url`` (a compiled regular expression).

    The method is a no-op when there is no main div, or when no
    ``block_url`` pattern can be found on the filter's config.
    """
    if not self.main_divs:
        return

    # The constructor only assigns ``self.config`` when a config is passed
    # in, and otherwise treats config as a dict-like object, so look the
    # pattern up defensively instead of assuming the attribute exists.
    config = getattr(self, 'config', None)
    pattern = getattr(config, 'block_url', None)
    if pattern is None:
        return

    # find_all() hands back an already-built result list, so decomposing
    # divs inside the loop does not disturb the iteration itself.
    for div in self.main_divs.find_all('div', recursive=True):
        # Anchors without an href are skipped rather than indexed directly
        # (which raised KeyError) or treated as '' (which an empty-string
        # pattern such as '^$' would match).
        has_blocked_link = any(
            'href' in link.attrs
            and pattern.search(link.attrs['href']) is not None
            for link in div.find_all('a', recursive=True)
        )
        if has_blocked_link:
            div.decompose()
|
||||
|
||||
def collapse_sections(self) -> None:
|
||||
"""Collapses long result sections ("people also asked", "related
|
||||
searches", etc) into "details" elements
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
from app.utils.misc import read_config_bool
|
||||
from flask import current_app
|
||||
import os
|
||||
import re
|
||||
|
||||
|
||||
class Config:
|
||||
|
@ -14,6 +15,8 @@ class Config:
|
|||
open(os.path.join(app_config['STATIC_FOLDER'],
|
||||
'css/variables.css')).read())
|
||||
self.block = os.getenv('WHOOGLE_CONFIG_BLOCK', '')
|
||||
self.block_title = re.compile(os.getenv('WHOOGLE_CONFIG_BLOCK_TITLE', '^$'))
|
||||
self.block_url = re.compile(os.getenv('WHOOGLE_CONFIG_BLOCK_URL', '^$'))
|
||||
self.ctry = os.getenv('WHOOGLE_CONFIG_COUNTRY', '')
|
||||
self.theme = os.getenv('WHOOGLE_CONFIG_THEME', '')
|
||||
self.safe = read_config_bool('WHOOGLE_CONFIG_SAFE')
|
||||
|
|
Loading…
Reference in New Issue
Block a user