From 5069838e69cdbd018bc46c697daf608f5ee82adf Mon Sep 17 00:00:00 2001 From: jan Anja Date: Sat, 26 Feb 2022 03:29:54 +0500 Subject: [PATCH 1/2] Configure setup() using setup.cfg (#667) Dependencies are not read from requirements.txt intentionally, so only direct dependencies without version pinning are included. Setuptools documentation: https://setuptools.pypa.io/en/latest/userguide/declarative_config.html --- .github/workflows/pypi.yml | 1 + setup.cfg | 37 +++++++++++++++++++++++++++++++++++++ setup.py | 28 +--------------------------- 3 files changed, 39 insertions(+), 27 deletions(-) create mode 100644 setup.cfg diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index 383b242..7fb140d 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -20,6 +20,7 @@ jobs: python -m pip install build + setuptools --user - name: Set dev timestamp run: echo "DEV_BUILD=$(date +%s)" >> $GITHUB_ENV diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..7edf18a --- /dev/null +++ b/setup.cfg @@ -0,0 +1,37 @@ +[metadata] +name = whoogle-search +url = https://github.com/benbusby/whoogle-search +description = Self-hosted, ad-free, privacy-respecting metasearch engine +long_description = file: README.md +long_description_content_type = text/markdown +keywords = search, metasearch, flask, adblock, degoogle, privacy +author = Ben Busby +author_email = contact@benbusby.com +license = MIT +classifiers = + Programming Language :: Python :: 3 + License :: OSI Approved :: MIT License + Operating System :: OS Independent + +[options] +packages = find: +include_package_data = True +install_requires= + beautifulsoup4 + cryptography + Flask + Flask-Session + python-dotenv + requests + stem + waitress + +[options.extras_require] +test = + pytest + python-dateutil +dev = pycodestyle + +[options.entry_points] +console_scripts = + whoogle-search = app.routes:run_app diff --git a/setup.py b/setup.py index f8f22c0..fcdbddd 100644 --- a/setup.py 
+++ b/setup.py @@ -1,34 +1,8 @@ import os import setuptools -long_description = open('README.md', 'r').read() - -requirements = list(open('requirements.txt', 'r')) - optional_dev_tag = '' if os.getenv('DEV_BUILD'): optional_dev_tag = '.dev' + os.getenv('DEV_BUILD') -setuptools.setup( - author='Ben Busby', - author_email='contact@benbusby.com', - name='whoogle-search', - version='0.7.1' + optional_dev_tag, - include_package_data=True, - install_requires=requirements, - description='Self-hosted, ad-free, privacy-respecting metasearch engine', - long_description=long_description, - long_description_content_type='text/markdown', - url='https://github.com/benbusby/whoogle-search', - entry_points={ - 'console_scripts': [ - 'whoogle-search=app.routes:run_app', - ] - }, - packages=setuptools.find_packages(), - classifiers=[ - 'Programming Language :: Python :: 3', - 'License :: OSI Approved :: MIT License', - 'Operating System :: OS Independent', - ], -) +setuptools.setup(version='0.7.1' + optional_dev_tag) From b28fa86e337db2bf37234644896027f34141c763 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Fri, 25 Feb 2022 23:02:58 -0700 Subject: [PATCH 2/2] Update ad filter Recent changes to ads in search results caused Whoogle to display ads for certain searches. In particular, ads recently started appearing grouped into one div, as opposed to a single ad per div. This was accompanied by the div label "ads" (instead of just "ad"), which threw off the existing ad filter. The ad keyword blacklist has been updated accordingly, and has been enhanced to only check against the alphabetic characters of each label. This seems to have affected only English-language searches, and only very specific ones.
--- app/utils/results.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/app/utils/results.py b/app/utils/results.py index 48aa857..38e92e0 100644 --- a/app/utils/results.py +++ b/app/utils/results.py @@ -18,10 +18,11 @@ BLANK_B64 = ('data:image/png;base64,' # Ad keywords BLACKLIST = [ - 'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', - 'Реклама', 'Anunț', '광고', 'annons', 'Annonse', 'Iklan', '広告', 'Augl.', - 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam', 'آگهی', - 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés', 'Anúncio' + 'ad', 'ads', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', + 'Reklama', 'Реклама', 'Anunț', '광고', 'annons', 'Annonse', 'Iklan', + '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', + 'Reklam', 'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés', + 'Anúncio' ] SITE_ALTS = { @@ -89,7 +90,8 @@ def has_ad_content(element: str) -> bool: bool: True/False for the element containing an ad """ - return (element.upper() in (value.upper() for value in BLACKLIST) + element_str = ''.join(filter(str.isalpha, element)) + return (element_str.upper() in (value.upper() for value in BLACKLIST) or 'ⓘ' in element)