* Block websites in search results via user config. Adds a new config field "Block" to specify a comma-separated list of websites to block in search results. This is applied for all searches. * Add test for blocking sites from search results. * Document WHOOGLE_CONFIG_BLOCK usage. * Strip '-site:' filters from query in header template. The 'behind the scenes' site filter applied for blocked sites was appearing in the query field when navigating between search categories (all -> images -> news, etc). This prevents the filter from appearing in all except "images", since the image category uses a separate header. This should eventually be addressed when the image page can begin using the standard whoogle header, but until then, the filter will still appear for image searches.
112 lines
3.4 KiB
Python
112 lines
3.4 KiB
Python
from bs4 import BeautifulSoup
|
|
from app.filter import Filter
|
|
from app.utils.session import generate_user_key
|
|
from datetime import datetime
|
|
from dateutil.parser import *
|
|
from urllib.parse import urlparse
|
|
|
|
from test.conftest import demo_config
|
|
|
|
|
|
def get_search_results(data):
    """Return the result divs found in a rendered search page.

    The response body is parsed, passed through ``Filter.clean`` with a
    freshly generated user key, and the children of the ``div`` whose id is
    ``main`` are scanned. A child is kept only when it has exactly one child
    node and the first tag found inside it is a div.

    Args:
        data: HTML body of a search response.

    Returns:
        A list of the children of the ``main`` div that look like results.
    """
    secret_key = generate_user_key()
    soup = Filter(user_key=secret_key).clean(
        BeautifulSoup(data, 'html.parser'))

    main_divs = soup.find('div', {'id': 'main'})
    # find() returns None when nothing matches; fail with a readable
    # assertion rather than a TypeError raised by len(None).
    assert main_divs is not None, 'No div with id "main" in the response'
    assert len(main_divs) > 1

    result_divs = []
    for div in main_divs:
        # Result divs should only have 1 inner div
        if len(list(div.children)) != 1:
            continue

        # find() with no arguments is the non-deprecated spelling of
        # findChild(); look the inner tag up once and reuse it.
        inner = div.find()
        if not inner or 'div' not in inner.name:
            continue

        result_divs.append(div)

    return result_divs
|
|
|
|
|
|
def test_get_results(client):
    """A GET search should succeed and yield between 10 and 15 result divs."""
    rv = client.get('/search?q=test')
    assert rv.status_code == 200

    # Depending on the search, there can be more
    # than 10 result divs
    # Parse and filter the response once instead of once per bound check.
    num_results = len(get_search_results(rv.data))
    assert 10 <= num_results <= 15
|
|
|
|
|
|
def test_post_results(client):
    """A POST search should succeed and yield between 10 and 15 result divs."""
    rv = client.post('/search', data=dict(q='test'))
    assert rv.status_code == 200

    # Depending on the search, there can be more
    # than 10 result divs
    # Parse and filter the response once instead of once per bound check.
    num_results = len(get_search_results(rv.data))
    assert 10 <= num_results <= 15
|
|
|
|
|
|
def test_block_results(client):
    """Blocking a site in the config should remove its links from results.

    A search for 'pinterest' must contain at least one pinterest.com link
    before the block is configured, and none after 'pinterest.com' has been
    posted as the ``block`` config value.
    """
    def link_hosts(html):
        """Lazily yield the network location of every link in the page."""
        anchors = BeautifulSoup(html, 'html.parser').find_all('a', href=True)
        return (urlparse(anchor['href']).netloc for anchor in anchors)

    rv = client.post('/search', data=dict(q='pinterest'))
    assert rv.status_code == 200

    # Sanity check: the site must be present before it is blocked
    assert any('pinterest.com' in host for host in link_hosts(rv.data))

    # Post a copy so the shared demo_config imported from conftest is not
    # mutated and the 'block' entry cannot leak into other tests.
    blocked_config = dict(demo_config, block='pinterest.com')
    rv = client.post('/config', data=blocked_config)
    assert rv.status_code == 302

    rv = client.post('/search', data=dict(q='pinterest'))
    assert rv.status_code == 200

    for host in link_hosts(rv.data):
        assert 'pinterest.com' not in host
|
|
|
|
|
|
# TODO: Unit test the site alt method instead -- the results returned
|
|
# are too unreliable for this test in particular.
|
|
# def test_site_alts(client):
|
|
# rv = client.post('/search', data=dict(q='twitter official account'))
|
|
# assert b'twitter.com/Twitter' in rv.data
|
|
|
|
# client.post('/config', data=dict(alts=True))
|
|
# assert json.loads(client.get('/config').data)['alts']
|
|
|
|
# rv = client.post('/search', data=dict(q='twitter official account'))
|
|
# assert b'twitter.com/Twitter' not in rv.data
|
|
# assert b'nitter.net/Twitter' in rv.data
|
|
|
|
|
|
def test_recent_results(client):
    """Check that time-restricted searches only return recent results.

    For each ':past ...' suffix, the first span of every result div is read
    as a candidate date. Text that is empty, outside 7-15 characters, or
    that cannot be parsed as a date is skipped; every parsed date must be no
    older than the period plus five days of slack.
    """
    max_age_days = {
        'past year': 365,
        'past month': 31,
        'past week': 7
    }

    for period, limit in max_age_days.items():
        rv = client.post('/search', data=dict(q='test :' + period))
        candidates = get_search_results(rv.data)

        now = datetime.now()
        for div in candidates:
            span = div.find('span')
            if not span:
                continue

            text = span.decode_contents()
            if not text or not 7 <= len(text) <= 15:
                continue

            try:
                result_date = parse(text)
            except ParserError:
                # Span content was not a date; nothing to check
                continue

            # Date can have a little bit of wiggle room
            assert (now - result_date).days <= (limit + 5)
|