whoogle-search/test/test_results.py
Ben Busby 10a15e06e1
Fix incorrect request type for image searches
Previously, POST requests were hardcoded for every request that didn't
use the header template (which currently is only the image tab).

Also refactored how the Filter class works. It now requires a valid
Config model, which is stored as a class var that the filtering
functions can use as needed, rather than copying individual config
values onto the filter one by one (which was confusing and sloppy).

Fixes #561
2021-12-06 21:39:50 -07:00
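
A minimal sketch of the new Filter contract, mirroring the usage in the
test file below (`html` and `key` are placeholder names here):

    from bs4 import BeautifulSoup
    from app.filter import Filter
    from app.models.config import Config

    # The full Config model is passed in and kept on the filter for the
    # filtering functions to read, instead of copying values one by one
    f = Filter(user_key=key, config=Config(**demo_config))
    clean_soup = f.clean(BeautifulSoup(html, 'html.parser'))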


from bs4 import BeautifulSoup
from app.filter import Filter
from app.models.config import Config
from app.models.endpoint import Endpoint
from app.utils.session import generate_user_key
from datetime import datetime
from dateutil.parser import parse, ParserError
from urllib.parse import urlparse

from test.conftest import demo_config


def get_search_results(data):
    secret_key = generate_user_key()
    soup = Filter(user_key=secret_key, config=Config(**demo_config)).clean(
        BeautifulSoup(data, 'html.parser'))

    main_divs = soup.find('div', {'id': 'main'})
    assert len(main_divs) > 1

    result_divs = []
    for div in main_divs:
        # Result divs should only have 1 inner div
        if (len(list(div.children)) != 1
                or not div.findChild()
                or 'div' not in div.findChild().name):
            continue

        result_divs.append(div)

    return result_divs


def test_get_results(client):
    rv = client.get(f'/{Endpoint.search}?q=test')
    assert rv._status_code == 200

    # Depending on the search, there can be more
    # than 10 result divs
    results = get_search_results(rv.data)
    assert len(results) >= 10
    assert len(results) <= 15


def test_post_results(client):
    rv = client.post(f'/{Endpoint.search}', data=dict(q='test'))
    assert rv._status_code == 200

    # Depending on the search, there can be more
    # than 10 result divs
    results = get_search_results(rv.data)
    assert len(results) >= 10
    assert len(results) <= 15


def test_translate_search(client):
    rv = client.post(f'/{Endpoint.search}', data=dict(q='translate hola'))
    assert rv._status_code == 200

    # Pretty weak test, but better than nothing
    str_data = str(rv.data)
    assert 'iframe' in str_data
    assert 'lingva.ml/auto/en/ hola' in str_data


def test_block_results(client):
    # Sanity check: an unfiltered search for 'pinterest' should
    # surface at least one pinterest.com link
    rv = client.post(f'/{Endpoint.search}', data=dict(q='pinterest'))
    assert rv._status_code == 200

    has_pinterest = False
    for link in BeautifulSoup(rv.data, 'html.parser').find_all('a', href=True):
        if 'pinterest.com' in urlparse(link['href']).netloc:
            has_pinterest = True
            break

    assert has_pinterest

    # Block the domain through the config endpoint, then confirm it
    # no longer appears in any result link
    demo_config['block'] = 'pinterest.com,help.pinterest.com'
    rv = client.post(f'/{Endpoint.config}', data=demo_config)
    assert rv._status_code == 302

    rv = client.post(f'/{Endpoint.search}', data=dict(q='pinterest'))
    assert rv._status_code == 200

    for link in BeautifulSoup(rv.data, 'html.parser').find_all('a', href=True):
        assert 'pinterest.com' not in urlparse(link['href']).netloc


# TODO: Unit test the site alt method instead -- the results returned
# are too unreliable for this test in particular.
# def test_site_alts(client):
#     rv = client.post('/search', data=dict(q='twitter official account'))
#     assert b'twitter.com/Twitter' in rv.data
#
#     client.post('/config', data=dict(alts=True))
#     assert json.loads(client.get('/config').data)['alts']
#
#     rv = client.post('/search', data=dict(q='twitter official account'))
#     assert b'twitter.com/Twitter' not in rv.data
#     assert b'nitter.net/Twitter' in rv.data


def test_recent_results(client):
    times = {
        'past year': 365,
        'past month': 31,
        'past week': 7
    }

    for time, num_days in times.items():
        rv = client.post(f'/{Endpoint.search}', data=dict(q='test :' + time))
        result_divs = get_search_results(rv.data)

        current_date = datetime.now()
        for div in [_ for _ in result_divs if _.find('span')]:
            date_span = div.find('span').decode_contents()
            if not date_span or len(date_span) > 15 or len(date_span) < 7:
                continue

            try:
                date = parse(date_span)
                # Date can have a little bit of wiggle room
                assert (current_date - date).days <= (num_days + 5)
            except ParserError:
                pass