Putting '! ' at the beginning of the query now redirects to the first search result

Signed-off-by: Paul Rothrock <paul@movetoiceland.com>
This commit is contained in:
Paul Rothrock 2020-05-14 22:26:22 -04:00
parent 87f0a8d496
commit e5b24ac102
3 changed files with 53 additions and 2 deletions

View File

@ -58,6 +58,40 @@ class Filter:
return soup
def get_first_url(self, soup):
    """Return the destination URL of the first search result in ``soup``.

    Anchors with an ``href`` are scanned in the order ``find_all`` yields
    them. The first one whose href contains ``url?q=`` is treated as the
    first result: its destination is read from the ``q`` query parameter,
    and any of the destination's own query arguments listed in
    ``SKIP_ARGS`` are dropped.

    Args:
        soup: Parsed results page. Only ``find_all('a', href=True)`` is
            called on it.

    Returns:
        The filtered destination URL as a string, or ``None`` when no
        anchor qualifies.
    """
    for a in soup.find_all('a', href=True):
        href = a['href'].replace('https://www.google.com', '')
        if 'url?q=' not in href:
            continue

        # parse_qs omits blank values, so an href ending in "url?q=" has no
        # 'q' entry at all; skip it instead of raising KeyError.
        destinations = parse_qs(urlparse.urlparse(href).query).get('q')
        if not destinations:
            continue
        query_link = destinations[0]

        parsed_link = urlparse.urlparse(query_link)
        link_args = parse_qs(parsed_link.query)
        if not link_args:
            # Nothing to filter: hand the destination back untouched
            return query_link

        safe_args = {
            arg: values
            for arg, values in link_args.items()
            if arg not in SKIP_ARGS
        }

        # Rebuild from the parsed components rather than with str.replace so
        # the filtered query lands before any "#fragment" and a '?' elsewhere
        # in the URL is left alone. An empty query drops the '?' entirely.
        filtered_query = urlparse.urlencode(safe_args, doseq=True)
        return parsed_link._replace(query=filtered_query).geturl()

    # No result link was found on the page
    return None
def remove_ads(self, soup):
main_divs = soup.find('div', {'id': 'main'})
if main_divs is None:

View File

@ -64,7 +64,7 @@ def opensearch():
def search():
request_params = request.args if request.method == 'GET' else request.form
q = request_params.get('q')
if q is None or len(q) == 0:
return redirect('/')
else:
@ -74,6 +74,11 @@ def search():
except InvalidToken:
pass
feeling_lucky = q.startswith("! ")
if feeling_lucky:
q = q[2:]
user_agent = request.headers.get('User-Agent')
mobile = 'Android' in user_agent or 'iPhone' in user_agent
@ -82,7 +87,15 @@ def search():
get_body = g.user_request.send(query=full_query)
results = content_filter.reskin(get_body)
formatted_results = content_filter.clean(BeautifulSoup(results, 'html.parser'))
dirty_soup = BeautifulSoup(results, 'html.parser')
if feeling_lucky:
redirect_url = content_filter.get_first_url(dirty_soup)
return redirect(redirect_url, 303) # Using 303 so the browser performs a GET request for the URL
else:
formatted_results = content_filter.clean(dirty_soup)
return render_template('display.html', query=urlparse.unquote(q), response=formatted_results)

View File

@ -17,6 +17,10 @@ def test_search(client):
rv = client.get('/search?q=test')
assert rv._status_code == 200
def test_feeling_lucky(client):
    """A query prefixed with '! ' is answered with a 303 redirect."""
    rv = client.get('/search?q=!%20test')
    # Read the status through the response's public attribute rather than
    # the private ``_status_code`` backing field.
    assert rv.status_code == 303
def test_config(client):
rv = client.post('/config', data=demo_config)