whoogle-search/app/utils/routing_utils.py

73 lines
2.7 KiB
Python
Raw Normal View History

Session refactoring and improved filter (#86) * Project refactor (#85) * Major refactor of requests and session management - Switches from pycurl to requests library - Allows for less janky decoding, especially with non-latin character sets - Adds session level management of user configs - Allows for each session to set its own config -- users with blocked cookies fall back to the "default" profile (same usage as before) - Updates key gen/regen to more aggressively swap out keys after each request * Added ability to save/load configs by name - New PUT method for config allows changing config with specified name - New methods in js controller to handle loading/saving of configs * Result formatting and removal of unused elements - Fixed question section formatting from results page (added appropriate padding and made questions styled as italic) - Removed user agent display from main config settings * Minor change to save config button label (now "Save As...") * Fixed issue with "de-pickling" of flask session Having a gitignore-everything ("*") file within a flask session folder seems to cause a weird bug where the state of the app becomes unusable from continuously trying to prune files listed in the gitignore (and it can't prune '*'). * Switched to pickling saved configs * Updated ad/sponsored content filter and conf naming Configs are now named with a .conf extension to allow for easier manual cleanup/modification of named config files Sponsored content now removed by basic string matching of span content * Version bump to 0.2.0 * Fixed request.send return style * Moved custom conf files to their own directory * Refactored whoogle session mgmt Now allows a fallback "default" session to be used if a user's browser is blocking cookies * Reworked pytest client fixture to support new session mgmt * Added better multilingual support, updated filter Results page now includes method for switching to "All Languages" from whichever language is specified as the primary in the config (see #74). Also removes the non-Whoogle links from the page footer, leaving only the page navigation controls Added support for the date range filter on the results page, though I'd still recommend using the ":past <unit>" query instead. * Removed no-cache enforcement, minor styling/formatting improvements * Improving ad filtering for non-English languages * Added footer to results page
2020-06-11 19:38:51 +00:00
from app.filter import Filter, get_first_link
from app.utils.misc import generate_user_keys
from app.request import gen_query
from bs4 import BeautifulSoup
from cryptography.fernet import Fernet, InvalidToken
from flask import g
from typing import Any, Tuple
class RoutingUtils:
def __init__(self, request, config, session, cookies_disabled=False):
self.request_params = request.args if request.method == 'GET' else request.form
self.user_agent = request.headers.get('User-Agent')
self.feeling_lucky = False
self.config = config
self.session = session
self.query = ''
self.cookies_disabled = cookies_disabled
self.search_type = self.request_params.get('tbm') if 'tbm' in self.request_params else ''
def __getitem__(self, name):
return getattr(self, name)
def __setitem__(self, name, value):
return setattr(self, name, value)
def __delitem__(self, name):
return delattr(self, name)
def __contains__(self, name):
return hasattr(self, name)
def new_search_query(self) -> str:
# Generate a new element key each time a new search is performed
self.session['fernet_keys']['element_key'] = generate_user_keys(
cookies_disabled=self.cookies_disabled)['element_key']
q = self.request_params.get('q')
if q is None or len(q) == 0:
return ''
else:
# Attempt to decrypt if this is an internal link
try:
q = Fernet(self.session['fernet_keys']['text_key']).decrypt(q.encode()).decode()
except InvalidToken:
pass
# Reset text key
self.session['fernet_keys']['text_key'] = generate_user_keys(
cookies_disabled=self.cookies_disabled)['text_key']
# Format depending on whether or not the query is a "feeling lucky" query
self.feeling_lucky = q.startswith('! ')
self.query = q[2:] if self.feeling_lucky else q
return self.query
def generate_response(self) -> Tuple[Any, int]:
mobile = 'Android' in self.user_agent or 'iPhone' in self.user_agent
content_filter = Filter(self.session['fernet_keys'], mobile=mobile, config=self.config)
full_query = gen_query(self.query, self.request_params, self.config, content_filter.near)
get_body = g.user_request.send(query=full_query).text
# Produce cleanable html soup from response
html_soup = BeautifulSoup(content_filter.reskin(get_body), 'html.parser')
if self.feeling_lucky:
return get_first_link(html_soup), 1
else:
formatted_results = content_filter.clean(html_soup)
return formatted_results, content_filter.elements