# - Switches from pycurl to the requests library
# - Allows for less janky decoding, especially with non-Latin character sets
# - Adds session-level management of user configs
# - Allows each session to set its own config (people are probably going to
#   complain about this, though it's not clear whether it'll be the same number
#   of people who were upset that their friends/family had to share a config)
# - Updates key gen/regen to more aggressively swap out keys after each request
95 lines
3.0 KiB
Python
95 lines
3.0 KiB
Python
from io import BytesIO
|
|
from lxml import etree
|
|
import random
|
|
import requests
|
|
import urllib.parse as urlparse
|
|
|
|
# Core Google endpoints: search results and query autocompletion
SEARCH_URL = 'https://www.google.com/search?gbv=1&q='
AUTOCOMPLETE_URL = 'https://suggestqueries.google.com/complete/search?client=toolbar&'

# User agent templates; the `{}` placeholders are filled in by
# gen_user_agent() (two for mobile, three for desktop)
MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'

# Valid query params
VALID_PARAMS = ['tbs', 'tbm', 'start', 'near']
|
|
|
|
|
|
def gen_user_agent(is_mobile):
    """Return a randomized User-Agent string built from the UA templates.

    Each name segment is a randomly chosen prefix joined to a fixed suffix
    ("zilla", "fox", "ux"). The mobile template takes two segments, the
    desktop template takes all three.

    Args:
        is_mobile: Truthy to fill MOBILE_UA, falsy to fill DESKTOP_UA.

    Returns:
        The formatted user agent string.
    """
    # All three draws happen unconditionally and in this order, so the
    # sequence of values pulled from `random` does not depend on is_mobile.
    engine_name = '{}zilla'.format(random.choice(['Moo', 'Woah', 'Bro', 'Slow']))
    browser_name = '{}fox'.format(random.choice(['Choir', 'Squier', 'Higher', 'Wire']))
    platform_name = '{}ux'.format(random.choice(['Win', 'Sin', 'Gin', 'Fin', 'Kin']))

    if not is_mobile:
        return DESKTOP_UA.format(engine_name, platform_name, browser_name)

    # The mobile template has no platform placeholder
    return MOBILE_UA.format(engine_name, browser_name)
|
|
|
|
|
|
def gen_query(query, args, config, near_city=None):
    """Build the quoted search text plus its trailing "&key=value" params.

    The result is meant to be appended to a base URL that already ends in
    "q=" (see SEARCH_URL).

    Args:
        query: Raw search text. If it contains ":past", the first character
            of whatever follows is used as the time-range filter
            (e.g. "new restaurants :past month" -> "&tbs=qdr:m"). The
            ":past ..." text itself is left in the search text.
        args: Mapping of request arguments; 'tbm' and 'start' are read when
            present and are expected to be strings.
        config: Object exposing `lang`, `ctry` and `safe` attributes.
        near_city: Optional city name to search near.

    Returns:
        The URL-quoted query followed by every non-empty parameter, in the
        order: tbs, tbm, start, near, lr (+hl), cr, safe.
    """
    param_dict = {key: '' for key in VALID_PARAMS}

    # Use :past(hour/day/week/month/year) if available
    # example search "new restaurants :past month"
    if ':past' in query:
        time_range = query.split(':past', 1)[-1].strip()
        # A bare ":past" with nothing after it carries no range; skip the
        # filter instead of indexing into an empty string (IndexError).
        if time_range:
            param_dict['tbs'] = '&tbs=qdr:' + time_range[0].lower()

    # Ensure search query is parsable
    query = urlparse.quote(query)

    # Pass along type of results (news, images, books, etc)
    if 'tbm' in args:
        param_dict['tbm'] = '&tbm=' + args.get('tbm')

    # Get results page start value (10 per page, ie page 2 start val = 20)
    if 'start' in args:
        param_dict['start'] = '&start=' + args.get('start')

    # Search for results near a particular city, if available
    if near_city:
        param_dict['near'] = '&near=' + urlparse.quote(near_city)

    # Set language for results (lr) and interface (hl)
    param_dict['lr'] = '&lr=' + config.lang + '&hl=' + config.lang.replace('lang_', '')
    param_dict['cr'] = ('&cr=' + config.ctry) if config.ctry else ''
    param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off')

    # Dicts keep insertion order, so params are emitted in the order set above;
    # unset params are empty strings and are skipped.
    return query + ''.join(val for val in param_dict.values() if val)
|
|
|
|
|
|
class Request:
    """Issues search and autocomplete requests using a generated user agent.

    Attributes:
        language: Language code passed as `hl` to the autocomplete endpoint.
        mobile: True when the client's user agent contains "Android" or
            "iPhone".
        modified_user_agent: User agent from gen_user_agent(), sent with
            every request.
    """

    def __init__(self, normal_ua, language='lang_en'):
        """Store the language and derive a generated user agent.

        Args:
            normal_ua: The client's real user agent string; only inspected
                to decide between the mobile and desktop variant.
            language: Language code, "lang_en" by default.
        """
        self.language = language
        self.mobile = any(token in normal_ua for token in ('Android', 'iPhone'))
        self.modified_user_agent = gen_user_agent(self.mobile)

    def __getitem__(self, name):
        """Allow attribute access with subscript syntax (obj['attr'])."""
        return getattr(self, name)

    def autocomplete(self, query):
        """Return autocomplete suggestions for `query`.

        Sends the query to AUTOCOMPLETE_URL, parses the response as XML and
        collects the `data` attribute of every `suggestion` element.

        Returns:
            A list of suggestions, or an empty list if the response is empty.
        """
        encoded_params = urlparse.urlencode({'hl': self.language, 'q': query})
        response = self.send(base_url=AUTOCOMPLETE_URL, query=encoded_params)

        if not response:
            return []

        return etree.fromstring(response).xpath('//suggestion/@data')

    def send(self, base_url=SEARCH_URL, query='', return_bytes=False):
        """GET `base_url + query` with the generated User-Agent header.

        Args:
            base_url: URL prefix; defaults to SEARCH_URL.
            query: Already-encoded query string appended to `base_url`.
            return_bytes: When true, return the raw body
                (`response.content`) instead of the decoded text
                (`response.text`).

        Returns:
            The response body, as described by `return_bytes`.
        """
        target_url = base_url + query
        response = requests.get(
            target_url,
            headers={'User-Agent': self.modified_user_agent},
        )

        return response.content if return_bytes else response.text
|