Add exception handling and a logger to the metadata providers

This commit is contained in:
xlivevil 2022-02-25 12:18:07 +08:00
parent 695ce83681
commit 97cf20764b
No known key found for this signature in database
GPG Key ID: D4D073C8E61991AF
7 changed files with 81 additions and 26 deletions

View File

@ -19,15 +19,20 @@
import concurrent.futures import concurrent.futures
import requests import requests
from bs4 import BeautifulSoup as BS # requirement from bs4 import BeautifulSoup as BS # requirement
from typing import List, Optional
try: try:
import cchardet #optional for better speed import cchardet #optional for better speed
except ImportError: except ImportError:
pass pass
from cps import logger
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
#from time import time #from time import time
from operator import itemgetter from operator import itemgetter
log = logger.create()
class Amazon(Metadata): class Amazon(Metadata):
__name__ = "Amazon" __name__ = "Amazon"
__id__ = "amazon" __id__ = "amazon"
@ -46,12 +51,16 @@ class Amazon(Metadata):
def search( def search(
self, query: str, generic_cover: str = "", locale: str = "en" self, query: str, generic_cover: str = "", locale: str = "en"
): ) -> Optional[List[MetaRecord]]:
#timer=time() #timer=time()
def inner(link,index)->[dict,int]: def inner(link,index) -> tuple[dict,int]:
with self.session as session: with self.session as session:
try:
r = session.get(f"https://www.amazon.com/{link}") r = session.get(f"https://www.amazon.com/{link}")
r.raise_for_status() r.raise_for_status()
except Exception as e:
log.warning(e)
return
long_soup = BS(r.text, "lxml") #~4sec :/ long_soup = BS(r.text, "lxml") #~4sec :/
soup2 = long_soup.find("div", attrs={"cel_widget_id": "dpx-books-ppd_csm_instrumentation_wrapper"}) soup2 = long_soup.find("div", attrs={"cel_widget_id": "dpx-books-ppd_csm_instrumentation_wrapper"})
if soup2 is None: if soup2 is None:
@ -107,11 +116,15 @@ class Amazon(Metadata):
val = list() val = list()
if self.active: if self.active:
try:
results = self.session.get( results = self.session.get(
f"https://www.amazon.com/s?k={query.replace(' ', '+')}&i=digital-text&sprefix={query.replace(' ', '+')}" f"https://www.amazon.com/s?k={query.replace(' ', '+')}&i=digital-text&sprefix={query.replace(' ', '+')}"
f"%2Cdigital-text&ref=nb_sb_noss", f"%2Cdigital-text&ref=nb_sb_noss",
headers=self.headers) headers=self.headers)
results.raise_for_status() results.raise_for_status()
except Exception as e:
log.warning(e)
return None
soup = BS(results.text, 'html.parser') soup = BS(results.text, 'html.parser')
links_list = [next(filter(lambda i: "digital-text" in i["href"], x.findAll("a")))["href"] for x in links_list = [next(filter(lambda i: "digital-text" in i["href"], x.findAll("a")))["href"] for x in
soup.findAll("div", attrs={"data-component-type": "s-search-result"})] soup.findAll("div", attrs={"data-component-type": "s-search-result"})]

View File

@ -21,8 +21,11 @@ from typing import Dict, List, Optional
from urllib.parse import quote from urllib.parse import quote
import requests import requests
from cps import logger
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
log = logger.create()
class ComicVine(Metadata): class ComicVine(Metadata):
__name__ = "ComicVine" __name__ = "ComicVine"
@ -46,10 +49,15 @@ class ComicVine(Metadata):
if title_tokens: if title_tokens:
tokens = [quote(t.encode("utf-8")) for t in title_tokens] tokens = [quote(t.encode("utf-8")) for t in title_tokens]
query = "%20".join(tokens) query = "%20".join(tokens)
try:
result = requests.get( result = requests.get(
f"{ComicVine.BASE_URL}{query}{ComicVine.QUERY_PARAMS}", f"{ComicVine.BASE_URL}{query}{ComicVine.QUERY_PARAMS}",
headers=ComicVine.HEADERS, headers=ComicVine.HEADERS,
) )
result.raise_for_status()
except Exception as e:
log.warning(e)
return None
for result in result.json()["results"]: for result in result.json()["results"]:
match = self._parse_search_result( match = self._parse_search_result(
result=result, generic_cover=generic_cover, locale=locale result=result, generic_cover=generic_cover, locale=locale

View File

@ -22,9 +22,12 @@ from urllib.parse import quote
import requests import requests
from cps import logger
from cps.isoLanguages import get_lang3, get_language_name from cps.isoLanguages import get_lang3, get_language_name
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
log = logger.create()
class Google(Metadata): class Google(Metadata):
__name__ = "Google" __name__ = "Google"
@ -45,7 +48,12 @@ class Google(Metadata):
if title_tokens: if title_tokens:
tokens = [quote(t.encode("utf-8")) for t in title_tokens] tokens = [quote(t.encode("utf-8")) for t in title_tokens]
query = "+".join(tokens) query = "+".join(tokens)
try:
results = requests.get(Google.SEARCH_URL + query) results = requests.get(Google.SEARCH_URL + query)
results.raise_for_status()
except Exception as e:
log.warning(e)
return None
for result in results.json().get("items", []): for result in results.json().get("items", []):
val.append( val.append(
self._parse_search_result( self._parse_search_result(

View File

@ -27,9 +27,12 @@ from html2text import HTML2Text
from lxml.html import HtmlElement, fromstring, tostring from lxml.html import HtmlElement, fromstring, tostring
from markdown2 import Markdown from markdown2 import Markdown
from cps import logger
from cps.isoLanguages import get_language_name from cps.isoLanguages import get_language_name
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
log = logger.create()
SYMBOLS_TO_TRANSLATE = ( SYMBOLS_TO_TRANSLATE = (
"öÖüÜóÓőŐúÚéÉáÁűŰíÍąĄćĆęĘłŁńŃóÓśŚźŹżŻ", "öÖüÜóÓőŐúÚéÉáÁűŰíÍąĄćĆęĘłŁńŃóÓśŚźŹżŻ",
"oOuUoOoOuUeEaAuUiIaAcCeElLnNoOsSzZzZ", "oOuUoOoOuUeEaAuUiIaAcCeElLnNoOsSzZzZ",
@ -112,7 +115,12 @@ class LubimyCzytac(Metadata):
self, query: str, generic_cover: str = "", locale: str = "en" self, query: str, generic_cover: str = "", locale: str = "en"
) -> Optional[List[MetaRecord]]: ) -> Optional[List[MetaRecord]]:
if self.active: if self.active:
try:
result = requests.get(self._prepare_query(title=query)) result = requests.get(self._prepare_query(title=query))
result.raise_for_status()
except Exception as e:
log.warning(e)
return None
root = fromstring(result.text) root = fromstring(result.text)
lc_parser = LubimyCzytacParser(root=root, metadata=self) lc_parser = LubimyCzytacParser(root=root, metadata=self)
matches = lc_parser.parse_search_results() matches = lc_parser.parse_search_results()
@ -200,7 +208,12 @@ class LubimyCzytacParser:
def parse_single_book( def parse_single_book(
self, match: MetaRecord, generic_cover: str, locale: str self, match: MetaRecord, generic_cover: str, locale: str
) -> MetaRecord: ) -> MetaRecord:
try:
response = requests.get(match.url) response = requests.get(match.url)
response.raise_for_status()
except Exception as e:
log.warning(e)
return None
self.root = fromstring(response.text) self.root = fromstring(response.text)
match.cover = self._parse_cover(generic_cover=generic_cover) match.cover = self._parse_cover(generic_cover=generic_cover)
match.description = self._parse_description() match.description = self._parse_description()

View File

@ -28,8 +28,12 @@ try:
except FakeUserAgentError: except FakeUserAgentError:
raise ImportError("No module named 'scholarly'") raise ImportError("No module named 'scholarly'")
from cps import logger
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
log = logger.create()
class scholar(Metadata): class scholar(Metadata):
__name__ = "Google Scholar" __name__ = "Google Scholar"
__id__ = "googlescholar" __id__ = "googlescholar"
@ -44,7 +48,11 @@ class scholar(Metadata):
if title_tokens: if title_tokens:
tokens = [quote(t.encode("utf-8")) for t in title_tokens] tokens = [quote(t.encode("utf-8")) for t in title_tokens]
query = " ".join(tokens) query = " ".join(tokens)
try:
scholar_gen = itertools.islice(scholarly.search_pubs(query), 10) scholar_gen = itertools.islice(scholarly.search_pubs(query), 10)
except Exception as e:
log.warning(e)
return None
for result in scholar_gen: for result in scholar_gen:
match = self._parse_search_result( match = self._parse_search_result(
result=result, generic_cover="", locale=locale result=result, generic_cover="", locale=locale

View File

@ -130,6 +130,6 @@ def metadata_search():
if active.get(c.__id__, True) if active.get(c.__id__, True)
} }
for future in concurrent.futures.as_completed(meta): for future in concurrent.futures.as_completed(meta):
data.extend([asdict(x) for x in future.result()]) data.extend([asdict(x) for x in future.result() if x])
# log.info({'Time elapsed {}'.format(current_milli_time()-start)}) # log.info({'Time elapsed {}'.format(current_milli_time()-start)})
return Response(json.dumps(data), mimetype="application/json") return Response(json.dumps(data), mimetype="application/json")

View File

@ -92,6 +92,7 @@ $(function () {
data: {"query": keyword}, data: {"query": keyword},
dataType: "json", dataType: "json",
success: function success(data) { success: function success(data) {
if (data.length) {
$("#meta-info").html("<ul id=\"book-list\" class=\"media-list\"></ul>"); $("#meta-info").html("<ul id=\"book-list\" class=\"media-list\"></ul>");
data.forEach(function(book) { data.forEach(function(book) {
var $book = $(templates.bookResult(book)); var $book = $(templates.bookResult(book));
@ -100,6 +101,10 @@ $(function () {
}); });
$("#book-list").append($book); $("#book-list").append($book);
}); });
}
else {
$("#meta-info").html("<p class=\"text-danger\">" + msg.no_result + "!</p>" + $("#meta-info")[0].innerHTML)
}
}, },
error: function error() { error: function error() {
$("#meta-info").html("<p class=\"text-danger\">" + msg.search_error + "!</p>" + $("#meta-info")[0].innerHTML); $("#meta-info").html("<p class=\"text-danger\">" + msg.search_error + "!</p>" + $("#meta-info")[0].innerHTML);