Add exception handling and logger in metadata provider
parent 695ce83681
commit 97cf20764b
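
The pattern is the same in every provider touched here: the HTTP call and its raise_for_status() check move inside a try block, the failure is logged through the new module-level cps logger, and the provider returns None instead of letting the exception escape. A minimal standalone sketch of that pattern, with the stdlib logging module standing in for cps.logger and a hypothetical fetch_search_results helper:

    # Sketch of the guard this commit adds around each provider's request.
    # logging stands in for cps.logger; fetch_search_results is hypothetical.
    import logging
    from typing import List, Optional

    import requests

    log = logging.getLogger(__name__)

    def fetch_search_results(url: str) -> Optional[List[dict]]:
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()  # turn 4xx/5xx responses into exceptions
        except Exception as e:
            # Log and swallow, so one failing provider cannot abort the
            # whole metadata search.
            log.warning(e)
            return None
        return response.json().get("items", [])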
cps/metadata_provider/amazon.py

@@ -19,15 +19,20 @@
 import concurrent.futures
 import requests
 from bs4 import BeautifulSoup as BS # requirement
+from typing import List, Optional
+
 try:
     import cchardet #optional for better speed
 except ImportError:
     pass
+from cps import logger
 from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
 #from time import time
 from operator import itemgetter
 
+log = logger.create()
+
 
 class Amazon(Metadata):
     __name__ = "Amazon"
     __id__ = "amazon"

@@ -46,12 +51,16 @@ class Amazon(Metadata):
 
     def search(
         self, query: str, generic_cover: str = "", locale: str = "en"
-    ):
+    ) -> Optional[List[MetaRecord]]:
         #timer=time()
-        def inner(link,index)->[dict,int]:
+        def inner(link,index) -> tuple[dict,int]:
             with self.session as session:
-                r = session.get(f"https://www.amazon.com/{link}")
-                r.raise_for_status()
+                try:
+                    r = session.get(f"https://www.amazon.com/{link}")
+                    r.raise_for_status()
+                except Exception as e:
+                    log.warning(e)
+                    return
                 long_soup = BS(r.text, "lxml") #~4sec :/
                 soup2 = long_soup.find("div", attrs={"cel_widget_id": "dpx-books-ppd_csm_instrumentation_wrapper"})
                 if soup2 is None:

@@ -107,11 +116,15 @@ class Amazon(Metadata):
 
         val = list()
         if self.active:
-            results = self.session.get(
-                f"https://www.amazon.com/s?k={query.replace(' ', '+')}&i=digital-text&sprefix={query.replace(' ', '+')}"
-                f"%2Cdigital-text&ref=nb_sb_noss",
-                headers=self.headers)
-            results.raise_for_status()
+            try:
+                results = self.session.get(
+                    f"https://www.amazon.com/s?k={query.replace(' ', '+')}&i=digital-text&sprefix={query.replace(' ', '+')}"
+                    f"%2Cdigital-text&ref=nb_sb_noss",
+                    headers=self.headers)
+                results.raise_for_status()
+            except Exception as e:
+                log.warning(e)
+                return None
             soup = BS(results.text, 'html.parser')
             links_list = [next(filter(lambda i: "digital-text" in i["href"], x.findAll("a")))["href"] for x in
                           soup.findAll("div", attrs={"data-component-type": "s-search-result"})]
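
One note on the inner() signature fixed above: the old annotation [dict,int] was a list literal that type checkers cannot interpret, and the corrected tuple[dict,int] uses the built-in generic, which is evaluated when the function is defined and raises TypeError on interpreters older than Python 3.9. If those still matter, typing.Tuple spells the same type:

    # Backwards-compatible spelling of the corrected annotation.
    from typing import Tuple

    def inner(link: str, index: int) -> Tuple[dict, int]:
        ...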
cps/metadata_provider/comicvine.py

@@ -21,8 +21,11 @@ from typing import Dict, List, Optional
 from urllib.parse import quote
 
 import requests
+from cps import logger
 from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
 
+log = logger.create()
+
 
 class ComicVine(Metadata):
     __name__ = "ComicVine"

@@ -46,10 +49,15 @@ class ComicVine(Metadata):
             if title_tokens:
                 tokens = [quote(t.encode("utf-8")) for t in title_tokens]
                 query = "%20".join(tokens)
-            result = requests.get(
-                f"{ComicVine.BASE_URL}{query}{ComicVine.QUERY_PARAMS}",
-                headers=ComicVine.HEADERS,
-            )
+            try:
+                result = requests.get(
+                    f"{ComicVine.BASE_URL}{query}{ComicVine.QUERY_PARAMS}",
+                    headers=ComicVine.HEADERS,
+                )
+                result.raise_for_status()
+            except Exception as e:
+                log.warning(e)
+                return None
             for result in result.json()["results"]:
                 match = self._parse_search_result(
                     result=result, generic_cover=generic_cover, locale=locale
cps/metadata_provider/google.py

@@ -22,9 +22,12 @@ from urllib.parse import quote
 
 import requests
 
+from cps import logger
 from cps.isoLanguages import get_lang3, get_language_name
 from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
 
+log = logger.create()
+
 
 class Google(Metadata):
     __name__ = "Google"

@@ -45,7 +48,12 @@ class Google(Metadata):
             if title_tokens:
                 tokens = [quote(t.encode("utf-8")) for t in title_tokens]
                 query = "+".join(tokens)
-            results = requests.get(Google.SEARCH_URL + query)
+            try:
+                results = requests.get(Google.SEARCH_URL + query)
+                results.raise_for_status()
+            except Exception as e:
+                log.warning(e)
+                return None
             for result in results.json().get("items", []):
                 val.append(
                     self._parse_search_result(
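
In this hunk, as in the ComicVine one above, raise_for_status() now guards the HTTP status, but the results.json() call still runs outside the try, so a body that is not valid JSON would raise past the new handler. A possible variation, not part of this commit, that pulls the decoding into the guarded block; SEARCH_URL mirrors the module's Google Books endpoint:

    # Variation: guard JSON decoding together with the request.
    import logging

    import requests

    log = logging.getLogger(__name__)
    SEARCH_URL = "https://www.googleapis.com/books/v1/volumes?q="

    def search_items(query: str):
        try:
            results = requests.get(SEARCH_URL + query, timeout=10)
            results.raise_for_status()
            items = results.json().get("items", [])  # raises if the body is not JSON
        except Exception as e:
            log.warning(e)
            return None
        return items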
cps/metadata_provider/lubimyczytac.py

@@ -27,9 +27,12 @@ from html2text import HTML2Text
 from lxml.html import HtmlElement, fromstring, tostring
 from markdown2 import Markdown
 
+from cps import logger
 from cps.isoLanguages import get_language_name
 from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
 
+log = logger.create()
+
 SYMBOLS_TO_TRANSLATE = (
     "öÖüÜóÓőŐúÚéÉáÁűŰíÍąĄćĆęĘłŁńŃóÓśŚźŹżŻ",
     "oOuUoOoOuUeEaAuUiIaAcCeElLnNoOsSzZzZ",

@@ -112,7 +115,12 @@ class LubimyCzytac(Metadata):
         self, query: str, generic_cover: str = "", locale: str = "en"
     ) -> Optional[List[MetaRecord]]:
         if self.active:
-            result = requests.get(self._prepare_query(title=query))
+            try:
+                result = requests.get(self._prepare_query(title=query))
+                result.raise_for_status()
+            except Exception as e:
+                log.warning(e)
+                return None
             root = fromstring(result.text)
             lc_parser = LubimyCzytacParser(root=root, metadata=self)
             matches = lc_parser.parse_search_results()

@@ -200,7 +208,12 @@ class LubimyCzytacParser:
     def parse_single_book(
         self, match: MetaRecord, generic_cover: str, locale: str
     ) -> MetaRecord:
-        response = requests.get(match.url)
+        try:
+            response = requests.get(match.url)
+            response.raise_for_status()
+        except Exception as e:
+            log.warning(e)
+            return None
         self.root = fromstring(response.text)
         match.cover = self._parse_cover(generic_cover=generic_cover)
         match.description = self._parse_description()
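
A small wrinkle in the parse_single_book hunk: the method keeps its -> MetaRecord annotation while the new error path returns None, so the effective contract is now Optional[MetaRecord], matching what search already declares. A sketch of the stricter signature (not part of the commit):

    # Hypothetical stricter signature reflecting the new error path.
    from typing import Optional

    from cps.services.Metadata import MetaRecord

    class LubimyCzytacParser:
        def parse_single_book(
            self, match: MetaRecord, generic_cover: str, locale: str
        ) -> Optional[MetaRecord]:
            ...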
cps/metadata_provider/scholar.py

@@ -28,8 +28,12 @@ try:
 except FakeUserAgentError:
     raise ImportError("No module named 'scholarly'")
 
+from cps import logger
 from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
 
+log = logger.create()
+
+
 class scholar(Metadata):
     __name__ = "Google Scholar"
     __id__ = "googlescholar"

@@ -44,7 +48,11 @@ class scholar(Metadata):
             if title_tokens:
                 tokens = [quote(t.encode("utf-8")) for t in title_tokens]
                 query = " ".join(tokens)
-            scholar_gen = itertools.islice(scholarly.search_pubs(query), 10)
+            try:
+                scholar_gen = itertools.islice(scholarly.search_pubs(query), 10)
+            except Exception as e:
+                log.warning(e)
+                return None
             for result in scholar_gen:
                 match = self._parse_search_result(
                     result=result, generic_cover="", locale=locale
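
A caveat on the scholar hunk: itertools.islice is lazy, so the try above is only guaranteed to catch what scholarly.search_pubs raises up front; if scholarly fetches further result pages while the for loop below iterates, those errors would surface outside the handler. A sketch that guards the iteration as well, as an extension rather than what the commit does:

    # islice() does no work until consumed, so guard the iteration too.
    import itertools
    import logging

    log = logging.getLogger(__name__)

    def first_results(gen, n=10):
        results = []
        try:
            for result in itertools.islice(gen, n):
                results.append(result)
        except Exception as e:
            log.warning(e)
            return None
        return results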
cps/search_metadata.py

@@ -130,6 +130,6 @@ def metadata_search():
             if active.get(c.__id__, True)
         }
         for future in concurrent.futures.as_completed(meta):
-            data.extend([asdict(x) for x in future.result()])
+            data.extend([asdict(x) for x in future.result() if x])
         # log.info({'Time elapsed {}'.format(current_milli_time()-start)})
     return Response(json.dumps(data), mimetype="application/json")
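
The new "if x" guard drops None entries inside a provider's result list, but it does not cover a provider whose search() returns None outright: future.result() would then be None and iterating it would raise TypeError. A condensed sketch of the loop with that extra guard, names simplified from metadata_search:

    # Condensed aggregation sketch; asdict flattens MetaRecord dataclasses.
    import concurrent.futures
    from dataclasses import asdict

    def gather(futures):
        data = []
        for future in concurrent.futures.as_completed(futures):
            result = future.result()  # re-raises any exception from the worker
            if result:
                data.extend([asdict(x) for x in result if x])
        return data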
cps/static/js/get_meta.js

@@ -92,14 +92,19 @@ $(function () {
             data: {"query": keyword},
             dataType: "json",
             success: function success(data) {
-                $("#meta-info").html("<ul id=\"book-list\" class=\"media-list\"></ul>");
-                data.forEach(function(book) {
-                    var $book = $(templates.bookResult(book));
-                    $book.find("img").on("click", function () {
-                        populateForm(book);
+                if (data.length) {
+                    $("#meta-info").html("<ul id=\"book-list\" class=\"media-list\"></ul>");
+                    data.forEach(function(book) {
+                        var $book = $(templates.bookResult(book));
+                        $book.find("img").on("click", function () {
+                            populateForm(book);
+                        });
+                        $("#book-list").append($book);
                     });
-                    $("#book-list").append($book);
-                });
+                }
+                else {
+                    $("#meta-info").html("<p class=\"text-danger\">" + msg.no_result + "!</p>" + $("#meta-info")[0].innerHTML)
+                }
             },
             error: function error() {
                 $("#meta-info").html("<p class=\"text-danger\">" + msg.search_error + "!</p>" + $("#meta-info")[0].innerHTML);