Add series, languages, and ISBN to the Google provider
This commit is contained in:
parent
362fdc5716
commit
d64589914f
|
@ -17,41 +17,93 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
# Google Books API documentation: https://developers.google.com/books/docs/v1/using
|
# Google Books API documentation: https://developers.google.com/books/docs/v1/using
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from cps.services.Metadata import Metadata
|
from cps.isoLanguages import get_lang3, get_language_name
|
||||||
|
from cps.services.Metadata import MetaRecord, Metadata
|
||||||
|
|
||||||
|
|
||||||
class Google(Metadata):
    """Metadata provider that searches the Google Books volumes API."""

    __name__ = "Google"
    __id__ = "google"
    DESCRIPTION = "Google Books"
    META_URL = "https://books.google.com/"
    BOOK_URL = "https://books.google.com/books?id="
    SEARCH_URL = "https://www.googleapis.com/books/v1/volumes?q="
    # Only industry identifiers of this type are copied into the "isbn" entry.
    ISBN_TYPE = "ISBN_13"

    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List[MetaRecord]]:
        """Search Google Books and return one record per returned volume.

        Args:
            query: Free-text search string.
            generic_cover: Cover URL used for volumes without a thumbnail.
            locale: Locale passed to ``get_language_name`` for language names.

        Returns:
            A list of records (empty when nothing matched), or ``None`` when
            this provider is not active.
        """
        if not self.active:
            return None

        title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
        if title_tokens:
            # Percent-encode each token so non-ASCII titles survive the URL.
            query = "+".join(
                quote(token.encode("utf-8")) for token in title_tokens
            )

        results = requests.get(Google.SEARCH_URL + query)
        # A search without hits has no "items" key in its response; treat it
        # as an empty result instead of raising KeyError.
        items = results.json().get("items", [])
        return [
            self._parse_search_result(
                result=item, generic_cover=generic_cover, locale=locale
            )
            for item in items
        ]

    def _parse_search_result(
        self, result: Dict, generic_cover: str, locale: str
    ) -> MetaRecord:
        """Convert one volume of the API response into a record.

        ``result`` must contain ``id`` and ``volumeInfo["title"]``; every
        other field falls back to an empty default when missing.
        """
        volume_info = result["volumeInfo"]
        match = {
            "id": result["id"],
            "title": volume_info["title"],
            "authors": volume_info.get("authors", []),
            "url": Google.BOOK_URL + result["id"],
            "cover": self._parse_cover(
                result=result, generic_cover=generic_cover
            ),
            "description": volume_info.get("description", ""),
            "languages": self._parse_languages(result=result, locale=locale),
            "publisher": volume_info.get("publisher", ""),
            "publishedDate": volume_info.get("publishedDate", ""),
            "rating": volume_info.get("averageRating", 0),
            # No series data is read from the response; fixed defaults.
            "series": "",
            "series_index": 1,
            "tags": volume_info.get("categories", []),
            "source": {
                "id": self.__id__,
                "description": Google.DESCRIPTION,
                "link": Google.META_URL,
            },
            "identifiers": {"google": result["id"]},
        }
        return self._parse_isbn(result=result, match=match)

    @staticmethod
    def _parse_isbn(result: Dict, match: Dict) -> Dict:
        """Add the first ``ISBN_TYPE`` identifier to ``match["identifiers"]``.

        ``match`` is modified in place and also returned; it is left
        unchanged when the volume lists no identifier of that type.
        """
        identifiers = result["volumeInfo"].get("industryIdentifiers", [])
        for identifier in identifiers:
            if identifier.get("type") == Google.ISBN_TYPE:
                match["identifiers"]["isbn"] = identifier.get("identifier")
                break
        return match

    @staticmethod
    def _parse_cover(result: Dict, generic_cover: str) -> str:
        """Return the volume's thumbnail URL over https, else ``generic_cover``."""
        image_links = result["volumeInfo"].get("imageLinks") or {}
        # "imageLinks" may be present without a "thumbnail" entry; fall back
        # to the generic cover instead of raising KeyError.
        cover_url = image_links.get("thumbnail")
        if cover_url:
            return cover_url.replace("http://", "https://")
        return generic_cover

    @staticmethod
    def _parse_languages(result: Dict, locale: str) -> List[str]:
        """Return the volume's language name for ``locale`` as a 0/1-item list."""
        # The language code sits inside "volumeInfo" like every other field
        # read here, not at the top level of the volume.
        language_iso2 = result["volumeInfo"].get("language", "")
        if not language_iso2:
            return []
        return [get_language_name(locale, get_lang3(language_iso2))]
|
||||||
|
|
|
@ -107,7 +107,9 @@ class LubimyCzytac(Metadata):
|
||||||
|
|
||||||
SUMMARY = "//script[@type='application/ld+json']//text()"
|
SUMMARY = "//script[@type='application/ld+json']//text()"
|
||||||
|
|
||||||
def search(self, query: str, generic_cover: str = "") -> Optional[List]:
|
def search(
|
||||||
|
self, query: str, generic_cover: str = "", locale: str = "en"
|
||||||
|
) -> Optional[List[MetaRecord]]:
|
||||||
if self.active:
|
if self.active:
|
||||||
result = requests.get(self._prepare_query(title=query))
|
result = requests.get(self._prepare_query(title=query))
|
||||||
root = fromstring(result.text)
|
root = fromstring(result.text)
|
||||||
|
@ -117,10 +119,7 @@ class LubimyCzytac(Metadata):
|
||||||
with ThreadPool(processes=10) as pool:
|
with ThreadPool(processes=10) as pool:
|
||||||
final_matches = pool.starmap(
|
final_matches = pool.starmap(
|
||||||
lc_parser.parse_single_book,
|
lc_parser.parse_single_book,
|
||||||
[
|
[(match, generic_cover) for match in matches],
|
||||||
(match, generic_cover)
|
|
||||||
for match in matches
|
|
||||||
],
|
|
||||||
)
|
)
|
||||||
return final_matches
|
return final_matches
|
||||||
return matches
|
return matches
|
||||||
|
@ -192,26 +191,25 @@ class LubimyCzytacParser:
|
||||||
)
|
)
|
||||||
return matches
|
return matches
|
||||||
|
|
||||||
def parse_single_book(
|
def parse_single_book(self, match: Dict, generic_cover: str) -> MetaRecord:
|
||||||
self, match: Dict, generic_cover: str
|
|
||||||
) -> MetaRecord:
|
|
||||||
response = requests.get(match.get("url"))
|
response = requests.get(match.get("url"))
|
||||||
self.root = fromstring(response.text)
|
self.root = fromstring(response.text)
|
||||||
match["series"], match["series_index"] = self._parse_series()
|
match["cover"] = self._parse_cover(generic_cover=generic_cover)
|
||||||
match["tags"] = self._parse_tags()
|
match["description"] = self._parse_description()
|
||||||
|
match["languages"] = self._parse_languages()
|
||||||
match["publisher"] = self._parse_publisher()
|
match["publisher"] = self._parse_publisher()
|
||||||
match["publishedDate"] = self._parse_from_summary(
|
match["publishedDate"] = self._parse_from_summary(
|
||||||
attribute_name="datePublished"
|
attribute_name="datePublished"
|
||||||
)
|
)
|
||||||
match["rating"] = self._parse_rating()
|
match["rating"] = self._parse_rating()
|
||||||
match["description"] = self._parse_description()
|
match["series"], match["series_index"] = self._parse_series()
|
||||||
match["cover"] = self._parse_cover(generic_cover=generic_cover)
|
match["tags"] = self._parse_tags()
|
||||||
|
|
||||||
match["source"] = {
|
match["source"] = {
|
||||||
"id": self.metadata.__id__,
|
"id": self.metadata.__id__,
|
||||||
"description": self.metadata.__name__,
|
"description": self.metadata.__name__,
|
||||||
"link": LubimyCzytac.BASE_URL,
|
"link": LubimyCzytac.BASE_URL,
|
||||||
}
|
}
|
||||||
match["languages"] = self._parse_languages()
|
|
||||||
match["identifiers"] = {
|
match["identifiers"] = {
|
||||||
"isbn": self._parse_isbn(),
|
"isbn": self._parse_isbn(),
|
||||||
"lubimyczytac": match["id"],
|
"lubimyczytac": match["id"],
|
||||||
|
|
|
@ -30,7 +30,7 @@ from sqlalchemy.exc import InvalidRequestError, OperationalError
|
||||||
from sqlalchemy.orm.attributes import flag_modified
|
from sqlalchemy.orm.attributes import flag_modified
|
||||||
|
|
||||||
from cps.services.Metadata import Metadata
|
from cps.services.Metadata import Metadata
|
||||||
from . import constants, logger, ub
|
from . import constants, get_locale, logger, ub
|
||||||
|
|
||||||
meta = Blueprint("metadata", __name__)
|
meta = Blueprint("metadata", __name__)
|
||||||
|
|
||||||
|
@ -113,11 +113,12 @@ def metadata_search():
|
||||||
query = request.form.to_dict().get("query")
|
query = request.form.to_dict().get("query")
|
||||||
data = list()
|
data = list()
|
||||||
active = current_user.view_settings.get("metadata", {})
|
active = current_user.view_settings.get("metadata", {})
|
||||||
|
locale = get_locale()
|
||||||
if query:
|
if query:
|
||||||
static_cover = url_for("static", filename="generic_cover.jpg")
|
static_cover = url_for("static", filename="generic_cover.jpg")
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
|
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
|
||||||
meta = {
|
meta = {
|
||||||
executor.submit(c.search, query, static_cover): c
|
executor.submit(c.search, query, static_cover, locale): c
|
||||||
for c in cl
|
for c in cl
|
||||||
if active.get(c.__id__, True)
|
if active.get(c.__id__, True)
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,6 +20,30 @@ import re
|
||||||
from typing import Dict, Generator, List, Optional, TypedDict, Union
|
from typing import Dict, Generator, List, Optional, TypedDict, Union
|
||||||
|
|
||||||
|
|
||||||
|
class MetaSourceInfo(TypedDict):
    """Describes the metadata provider a record came from."""

    # Provider identifier (providers store their ``__id__`` here).
    id: str
    # Human-readable provider name.
    description: str
    # URL of the provider's web site.
    link: str
|
||||||
|
|
||||||
|
|
||||||
|
class MetaRecord(TypedDict):
    """One search hit as returned by a metadata provider's ``search``."""

    # Identifier of the book at the provider.
    id: Union[str, int]
    title: str
    authors: List[str]
    # Link to the book's page at the provider.
    url: str
    # Cover image URL; providers fall back to the generic cover they are given.
    cover: str
    series: Optional[str]
    series_index: Optional[Union[int, float]]
    tags: Optional[List[str]]
    publisher: Optional[str]
    publishedDate: Optional[str]
    # Widened from Optional[int]: the Google provider stores the API's
    # ``averageRating`` unchanged, and that value can be fractional.
    rating: Optional[Union[int, float]]
    description: Optional[str]
    source: MetaSourceInfo
    languages: Optional[List[str]]
    # Keyed by identifier scheme, e.g. "isbn" or the provider's own id.
    identifiers: Dict[str, Union[str, int]]
|
||||||
|
|
||||||
|
|
||||||
class Metadata:
|
class Metadata:
|
||||||
__name__ = "Generic"
|
__name__ = "Generic"
|
||||||
__id__ = "generic"
|
__id__ = "generic"
|
||||||
|
@ -31,7 +55,9 @@ class Metadata:
|
||||||
self.active = state
|
self.active = state
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def search(self, query: str, generic_cover: str = ""):
|
def search(
|
||||||
|
self, query: str, generic_cover: str = "", locale: str = "en"
|
||||||
|
) -> Optional[List[MetaRecord]]:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -73,27 +99,3 @@ class Metadata:
|
||||||
not strip_joiners or token.lower() not in ("a", "and", "the", "&")
|
not strip_joiners or token.lower() not in ("a", "and", "the", "&")
|
||||||
):
|
):
|
||||||
yield token
|
yield token
|
||||||
|
|
||||||
|
|
||||||
class MetaSourceInfo(TypedDict):
|
|
||||||
id: str
|
|
||||||
description: str
|
|
||||||
link: str
|
|
||||||
|
|
||||||
|
|
||||||
class MetaRecord(TypedDict):
|
|
||||||
id: Union[str, int]
|
|
||||||
title: str
|
|
||||||
authors: List[str]
|
|
||||||
url: str
|
|
||||||
cover: str
|
|
||||||
series: Optional[str]
|
|
||||||
series_index: Optional[Union[int, float]]
|
|
||||||
tags: Optional[List[str]]
|
|
||||||
publisher: Optional[str]
|
|
||||||
publishedDate: Optional[str]
|
|
||||||
rating: Optional[int]
|
|
||||||
description: Optional[str]
|
|
||||||
source: MetaSourceInfo
|
|
||||||
languages: Optional[List[str]]
|
|
||||||
identifiers: Dict[str, Union[str, int]]
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user