add series, languages and isbn to google provider

This commit is contained in:
collerek 2021-12-13 15:14:19 +01:00
parent 362fdc5716
commit d64589914f
4 changed files with 119 additions and 66 deletions

View File

@ -17,41 +17,93 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Google Books api document: https://developers.google.com/books/docs/v1/using
from typing import Dict, List, Optional
from urllib.parse import quote
import requests
from cps.services.Metadata import Metadata
from cps.isoLanguages import get_lang3, get_language_name
from cps.services.Metadata import MetaRecord, Metadata
class Google(Metadata):
__name__ = "Google"
__id__ = "google"
BASE_URL = "https://www.googleapis.com/books/v1/volumes?q="
DESCRIPTION = "Google Books"
META_URL = "https://books.google.com/"
BOOK_URL = "https://books.google.com/books?id="
SEARCH_URL = "https://www.googleapis.com/books/v1/volumes?q="
ISBN_TYPE = "ISBN_13"
def search(self, query, generic_cover=""):
def search(
self, query: str, generic_cover: str = "", locale: str = "en"
) -> Optional[List[MetaRecord]]:
if self.active:
val = list()
result = requests.get(Google.BASE_URL + query.replace(" ","+"))
for r in result.json()['items']:
v = dict()
v['id'] = r['id']
v['title'] = r['volumeInfo']['title']
v['authors'] = r['volumeInfo'].get('authors', [])
v['description'] = r['volumeInfo'].get('description', "")
v['publisher'] = r['volumeInfo'].get('publisher', "")
v['publishedDate'] = r['volumeInfo'].get('publishedDate', "")
v['tags'] = r['volumeInfo'].get('categories', [])
v['rating'] = r['volumeInfo'].get('averageRating', 0)
if r['volumeInfo'].get('imageLinks'):
v['cover'] = r['volumeInfo']['imageLinks']['thumbnail'].replace("http://", "https://")
else:
# v['cover'] = "/../../../static/generic_cover.jpg"
v['cover'] = generic_cover
v['source'] = {
"id": self.__id__,
"description": "Google Books",
"link": "https://books.google.com/"}
v['url'] = "https://books.google.com/books?id=" + r['id']
val.append(v)
title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
if title_tokens:
tokens = [quote(t.encode("utf-8")) for t in title_tokens]
query = "+".join(tokens)
results = requests.get(Google.SEARCH_URL + query)
for result in results.json()["items"]:
val.append(
self._parse_search_result(
result=result, generic_cover=generic_cover, locale=locale
)
)
return val
def _parse_search_result(
self, result: Dict, generic_cover: str, locale: str
) -> MetaRecord:
match = dict()
match["id"] = result["id"]
match["title"] = result["volumeInfo"]["title"]
match["authors"] = result["volumeInfo"].get("authors", [])
match["url"] = Google.BOOK_URL + result["id"]
match["cover"] = self._parse_cover(result=result, generic_cover=generic_cover)
match["description"] = result["volumeInfo"].get("description", "")
match["languages"] = self._parse_languages(result=result, locale=locale)
match["publisher"] = result["volumeInfo"].get("publisher", "")
match["publishedDate"] = result["volumeInfo"].get("publishedDate", "")
match["rating"] = result["volumeInfo"].get("averageRating", 0)
match["series"], match["series_index"] = "", 1
match["tags"] = result["volumeInfo"].get("categories", [])
match["source"] = {
"id": self.__id__,
"description": Google.DESCRIPTION,
"link": Google.META_URL,
}
match["identifiers"] = {
"google": match.get("id"),
}
match = self._parse_isbn(result=result, match=match)
return match
@staticmethod
def _parse_isbn(result: Dict, match: Dict) -> Dict:
identifiers = result["volumeInfo"].get("industryIdentifiers", [])
for identifier in identifiers:
if identifier.get("type") == Google.ISBN_TYPE:
match["identifiers"]["isbn"] = identifier.get("identifier")
break
return match
@staticmethod
def _parse_cover(result: Dict, generic_cover: str) -> str:
if result["volumeInfo"].get("imageLinks"):
cover_url = result["volumeInfo"]["imageLinks"]["thumbnail"]
return cover_url.replace("http://", "https://")
return generic_cover
@staticmethod
def _parse_languages(result: Dict, locale: str) -> List[str]:
language_iso2 = result.get("language", "")
languages = (
[get_language_name(locale, get_lang3(language_iso2))]
if language_iso2
else []
)
return languages

View File

@ -107,7 +107,9 @@ class LubimyCzytac(Metadata):
SUMMARY = "//script[@type='application/ld+json']//text()"
def search(self, query: str, generic_cover: str = "") -> Optional[List]:
def search(
self, query: str, generic_cover: str = "", locale: str = "en"
) -> Optional[List[MetaRecord]]:
if self.active:
result = requests.get(self._prepare_query(title=query))
root = fromstring(result.text)
@ -117,10 +119,7 @@ class LubimyCzytac(Metadata):
with ThreadPool(processes=10) as pool:
final_matches = pool.starmap(
lc_parser.parse_single_book,
[
(match, generic_cover)
for match in matches
],
[(match, generic_cover) for match in matches],
)
return final_matches
return matches
@ -192,26 +191,25 @@ class LubimyCzytacParser:
)
return matches
def parse_single_book(
self, match: Dict, generic_cover: str
) -> MetaRecord:
def parse_single_book(self, match: Dict, generic_cover: str) -> MetaRecord:
response = requests.get(match.get("url"))
self.root = fromstring(response.text)
match["series"], match["series_index"] = self._parse_series()
match["tags"] = self._parse_tags()
match["cover"] = self._parse_cover(generic_cover=generic_cover)
match["description"] = self._parse_description()
match["languages"] = self._parse_languages()
match["publisher"] = self._parse_publisher()
match["publishedDate"] = self._parse_from_summary(
attribute_name="datePublished"
)
match["rating"] = self._parse_rating()
match["description"] = self._parse_description()
match["cover"] = self._parse_cover(generic_cover=generic_cover)
match["series"], match["series_index"] = self._parse_series()
match["tags"] = self._parse_tags()
match["source"] = {
"id": self.metadata.__id__,
"description": self.metadata.__name__,
"link": LubimyCzytac.BASE_URL,
}
match["languages"] = self._parse_languages()
match["identifiers"] = {
"isbn": self._parse_isbn(),
"lubimyczytac": match["id"],

View File

@ -30,7 +30,7 @@ from sqlalchemy.exc import InvalidRequestError, OperationalError
from sqlalchemy.orm.attributes import flag_modified
from cps.services.Metadata import Metadata
from . import constants, logger, ub
from . import constants, get_locale, logger, ub
meta = Blueprint("metadata", __name__)
@ -113,11 +113,12 @@ def metadata_search():
query = request.form.to_dict().get("query")
data = list()
active = current_user.view_settings.get("metadata", {})
locale = get_locale()
if query:
static_cover = url_for("static", filename="generic_cover.jpg")
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
meta = {
executor.submit(c.search, query, static_cover): c
executor.submit(c.search, query, static_cover, locale): c
for c in cl
if active.get(c.__id__, True)
}

View File

@ -20,6 +20,30 @@ import re
from typing import Dict, Generator, List, Optional, TypedDict, Union
class MetaSourceInfo(TypedDict):
id: str
description: str
link: str
class MetaRecord(TypedDict):
id: Union[str, int]
title: str
authors: List[str]
url: str
cover: str
series: Optional[str]
series_index: Optional[Union[int, float]]
tags: Optional[List[str]]
publisher: Optional[str]
publishedDate: Optional[str]
rating: Optional[int]
description: Optional[str]
source: MetaSourceInfo
languages: Optional[List[str]]
identifiers: Dict[str, Union[str, int]]
class Metadata:
__name__ = "Generic"
__id__ = "generic"
@ -31,7 +55,9 @@ class Metadata:
self.active = state
@abc.abstractmethod
def search(self, query: str, generic_cover: str = ""):
def search(
self, query: str, generic_cover: str = "", locale: str = "en"
) -> Optional[List[MetaRecord]]:
pass
@staticmethod
@ -73,27 +99,3 @@ class Metadata:
not strip_joiners or token.lower() not in ("a", "and", "the", "&")
):
yield token
class MetaSourceInfo(TypedDict):
id: str
description: str
link: str
class MetaRecord(TypedDict):
id: Union[str, int]
title: str
authors: List[str]
url: str
cover: str
series: Optional[str]
series_index: Optional[Union[int, float]]
tags: Optional[List[str]]
publisher: Optional[str]
publishedDate: Optional[str]
rating: Optional[int]
description: Optional[str]
source: MetaSourceInfo
languages: Optional[List[str]]
identifiers: Dict[str, Union[str, int]]