add series, languages and isbn to google provider
commit d64589914f, parent 362fdc5716
@@ -17,41 +17,93 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.

 # Google Books api document: https://developers.google.com/books/docs/v1/using
+from typing import Dict, List, Optional
+from urllib.parse import quote

 import requests

-from cps.services.Metadata import Metadata
+from cps.isoLanguages import get_lang3, get_language_name
+from cps.services.Metadata import MetaRecord, Metadata


 class Google(Metadata):
     __name__ = "Google"
     __id__ = "google"
-    BASE_URL = "https://www.googleapis.com/books/v1/volumes?q="
+    DESCRIPTION = "Google Books"
+    META_URL = "https://books.google.com/"
+    BOOK_URL = "https://books.google.com/books?id="
+    SEARCH_URL = "https://www.googleapis.com/books/v1/volumes?q="
+    ISBN_TYPE = "ISBN_13"

-    def search(self, query, generic_cover=""):
+    def search(
+        self, query: str, generic_cover: str = "", locale: str = "en"
+    ) -> Optional[List[MetaRecord]]:
         if self.active:
             val = list()
-            result = requests.get(Google.BASE_URL + query.replace(" ","+"))
-            for r in result.json()['items']:
-                v = dict()
-                v['id'] = r['id']
-                v['title'] = r['volumeInfo']['title']
-                v['authors'] = r['volumeInfo'].get('authors', [])
-                v['description'] = r['volumeInfo'].get('description', "")
-                v['publisher'] = r['volumeInfo'].get('publisher', "")
-                v['publishedDate'] = r['volumeInfo'].get('publishedDate', "")
-                v['tags'] = r['volumeInfo'].get('categories', [])
-                v['rating'] = r['volumeInfo'].get('averageRating', 0)
-                if r['volumeInfo'].get('imageLinks'):
-                    v['cover'] = r['volumeInfo']['imageLinks']['thumbnail'].replace("http://", "https://")
-                else:
-                    # v['cover'] = "/../../../static/generic_cover.jpg"
-                    v['cover'] = generic_cover
-                v['source'] = {
-                    "id": self.__id__,
-                    "description": "Google Books",
-                    "link": "https://books.google.com/"}
-                v['url'] = "https://books.google.com/books?id=" + r['id']
-                val.append(v)
-            return val
+            title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
+            if title_tokens:
+                tokens = [quote(t.encode("utf-8")) for t in title_tokens]
+                query = "+".join(tokens)
+            results = requests.get(Google.SEARCH_URL + query)
+            for result in results.json()["items"]:
+                val.append(
+                    self._parse_search_result(
+                        result=result, generic_cover=generic_cover, locale=locale
+                    )
+                )
+            return val
+
+    def _parse_search_result(
+        self, result: Dict, generic_cover: str, locale: str
+    ) -> MetaRecord:
+        match = dict()
+        match["id"] = result["id"]
+        match["title"] = result["volumeInfo"]["title"]
+        match["authors"] = result["volumeInfo"].get("authors", [])
+        match["url"] = Google.BOOK_URL + result["id"]
+        match["cover"] = self._parse_cover(result=result, generic_cover=generic_cover)
+        match["description"] = result["volumeInfo"].get("description", "")
+        match["languages"] = self._parse_languages(result=result, locale=locale)
+        match["publisher"] = result["volumeInfo"].get("publisher", "")
+        match["publishedDate"] = result["volumeInfo"].get("publishedDate", "")
+        match["rating"] = result["volumeInfo"].get("averageRating", 0)
+        match["series"], match["series_index"] = "", 1
+        match["tags"] = result["volumeInfo"].get("categories", [])
+
+        match["source"] = {
+            "id": self.__id__,
+            "description": Google.DESCRIPTION,
+            "link": Google.META_URL,
+        }
+
+        match["identifiers"] = {
+            "google": match.get("id"),
+        }
+        match = self._parse_isbn(result=result, match=match)
+        return match
+
+    @staticmethod
+    def _parse_isbn(result: Dict, match: Dict) -> Dict:
+        identifiers = result["volumeInfo"].get("industryIdentifiers", [])
+        for identifier in identifiers:
+            if identifier.get("type") == Google.ISBN_TYPE:
+                match["identifiers"]["isbn"] = identifier.get("identifier")
+                break
+        return match
+
+    @staticmethod
+    def _parse_cover(result: Dict, generic_cover: str) -> str:
+        if result["volumeInfo"].get("imageLinks"):
+            cover_url = result["volumeInfo"]["imageLinks"]["thumbnail"]
+            return cover_url.replace("http://", "https://")
+        return generic_cover
+
+    @staticmethod
+    def _parse_languages(result: Dict, locale: str) -> List[str]:
+        language_iso2 = result.get("language", "")
+        languages = (
+            [get_language_name(locale, get_lang3(language_iso2))]
+            if language_iso2
+            else []
+        )
+        return languages
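Note: the new helpers map a Google Books volume onto the richer MetaRecord shape: _parse_isbn picks the ISBN_13 entry out of industryIdentifiers, _parse_cover upgrades the thumbnail URL to https, and _parse_languages turns the two-letter language code into a localized name via get_lang3/get_language_name. Below is a standalone sketch of the ISBN and cover handling only; the sample volume dict and the helper names are made up for illustration and do not import the cps package.

from typing import Optional

# Illustrative volume, shaped like a Google Books "items" entry (not a real response).
sample_volume = {
    "id": "abc123",
    "volumeInfo": {
        "title": "Example Title",
        "industryIdentifiers": [
            {"type": "ISBN_10", "identifier": "1111111111"},
            {"type": "ISBN_13", "identifier": "9781111111111"},
        ],
        "imageLinks": {"thumbnail": "http://books.google.com/books/content?id=abc123"},
        "language": "en",
    },
}


def pick_isbn13(volume: dict) -> Optional[str]:
    # Mirrors the idea of _parse_isbn: the first ISBN_13 entry wins.
    for ident in volume["volumeInfo"].get("industryIdentifiers", []):
        if ident.get("type") == "ISBN_13":
            return ident.get("identifier")
    return None


def cover_url(volume: dict, generic_cover: str = "generic_cover.jpg") -> str:
    # Mirrors the idea of _parse_cover: prefer the thumbnail, forced to https.
    links = volume["volumeInfo"].get("imageLinks")
    if links:
        return links["thumbnail"].replace("http://", "https://")
    return generic_cover


print(pick_isbn13(sample_volume))  # 9781111111111
print(cover_url(sample_volume))    # https://books.google.com/books/content?id=abc123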
@@ -107,7 +107,9 @@ class LubimyCzytac(Metadata):

     SUMMARY = "//script[@type='application/ld+json']//text()"

-    def search(self, query: str, generic_cover: str = "") -> Optional[List]:
+    def search(
+        self, query: str, generic_cover: str = "", locale: str = "en"
+    ) -> Optional[List[MetaRecord]]:
         if self.active:
             result = requests.get(self._prepare_query(title=query))
             root = fromstring(result.text)
@@ -117,10 +119,7 @@ class LubimyCzytac(Metadata):
                 with ThreadPool(processes=10) as pool:
                     final_matches = pool.starmap(
                         lc_parser.parse_single_book,
-                        [
-                            (match, generic_cover)
-                            for match in matches
-                        ],
+                        [(match, generic_cover) for match in matches],
                     )
                 return final_matches
             return matches
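Note: the second hunk above is purely cosmetic; the starmap argument list is collapsed onto one line. For reference, a tiny self-contained illustration of the fan-out pattern itself (the function and data below are stand-ins, not calibre-web code):

from multiprocessing.pool import ThreadPool


def fetch(match, generic_cover):
    # Stand-in for lc_parser.parse_single_book: receives one match plus the fallback cover.
    return {"title": match, "cover": generic_cover}


matches = ["book-a", "book-b", "book-c"]
with ThreadPool(processes=10) as pool:
    results = pool.starmap(fetch, [(match, "generic.jpg") for match in matches])
print(results)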
@@ -192,26 +191,25 @@ class LubimyCzytacParser:
                 )
         return matches

-    def parse_single_book(
-        self, match: Dict, generic_cover: str
-    ) -> MetaRecord:
+    def parse_single_book(self, match: Dict, generic_cover: str) -> MetaRecord:
         response = requests.get(match.get("url"))
         self.root = fromstring(response.text)
-        match["series"], match["series_index"] = self._parse_series()
-        match["tags"] = self._parse_tags()
-        match["cover"] = self._parse_cover(generic_cover=generic_cover)
-        match["description"] = self._parse_description()
-        match["languages"] = self._parse_languages()
         match["publisher"] = self._parse_publisher()
         match["publishedDate"] = self._parse_from_summary(
             attribute_name="datePublished"
         )
         match["rating"] = self._parse_rating()
+        match["description"] = self._parse_description()
+        match["cover"] = self._parse_cover(generic_cover=generic_cover)
+        match["series"], match["series_index"] = self._parse_series()
+        match["tags"] = self._parse_tags()
+
         match["source"] = {
             "id": self.metadata.__id__,
             "description": self.metadata.__name__,
             "link": LubimyCzytac.BASE_URL,
         }
+        match["languages"] = self._parse_languages()
         match["identifiers"] = {
             "isbn": self._parse_isbn(),
             "lubimyczytac": match["id"],
@@ -30,7 +30,7 @@ from sqlalchemy.exc import InvalidRequestError, OperationalError
 from sqlalchemy.orm.attributes import flag_modified

 from cps.services.Metadata import Metadata
-from . import constants, logger, ub
+from . import constants, get_locale, logger, ub

 meta = Blueprint("metadata", __name__)

@@ -113,11 +113,12 @@ def metadata_search():
     query = request.form.to_dict().get("query")
     data = list()
     active = current_user.view_settings.get("metadata", {})
+    locale = get_locale()
     if query:
         static_cover = url_for("static", filename="generic_cover.jpg")
         with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
             meta = {
-                executor.submit(c.search, query, static_cover): c
+                executor.submit(c.search, query, static_cover, locale): c
                 for c in cl
                 if active.get(c.__id__, True)
            }
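Note: metadata_search now reads the user's locale once and passes it to every provider's search call submitted to the thread pool, so language names can be localized per request. A self-contained sketch of the submit pattern; the dummy provider and the as_completed consumption loop are illustrative assumptions, not part of this hunk:

import concurrent.futures


class DummyProvider:
    # Stand-in for a Metadata provider such as Google; real providers call an external API.
    __id__ = "dummy"

    def search(self, query, generic_cover="", locale="en"):
        return [{"title": f"{query} ({locale})", "cover": generic_cover}]


providers = [DummyProvider()]
query, static_cover, locale = "dune", "generic_cover.jpg", "en"

with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    meta = {
        executor.submit(c.search, query, static_cover, locale): c
        for c in providers
    }
    for future in concurrent.futures.as_completed(meta):
        print(meta[future].__id__, future.result())  # one result list per provider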
@@ -20,6 +20,30 @@ import re
 from typing import Dict, Generator, List, Optional, TypedDict, Union


+class MetaSourceInfo(TypedDict):
+    id: str
+    description: str
+    link: str
+
+
+class MetaRecord(TypedDict):
+    id: Union[str, int]
+    title: str
+    authors: List[str]
+    url: str
+    cover: str
+    series: Optional[str]
+    series_index: Optional[Union[int, float]]
+    tags: Optional[List[str]]
+    publisher: Optional[str]
+    publishedDate: Optional[str]
+    rating: Optional[int]
+    description: Optional[str]
+    source: MetaSourceInfo
+    languages: Optional[List[str]]
+    identifiers: Dict[str, Union[str, int]]
+
+
 class Metadata:
     __name__ = "Generic"
     __id__ = "generic"
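Note: MetaRecord and MetaSourceInfo are TypedDicts, so providers keep returning plain dicts at runtime; the classes only document and type-check the expected keys. An example record with that shape, every value invented purely for illustration:

# Shaped like the MetaRecord TypedDict defined above; values are made up.
example_record = {
    "id": "abc123",
    "title": "Example Book",
    "authors": ["Jane Doe"],
    "url": "https://books.google.com/books?id=abc123",
    "cover": "https://example.org/cover.jpg",
    "series": "",
    "series_index": 1,
    "tags": ["Fiction"],
    "publisher": "Example Press",
    "publishedDate": "2001-01-01",
    "rating": 4,
    "description": "An invented description.",
    "source": {"id": "google", "description": "Google Books", "link": "https://books.google.com/"},
    "languages": ["English"],
    "identifiers": {"google": "abc123", "isbn": "9781111111111"},
}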
@@ -31,7 +55,9 @@ class Metadata:
         self.active = state

     @abc.abstractmethod
-    def search(self, query: str, generic_cover: str = ""):
+    def search(
+        self, query: str, generic_cover: str = "", locale: str = "en"
+    ) -> Optional[List[MetaRecord]]:
         pass

     @staticmethod
@@ -73,27 +99,3 @@ class Metadata:
                 not strip_joiners or token.lower() not in ("a", "and", "the", "&")
             ):
                 yield token
-
-
-class MetaSourceInfo(TypedDict):
-    id: str
-    description: str
-    link: str
-
-
-class MetaRecord(TypedDict):
-    id: Union[str, int]
-    title: str
-    authors: List[str]
-    url: str
-    cover: str
-    series: Optional[str]
-    series_index: Optional[Union[int, float]]
-    tags: Optional[List[str]]
-    publisher: Optional[str]
-    publishedDate: Optional[str]
-    rating: Optional[int]
-    description: Optional[str]
-    source: MetaSourceInfo
-    languages: Optional[List[str]]
-    identifiers: Dict[str, Union[str, int]]