Merge remote-tracking branch 'lubimyczytac/add_lubimyczytac.pl_meta_provider' into Develop
# Conflicts: # optional-requirements.txt
This commit is contained in:
commit
4f3c396450
|
@ -17,49 +17,68 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
# ComicVine api document: https://comicvine.gamespot.com/api/documentation
|
# ComicVine api document: https://comicvine.gamespot.com/api/documentation
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from cps.services.Metadata import Metadata
|
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
|
||||||
|
|
||||||
|
|
||||||
class ComicVine(Metadata):
    """Metadata provider that searches ComicVine issues by title."""

    __name__ = "ComicVine"
    __id__ = "comicvine"
    DESCRIPTION = "ComicVine Books"
    META_URL = "https://comicvine.gamespot.com/"
    # NOTE(review): the API key is committed in source; consider moving it to
    # configuration.
    API_KEY = "57558043c53943d5d1e96a9ad425b0eb85532ee6"
    BASE_URL = (
        f"https://comicvine.gamespot.com/api/search?api_key={API_KEY}"
        "&resources=issue&query="
    )
    QUERY_PARAMS = "&sort=name:desc&format=json"
    HEADERS = {"User-Agent": "Not Evil Browser"}

    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List[MetaRecord]]:
        """Query the ComicVine search API and return one record per issue.

        :param query: free-text title to search for
        :param generic_cover: cover URL used when an issue has no image URL
        :param locale: passed through to the result parser
        :return: list of records; empty when the provider is inactive or the
            response carries no results
        """
        val = list()
        if self.active:
            title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
            if title_tokens:
                tokens = [quote(t.encode("utf-8")) for t in title_tokens]
                query = "%20".join(tokens)
            # Keep the response under its own name instead of re-binding it as
            # the loop variable.
            response = requests.get(
                f"{ComicVine.BASE_URL}{query}{ComicVine.QUERY_PARAMS}",
                headers=ComicVine.HEADERS,
            )
            # A missing or null "results" entry yields no matches rather than
            # a KeyError/TypeError.
            for result in response.json().get("results") or []:
                match = self._parse_search_result(
                    result=result, generic_cover=generic_cover, locale=locale
                )
                val.append(match)
        return val

    def _parse_search_result(
        self, result: Dict, generic_cover: str, locale: str
    ) -> MetaRecord:
        """Build a ``MetaRecord`` from one entry of the search response.

        :param result: one element of the response's ``results`` list
        :param generic_cover: fallback cover URL
        :param locale: unused here; kept for a uniform provider signature
        """
        series = result["volume"].get("name", "")
        series_index = result.get("issue_number", 0)
        # "name" can be present but null; avoid rendering "None" in the title.
        issue_name = result.get("name") or ""
        match = MetaRecord(
            id=result["id"],
            title=f"{series}#{series_index} - {issue_name}",
            authors=result.get("authors", []),
            url=result.get("site_detail_url", ""),
            source=MetaSourceInfo(
                id=self.__id__,
                description=ComicVine.DESCRIPTION,
                link=ComicVine.META_URL,
            ),
            series=series,
        )
        match.cover = result["image"].get("original_url", generic_cover)
        match.description = result.get("description", "")
        # Fall back to "date_added" when "store_date" is absent *or* null;
        # dict.get's default only covers the absent case.
        match.publishedDate = result.get("store_date") or result.get("date_added")
        match.series_index = series_index
        match.tags = ["Comics", series]
        match.identifiers = {"comicvine": match.id}
        return match
|
||||||
|
|
|
@ -17,39 +17,93 @@
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
# Google Books api document: https://developers.google.com/books/docs/v1/using
|
# Google Books api document: https://developers.google.com/books/docs/v1/using
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from cps.services.Metadata import Metadata
|
|
||||||
|
from cps.isoLanguages import get_lang3, get_language_name
|
||||||
|
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
|
||||||
|
|
||||||
|
|
||||||
class Google(Metadata):
    """Metadata provider backed by the Google Books volumes search endpoint."""

    __name__ = "Google"
    __id__ = "google"
    DESCRIPTION = "Google Books"
    META_URL = "https://books.google.com/"
    BOOK_URL = "https://books.google.com/books?id="
    SEARCH_URL = "https://www.googleapis.com/books/v1/volumes?q="
    ISBN_TYPE = "ISBN_13"

    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List[MetaRecord]]:
        """Search Google Books and return one record per returned volume.

        :param query: free-text title to search for
        :param generic_cover: cover URL used when a volume has no thumbnail
        :param locale: locale used to render language names
        :return: list of records; empty when the provider is inactive or the
            response has no ``items``
        """
        val = list()
        if self.active:
            title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
            if title_tokens:
                tokens = [quote(t.encode("utf-8")) for t in title_tokens]
                query = "+".join(tokens)
            results = requests.get(Google.SEARCH_URL + query)
            # "items" is not guaranteed to be present (e.g. no hits); treat a
            # missing key as an empty result instead of raising KeyError.
            for result in results.json().get("items", []):
                val.append(
                    self._parse_search_result(
                        result=result, generic_cover=generic_cover, locale=locale
                    )
                )
        return val

    def _parse_search_result(
        self, result: Dict, generic_cover: str, locale: str
    ) -> MetaRecord:
        """Build a ``MetaRecord`` from one volume of the search response."""
        volume_info = result["volumeInfo"]
        match = MetaRecord(
            id=result["id"],
            # Not every volume carries a title; default to an empty string.
            title=volume_info.get("title", ""),
            authors=volume_info.get("authors", []),
            url=Google.BOOK_URL + result["id"],
            source=MetaSourceInfo(
                id=self.__id__,
                description=Google.DESCRIPTION,
                link=Google.META_URL,
            ),
        )

        match.cover = self._parse_cover(result=result, generic_cover=generic_cover)
        match.description = volume_info.get("description", "")
        match.languages = self._parse_languages(result=result, locale=locale)
        match.publisher = volume_info.get("publisher", "")
        match.publishedDate = volume_info.get("publishedDate", "")
        match.rating = volume_info.get("averageRating", 0)
        match.series, match.series_index = "", 1
        match.tags = volume_info.get("categories", [])

        match.identifiers = {"google": match.id}
        match = self._parse_isbn(result=result, match=match)
        return match

    @staticmethod
    def _parse_isbn(result: Dict, match: MetaRecord) -> MetaRecord:
        """Add the first identifier of type ``ISBN_TYPE`` to ``match.identifiers``."""
        identifiers = result["volumeInfo"].get("industryIdentifiers", [])
        for identifier in identifiers:
            if identifier.get("type") == Google.ISBN_TYPE:
                match.identifiers["isbn"] = identifier.get("identifier")
                break
        return match

    @staticmethod
    def _parse_cover(result: Dict, generic_cover: str) -> str:
        """Return the volume thumbnail forced to https, else ``generic_cover``."""
        image_links = result["volumeInfo"].get("imageLinks") or {}
        # "imageLinks" may exist without a "thumbnail" entry.
        cover_url = image_links.get("thumbnail")
        if cover_url:
            return cover_url.replace("http://", "https://")
        return generic_cover

    @staticmethod
    def _parse_languages(result: Dict, locale: str) -> List[str]:
        """Return the volume language as a display name for ``locale``."""
        language_iso2 = result["volumeInfo"].get("language", "")
        languages = (
            [get_language_name(locale, get_lang3(language_iso2))]
            if language_iso2
            else []
        )
        return languages
|
||||||
|
|
337
cps/metadata_provider/lubimyczytac.py
Normal file
337
cps/metadata_provider/lubimyczytac.py
Normal file
|
@ -0,0 +1,337 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
|
||||||
|
# Copyright (C) 2021 OzzieIsaacs
|
||||||
|
#
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
import datetime
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from multiprocessing.pool import ThreadPool
|
||||||
|
from typing import List, Optional, Tuple, Union
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from dateutil import parser
|
||||||
|
from html2text import HTML2Text
|
||||||
|
from lxml.html import HtmlElement, fromstring, tostring
|
||||||
|
from markdown2 import Markdown
|
||||||
|
|
||||||
|
from cps.isoLanguages import get_language_name
|
||||||
|
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
|
||||||
|
|
||||||
|
# Parallel tables: each accented character in the first string is replaced by
# the character at the same position in the second string.
SYMBOLS_TO_TRANSLATE = (
    "öÖüÜóÓőŐúÚéÉáÁűŰíÍąĄćĆęĘłŁńŃóÓśŚźŹżŻ",
    "oOuUoOoOuUeEaAuUiIaAcCeElLnNoOsSzZzZ",
)
# str.maketrans builds the same {ordinal: ordinal} table as zipping the two
# strings by hand, and raises if the tables ever differ in length.
SYMBOL_TRANSLATION_MAP = str.maketrans(*SYMBOLS_TO_TRANSLATE)
|
||||||
|
|
||||||
|
|
||||||
|
def get_int_or_float(value: str) -> Union[int, float]:
    """Convert a numeric string to ``int`` when it is integral, else ``float``.

    :param value: text accepted by ``float()``
    :return: ``int`` for whole numbers (``"2.0"`` -> ``2``), otherwise the
        ``float`` value
    :raises ValueError: if ``value`` is not a valid float literal
    """
    number = float(value)
    # is_integer() is False for inf/nan, so non-finite input is returned as a
    # float instead of raising from int().
    return int(number) if number.is_integer() else number
|
||||||
|
|
||||||
|
|
||||||
|
def strip_accents(s: Optional[str]) -> Optional[str]:
    """Translate *s* through ``SYMBOL_TRANSLATION_MAP``; ``None`` stays ``None``."""
    if s is None:
        return s
    return s.translate(SYMBOL_TRANSLATION_MAP)
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_comments_html(html: str) -> str:
    """Round-trip *html* through ``html2text`` and back to HTML via Markdown."""
    as_markdown = html2text(html)
    return Markdown().convert(as_markdown)
|
||||||
|
|
||||||
|
|
||||||
|
def html2text(html: str) -> str:
    """Convert *html* to text with ``HTML2Text``.

    Byte input is decoded as UTF-8 before conversion.
    """
    markup = html.decode("utf-8") if isinstance(html, bytes) else html
    # <u> becomes emphasis in html2text, so opening and closing <u> tags are
    # rewritten to <span> first (attributes are kept).
    markup = re.sub(
        r"<\s*(?P<solidus>/?)\s*[uU]\b(?P<rest>[^>]*)>",
        r"<\g<solidus>span\g<rest>>",
        markup,
    )
    converter = HTML2Text()
    converter.body_width = 0
    converter.single_line_break = True
    converter.emphasis_mark = "*"
    return converter.handle(markup)
|
||||||
|
|
||||||
|
|
||||||
|
class LubimyCzytac(Metadata):
    """Metadata provider that scrapes search and book pages of lubimyczytac.pl."""

    __name__ = "LubimyCzytac.pl"
    __id__ = "lubimyczytac"

    BASE_URL = "https://lubimyczytac.pl"

    # XPath expressions evaluated against the search result page.
    BOOK_SEARCH_RESULT_XPATH = (
        "*//div[@class='listSearch']//div[@class='authorAllBooks__single']"
    )
    SINGLE_BOOK_RESULT_XPATH = ".//div[contains(@class,'authorAllBooks__singleText')]"
    TITLE_PATH = "/div/a[contains(@class,'authorAllBooks__singleTextTitle')]"
    TITLE_TEXT_PATH = f"{TITLE_PATH}//text()"
    URL_PATH = f"{TITLE_PATH}/@href"
    AUTHORS_PATH = "/div/a[contains(@href,'autor')]//text()"

    SIBLINGS = "/following-sibling::dd"

    # XPath expressions evaluated against a single book page.
    CONTAINER = "//section[@class='container book']"
    PUBLISHER = f"{CONTAINER}//dt[contains(text(),'Wydawnictwo:')]{SIBLINGS}/a/text()"
    LANGUAGES = f"{CONTAINER}//dt[contains(text(),'Język:')]{SIBLINGS}/text()"
    DESCRIPTION = f"{CONTAINER}//div[@class='collapse-content']"
    SERIES = f"{CONTAINER}//span/a[contains(@href,'/cykl/')]/text()"

    DETAILS = "//div[@id='book-details']"
    # Deliberately unterminated prefix; the two expressions below complete it.
    PUBLISH_DATE = "//dt[contains(@title,'Data pierwszego wydania"
    FIRST_PUBLISH_DATE = f"{DETAILS}{PUBLISH_DATE} oryginalnego')]{SIBLINGS}[1]/text()"
    FIRST_PUBLISH_DATE_PL = f"{DETAILS}{PUBLISH_DATE} polskiego')]{SIBLINGS}[1]/text()"
    TAGS = "//nav[@aria-label='breadcrumb']//a[contains(@href,'/ksiazki/k/')]/text()"

    RATING = "//meta[@property='books:rating:value']/@content"
    COVER = "//meta[@property='og:image']/@content"
    ISBN = "//meta[@property='books:isbn']/@content"
    META_TITLE = "//meta[@property='og:description']/@content"

    SUMMARY = "//script[@type='application/ld+json']//text()"

    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List[MetaRecord]]:
        """Search lubimyczytac.pl by title and return fully populated records.

        The search page is parsed into preliminary matches, then each book
        page is fetched through a thread pool to fill in the details.

        :param query: free-text title to search for
        :param generic_cover: cover URL used when a book page has no cover
        :param locale: locale used to render language names
        :return: list of records; empty when the provider is inactive, the
            query has no usable tokens, or nothing matched
        """
        if not self.active:
            return []
        search_url = self._prepare_query(title=query)
        if not search_url:
            # No searchable tokens: an empty URL would make requests raise.
            return []
        result = requests.get(search_url)
        root = fromstring(result.text)
        lc_parser = LubimyCzytacParser(root=root, metadata=self)
        matches = lc_parser.parse_search_results()
        if not matches:
            return matches
        with ThreadPool(processes=10) as pool:
            final_matches = pool.starmap(
                lc_parser.parse_single_book,
                [(match, generic_cover, locale) for match in matches],
            )
        return final_matches

    def _prepare_query(self, title: str) -> str:
        """Build the search URL for *title*, or ``""`` if no tokens remain.

        ``?``, ``(``, ``)`` and ``/`` are removed, underscores become spaces,
        and anything from the first ``"`` or ``,,`` onwards is dropped before
        the title is tokenised and percent-encoded.
        """
        # Raw string: "\?" and "\/" are invalid escapes in a plain literal.
        title = re.sub(r"[\?()\/]", "", title)
        title = title.replace("_", " ")
        if '"' in title or ",," in title:
            title = title.split('"')[0].split(",,")[0]

        # "/" has already been stripped above, so no slash-specific
        # tokenisation is needed here.
        title_tokens = list(self.get_title_tokens(title, strip_joiners=False))
        query = "%20".join(quote(t.encode("utf-8")) for t in title_tokens)
        if not query:
            return ""
        return f"{LubimyCzytac.BASE_URL}/szukaj/ksiazki?phrase={query}"
|
||||||
|
|
||||||
|
|
||||||
|
class LubimyCzytacParser:
    """Extract ``MetaRecord`` data from parsed lubimyczytac.pl pages.

    ``root`` is the document the XPath helpers query by default; ``metadata``
    is the provider whose ``__id__`` and ``__name__`` label the results.
    """

    # HTML snippets appended to the description; "{0}" is the inserted value.
    PAGES_TEMPLATE = "<p id='strony'>Książka ma {0} stron(y).</p>"
    PUBLISH_DATE_TEMPLATE = "<p id='pierwsze_wydanie'>Data pierwszego wydania: {0}</p>"
    PUBLISH_DATE_PL_TEMPLATE = (
        "<p id='pierwsze_wydanie'>Data pierwszego wydania w Polsce: {0}</p>"
    )

    def __init__(self, root: HtmlElement, metadata: Metadata) -> None:
        self.root = root
        self.metadata = metadata

    def parse_search_results(self) -> List[MetaRecord]:
        """Return a preliminary record for every complete search result.

        Results missing a title, URL or author list are skipped.
        """
        matches = []
        results = self.root.xpath(LubimyCzytac.BOOK_SEARCH_RESULT_XPATH)
        for result in results:
            title = self._parse_xpath_node(
                root=result,
                xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
                f"{LubimyCzytac.TITLE_TEXT_PATH}",
            )

            book_url = self._parse_xpath_node(
                root=result,
                xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
                f"{LubimyCzytac.URL_PATH}",
            )
            authors = self._parse_xpath_node(
                root=result,
                xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
                f"{LubimyCzytac.AUTHORS_PATH}",
                take_first=False,
            )
            if not all([title, book_url, authors]):
                continue
            matches.append(
                MetaRecord(
                    # First path segment after "/ksiazka/" is used as the id.
                    id=book_url.replace("/ksiazka/", "").split("/")[0],
                    title=title,
                    authors=[strip_accents(author) for author in authors],
                    url=LubimyCzytac.BASE_URL + book_url,
                    source=MetaSourceInfo(
                        id=self.metadata.__id__,
                        description=self.metadata.__name__,
                        link=LubimyCzytac.BASE_URL,
                    ),
                )
            )
        return matches

    def parse_single_book(
        self, match: MetaRecord, generic_cover: str, locale: str
    ) -> MetaRecord:
        """Fetch ``match.url`` and fill *match* with details from the book page.

        :param match: preliminary record from ``parse_search_results``
        :param generic_cover: cover URL used when the page declares none
        :param locale: locale used to render language names
        :return: the same ``match`` object, populated in place
        """
        response = requests.get(match.url)
        # Parse with a parser private to this call. LubimyCzytac.search runs
        # this method from a thread pool on one shared instance, so storing
        # the page on ``self.root`` would let concurrent calls read each
        # other's documents.
        page = LubimyCzytacParser(
            root=fromstring(response.text), metadata=self.metadata
        )
        match.cover = page._parse_cover(generic_cover=generic_cover)
        match.description = page._parse_description()
        match.languages = page._parse_languages(locale=locale)
        match.publisher = page._parse_publisher()
        match.publishedDate = page._parse_from_summary(attribute_name="datePublished")
        match.rating = page._parse_rating()
        match.series, match.series_index = page._parse_series()
        match.tags = page._parse_tags()
        match.identifiers = {
            "isbn": page._parse_isbn(),
            "lubimyczytac": match.id,
        }
        return match

    def _parse_xpath_node(
        self,
        xpath: str,
        root: Optional[HtmlElement] = None,
        take_first: bool = True,
        strip_element: bool = True,
    ) -> Optional[Union[str, List[str]]]:
        """Evaluate *xpath* and return its first hit or all hits.

        :param xpath: expression to evaluate
        :param root: node to evaluate against; defaults to ``self.root``
        :param take_first: return only the first hit instead of a list
        :param strip_element: call ``strip()`` on the first hit; pass
            ``False`` when the hit is an element rather than text
        :return: ``None`` when nothing matched
        """
        root = root if root is not None else self.root
        node = root.xpath(xpath)
        if not node:
            return None
        if take_first:
            return node[0].strip() if strip_element else node[0]
        return [x.strip() for x in node]

    def _parse_cover(self, generic_cover) -> Optional[str]:
        """Return the page's ``og:image`` URL, else *generic_cover*."""
        return (
            self._parse_xpath_node(xpath=LubimyCzytac.COVER, take_first=True)
            or generic_cover
        )

    def _parse_publisher(self) -> Optional[str]:
        """Return the publisher name, or ``None`` when it is not listed."""
        return self._parse_xpath_node(xpath=LubimyCzytac.PUBLISHER, take_first=True)

    def _parse_languages(self, locale: str) -> List[str]:
        """Return display names for the recognised languages of the book.

        Only "polski" and "angielski" are recognised.
        """
        languages = list()
        lang = self._parse_xpath_node(xpath=LubimyCzytac.LANGUAGES, take_first=True)
        if lang:
            if "polski" in lang:
                languages.append("pol")
            if "angielski" in lang:
                languages.append("eng")
        return [get_language_name(locale, language) for language in languages]

    def _parse_series(self) -> Tuple[Optional[str], Optional[Union[float, int]]]:
        """Split series text of the form ``"<name> (tom <n>)"``.

        :return: ``(name, index)``; the index is ``0`` when the volume is not
            numeric, and ``(None, None)`` when there is no series text or it
            carries no ``" (tom "`` marker
        """
        series = self._parse_xpath_node(xpath=LubimyCzytac.SERIES, take_first=True)
        if not series:
            return None, None
        # partition() instead of split()+unpack: a name that merely contains
        # "tom " (without the " (tom " marker) must not raise.
        series_name, marker, series_info = series.partition(" (tom ")
        if not marker:
            return None, None
        series_index = 0
        series_info = series_info.replace(" ", "").replace(")", "")
        # A bundle such as "1-3" is indexed by its first volume.
        series_info = series_info.split("-", 1)[0]
        if series_info.replace(".", "").isdigit():
            try:
                series_index = get_int_or_float(series_info)
            except ValueError:
                # e.g. "1.2.3" passes the digit check but is not a number
                series_index = 0
        return series_name, series_index

    def _parse_tags(self) -> List[str]:
        """Return the breadcrumb categories with accents stripped."""
        # The XPath helper returns None when there is no match.
        tags = self._parse_xpath_node(xpath=LubimyCzytac.TAGS, take_first=False) or []
        return [
            strip_accents(w.replace(", itd.", " itd."))
            for w in tags
            if isinstance(w, str)
        ]

    def _parse_from_summary(self, attribute_name: str) -> Optional[str]:
        """Read *attribute_name* from the page's first ``ld+json`` script.

        :return: the value as stripped text, or ``None`` when the script is
            missing, is not a JSON object, or lacks the attribute
        """
        summary_text = self._parse_xpath_node(xpath=LubimyCzytac.SUMMARY)
        if not summary_text:
            return None
        try:
            data = json.loads(summary_text)
        except ValueError:
            # Malformed embedded JSON should not abort the whole record.
            return None
        if not isinstance(data, dict):
            return None
        value = data.get(attribute_name)
        # Values may be numbers rather than strings; normalise to text.
        return str(value).strip() if value is not None else None

    def _parse_rating(self) -> Optional[int]:
        """Return the page rating halved and rounded, or ``None`` if absent."""
        rating = self._parse_xpath_node(xpath=LubimyCzytac.RATING)
        if not rating:
            return None
        return round(float(rating.replace(",", ".")) / 2)

    def _parse_date(self, xpath="first_publish") -> Optional[datetime.datetime]:
        """Parse one of the first-publication dates from the details box.

        :param xpath: ``"first_publish"`` or ``"first_publish_pl"``
        :return: the parsed date, or ``None`` when it is missing or cannot be
            parsed
        """
        options = {
            "first_publish": LubimyCzytac.FIRST_PUBLISH_DATE,
            "first_publish_pl": LubimyCzytac.FIRST_PUBLISH_DATE_PL,
        }
        date = self._parse_xpath_node(xpath=options.get(xpath))
        if not date:
            return None
        try:
            return parser.parse(date)
        except (ValueError, OverflowError):
            # This date only decorates the description; skip it if unreadable.
            return None

    def _parse_isbn(self) -> Optional[str]:
        """Return the ``books:isbn`` meta value, or ``None``."""
        return self._parse_xpath_node(xpath=LubimyCzytac.ISBN)

    def _parse_description(self) -> str:
        """Return the sanitised description with extra facts appended.

        Uses the page's description block (minus ``<p class="source">``
        nodes), falling back to the ``og:description`` meta value.
        """
        description = ""
        description_node = self._parse_xpath_node(
            xpath=LubimyCzytac.DESCRIPTION, strip_element=False
        )
        if description_node is not None:
            for source in self.root.xpath('//p[@class="source"]'):
                source.getparent().remove(source)
            description = tostring(description_node, method="html")
            description = sanitize_comments_html(description)

        else:
            description_node = self._parse_xpath_node(xpath=LubimyCzytac.META_TITLE)
            if description_node is not None:
                description = description_node
                description = sanitize_comments_html(description)
        description = self._add_extra_info_to_description(description=description)
        return description

    def _add_extra_info_to_description(self, description: str) -> str:
        """Append page count and first-publication dates when available."""
        pages = self._parse_from_summary(attribute_name="numberOfPages")
        if pages:
            description += LubimyCzytacParser.PAGES_TEMPLATE.format(pages)

        first_publish_date = self._parse_date()
        if first_publish_date:
            description += LubimyCzytacParser.PUBLISH_DATE_TEMPLATE.format(
                first_publish_date.strftime("%d.%m.%Y")
            )

        first_publish_date_pl = self._parse_date(xpath="first_publish_pl")
        if first_publish_date_pl:
            description += LubimyCzytacParser.PUBLISH_DATE_PL_TEMPLATE.format(
                first_publish_date_pl.strftime("%d.%m.%Y")
            )

        return description
|
|
@ -15,46 +15,52 @@
|
||||||
#
|
#
|
||||||
# You should have received a copy of the GNU General Public License
|
# You should have received a copy of the GNU General Public License
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
import itertools
|
||||||
|
from typing import Dict, List, Optional
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
from scholarly import scholarly
|
from scholarly import scholarly
|
||||||
|
|
||||||
from cps.services.Metadata import Metadata
|
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
|
||||||
|
|
||||||
class scholar(Metadata):
    """Metadata provider that searches Google Scholar through ``scholarly``."""

    __name__ = "Google Scholar"
    __id__ = "googlescholar"
    META_URL = "https://scholar.google.com/"

    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List[MetaRecord]]:
        """Return records for at most the first ten publications found.

        :param query: free-text title to search for
        :param generic_cover: cover URL used when a result has no image
        :param locale: passed through to the result parser
        :return: list of records; empty when the provider is inactive
        """
        val = list()
        if self.active:
            title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
            if title_tokens:
                # NOTE(review): tokens are percent-encoded before being handed
                # to scholarly; confirm scholarly does not encode them again.
                tokens = [quote(t.encode("utf-8")) for t in title_tokens]
                query = " ".join(tokens)
            # islice caps the lazily evaluated result generator at ten items.
            scholar_gen = itertools.islice(scholarly.search_pubs(query), 10)
            for result in scholar_gen:
                match = self._parse_search_result(
                    result=result, generic_cover=generic_cover, locale=locale
                )
                val.append(match)
        return val

    def _parse_search_result(
        self, result: Dict, generic_cover: str, locale: str
    ) -> MetaRecord:
        """Build a ``MetaRecord`` from one publication dict.

        :param result: publication as yielded by ``scholarly.search_pubs``
        :param generic_cover: fallback cover URL
        :param locale: unused here; kept for a uniform provider signature
        """
        bib = result["bib"]
        publication_url = result.get("pub_url", result.get("eprint_url", ""))
        match = MetaRecord(
            id=publication_url,
            title=bib.get("title"),
            authors=bib.get("author", []),
            url=publication_url,
            source=MetaSourceInfo(
                id=self.__id__, description=self.__name__, link=scholar.META_URL
            ),
        )

        match.cover = result.get("image", {}).get("original_url", generic_cover)
        match.description = bib.get("abstract", "")
        match.publisher = bib.get("venue", "")
        # Only the year is known, so the date is pinned to January 1st; a
        # missing year gives an empty date instead of a TypeError.
        pub_year = bib.get("pub_year")
        match.publishedDate = f"{pub_year}-01-01" if pub_year else ""
        match.identifiers = {"scholar": match.id}
        return match
|
||||||
|
|
11
cps/opds.py
11
cps/opds.py
|
@ -432,17 +432,9 @@ def feed_languagesindex():
|
||||||
if current_user.filter_language() == u"all":
|
if current_user.filter_language() == u"all":
|
||||||
languages = calibre_db.speaking_language()
|
languages = calibre_db.speaking_language()
|
||||||
else:
|
else:
|
||||||
#try:
|
|
||||||
# cur_l = LC.parse(current_user.filter_language())
|
|
||||||
#except UnknownLocaleError:
|
|
||||||
# cur_l = None
|
|
||||||
languages = calibre_db.session.query(db.Languages).filter(
|
languages = calibre_db.session.query(db.Languages).filter(
|
||||||
db.Languages.lang_code == current_user.filter_language()).all()
|
db.Languages.lang_code == current_user.filter_language()).all()
|
||||||
languages[0].name = isoLanguages.get_language_name(get_locale(), languages[0].lang_code)
|
languages[0].name = isoLanguages.get_language_name(get_locale(), languages[0].lang_code)
|
||||||
#if cur_l:
|
|
||||||
# languages[0].name = cur_l.get_language_name(get_locale())
|
|
||||||
#else:
|
|
||||||
# languages[0].name = _(isoLanguages.get(part3=languages[0].lang_code).name)
|
|
||||||
pagination = Pagination((int(off) / (int(config.config_books_per_page)) + 1), config.config_books_per_page,
|
pagination = Pagination((int(off) / (int(config.config_books_per_page)) + 1), config.config_books_per_page,
|
||||||
len(languages))
|
len(languages))
|
||||||
return render_xml_template('feed.xml', listelements=languages, folder='opds.feed_languages', pagination=pagination)
|
return render_xml_template('feed.xml', listelements=languages, folder='opds.feed_languages', pagination=pagination)
|
||||||
|
@ -530,7 +522,8 @@ def feed_search(term):
|
||||||
entries, __, ___ = calibre_db.get_search_results(term, config_read_column=config.config_read_column)
|
entries, __, ___ = calibre_db.get_search_results(term, config_read_column=config.config_read_column)
|
||||||
entries_count = len(entries) if len(entries) > 0 else 1
|
entries_count = len(entries) if len(entries) > 0 else 1
|
||||||
pagination = Pagination(1, entries_count, entries_count)
|
pagination = Pagination(1, entries_count, entries_count)
|
||||||
return render_xml_template('feed.xml', searchterm=term, entries=entries, pagination=pagination)
|
items = [entry[0] for entry in entries]
|
||||||
|
return render_xml_template('feed.xml', searchterm=term, entries=items, pagination=pagination)
|
||||||
else:
|
else:
|
||||||
return render_xml_template('feed.xml', searchterm="")
|
return render_xml_template('feed.xml', searchterm="")
|
||||||
|
|
||||||
|
|
|
@ -16,25 +16,27 @@
|
||||||
# You should have received a copy of the GNU General Public License
|
# You should have received a copy of the GNU General Public License
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import os
|
|
||||||
import json
|
|
||||||
import importlib
|
|
||||||
import sys
|
|
||||||
import inspect
|
|
||||||
import datetime
|
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
|
import importlib
|
||||||
|
import inspect
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
# from time import time
|
||||||
|
from dataclasses import asdict
|
||||||
|
|
||||||
from flask import Blueprint, request, Response, url_for
|
from flask import Blueprint, Response, request, url_for
|
||||||
from flask_login import current_user
|
from flask_login import current_user
|
||||||
from flask_login import login_required
|
from flask_login import login_required
|
||||||
|
from sqlalchemy.exc import InvalidRequestError, OperationalError
|
||||||
from sqlalchemy.orm.attributes import flag_modified
|
from sqlalchemy.orm.attributes import flag_modified
|
||||||
from sqlalchemy.exc import OperationalError, InvalidRequestError
|
|
||||||
|
|
||||||
from . import constants, logger, ub
|
|
||||||
from cps.services.Metadata import Metadata
|
from cps.services.Metadata import Metadata
|
||||||
|
from . import constants, get_locale, logger, ub
|
||||||
|
|
||||||
|
# current_milli_time = lambda: int(round(time() * 1000))
|
||||||
|
|
||||||
meta = Blueprint('metadata', __name__)
|
meta = Blueprint("metadata", __name__)
|
||||||
|
|
||||||
log = logger.create()
|
log = logger.create()
|
||||||
|
|
||||||
|
@ -42,7 +44,7 @@ new_list = list()
|
||||||
meta_dir = os.path.join(constants.BASE_DIR, "cps", "metadata_provider")
|
meta_dir = os.path.join(constants.BASE_DIR, "cps", "metadata_provider")
|
||||||
modules = os.listdir(os.path.join(constants.BASE_DIR, "cps", "metadata_provider"))
|
modules = os.listdir(os.path.join(constants.BASE_DIR, "cps", "metadata_provider"))
|
||||||
for f in modules:
|
for f in modules:
|
||||||
if os.path.isfile(os.path.join(meta_dir, f)) and not f.endswith('__init__.py'):
|
if os.path.isfile(os.path.join(meta_dir, f)) and not f.endswith("__init__.py"):
|
||||||
a = os.path.basename(f)[:-3]
|
a = os.path.basename(f)[:-3]
|
||||||
try:
|
try:
|
||||||
importlib.import_module("cps.metadata_provider." + a)
|
importlib.import_module("cps.metadata_provider." + a)
|
||||||
|
@ -51,34 +53,46 @@ for f in modules:
|
||||||
log.error("Import error for metadata source: {}".format(a))
|
log.error("Import error for metadata source: {}".format(a))
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def list_classes(provider_list):
|
def list_classes(provider_list):
|
||||||
classes = list()
|
classes = list()
|
||||||
for element in provider_list:
|
for element in provider_list:
|
||||||
for name, obj in inspect.getmembers(sys.modules["cps.metadata_provider." + element]):
|
for name, obj in inspect.getmembers(
|
||||||
if inspect.isclass(obj) and name != "Metadata" and issubclass(obj, Metadata):
|
sys.modules["cps.metadata_provider." + element]
|
||||||
|
):
|
||||||
|
if (
|
||||||
|
inspect.isclass(obj)
|
||||||
|
and name != "Metadata"
|
||||||
|
and issubclass(obj, Metadata)
|
||||||
|
):
|
||||||
classes.append(obj())
|
classes.append(obj())
|
||||||
return classes
|
return classes
|
||||||
|
|
||||||
|
|
||||||
cl = list_classes(new_list)
|
cl = list_classes(new_list)
|
||||||
|
|
||||||
|
|
||||||
@meta.route("/metadata/provider")
|
@meta.route("/metadata/provider")
|
||||||
@login_required
|
@login_required
|
||||||
def metadata_provider():
|
def metadata_provider():
|
||||||
active = current_user.view_settings.get('metadata', {})
|
active = current_user.view_settings.get("metadata", {})
|
||||||
provider = list()
|
provider = list()
|
||||||
for c in cl:
|
for c in cl:
|
||||||
ac = active.get(c.__id__, True)
|
ac = active.get(c.__id__, True)
|
||||||
provider.append({"name": c.__name__, "active": ac, "initial": ac, "id": c.__id__})
|
provider.append(
|
||||||
return Response(json.dumps(provider), mimetype='application/json')
|
{"name": c.__name__, "active": ac, "initial": ac, "id": c.__id__}
|
||||||
|
)
|
||||||
|
return Response(json.dumps(provider), mimetype="application/json")
|
||||||
|
|
||||||
@meta.route("/metadata/provider", methods=['POST'])
|
|
||||||
@meta.route("/metadata/provider/<prov_name>", methods=['POST'])
|
@meta.route("/metadata/provider", methods=["POST"])
|
||||||
|
@meta.route("/metadata/provider/<prov_name>", methods=["POST"])
|
||||||
@login_required
|
@login_required
|
||||||
def metadata_change_active_provider(prov_name):
|
def metadata_change_active_provider(prov_name):
|
||||||
new_state = request.get_json()
|
new_state = request.get_json()
|
||||||
active = current_user.view_settings.get('metadata', {})
|
active = current_user.view_settings.get("metadata", {})
|
||||||
active[new_state['id']] = new_state['value']
|
active[new_state["id"]] = new_state["value"]
|
||||||
current_user.view_settings['metadata'] = active
|
current_user.view_settings["metadata"] = active
|
||||||
try:
|
try:
|
||||||
try:
|
try:
|
||||||
flag_modified(current_user, "view_settings")
|
flag_modified(current_user, "view_settings")
|
||||||
|
@ -89,29 +103,33 @@ def metadata_change_active_provider(prov_name):
|
||||||
log.error("Invalid request received: {}".format(request))
|
log.error("Invalid request received: {}".format(request))
|
||||||
return "Invalid request", 400
|
return "Invalid request", 400
|
||||||
if "initial" in new_state and prov_name:
|
if "initial" in new_state and prov_name:
|
||||||
for c in cl:
|
data = []
|
||||||
if c.__id__ == prov_name:
|
provider = next((c for c in cl if c.__id__ == prov_name), None)
|
||||||
data = c.search(new_state.get('query', ""))
|
if provider is not None:
|
||||||
break
|
data = provider.search(new_state.get("query", ""))
|
||||||
return Response(json.dumps(data), mimetype='application/json')
|
return Response(
|
||||||
|
json.dumps([asdict(x) for x in data]), mimetype="application/json"
|
||||||
|
)
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
@meta.route("/metadata/search", methods=['POST'])
|
|
||||||
|
@meta.route("/metadata/search", methods=["POST"])
|
||||||
@login_required
|
@login_required
|
||||||
def metadata_search():
|
def metadata_search():
|
||||||
query = request.form.to_dict().get('query')
|
query = request.form.to_dict().get("query")
|
||||||
data = list()
|
data = list()
|
||||||
active = current_user.view_settings.get('metadata', {})
|
active = current_user.view_settings.get("metadata", {})
|
||||||
|
locale = get_locale()
|
||||||
if query:
|
if query:
|
||||||
generic_cover = ""
|
static_cover = url_for("static", filename="generic_cover.jpg")
|
||||||
|
# start = current_milli_time()
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
|
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
|
||||||
meta = {executor.submit(c.search, query, generic_cover): c for c in cl if active.get(c.__id__, True)}
|
meta = {
|
||||||
|
executor.submit(c.search, query, static_cover, locale): c
|
||||||
|
for c in cl
|
||||||
|
if active.get(c.__id__, True)
|
||||||
|
}
|
||||||
for future in concurrent.futures.as_completed(meta):
|
for future in concurrent.futures.as_completed(meta):
|
||||||
data.extend(future.result())
|
data.extend([asdict(x) for x in future.result()])
|
||||||
return Response(json.dumps(data), mimetype='application/json')
|
# log.info({'Time elapsed {}'.format(current_milli_time()-start)})
|
||||||
|
return Response(json.dumps(data), mimetype="application/json")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -15,13 +15,93 @@
|
||||||
#
|
#
|
||||||
# You should have received a copy of the GNU General Public License
|
# You should have received a copy of the GNU General Public License
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
import abc
|
||||||
|
import dataclasses
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from typing import Dict, Generator, List, Optional, Union
|
||||||
|
|
||||||
|
from cps import constants
|
||||||
|
|
||||||
|
|
||||||
class Metadata():
|
@dataclasses.dataclass
|
||||||
|
class MetaSourceInfo:
|
||||||
|
id: str
|
||||||
|
description: str
|
||||||
|
link: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass
|
||||||
|
class MetaRecord:
|
||||||
|
id: Union[str, int]
|
||||||
|
title: str
|
||||||
|
authors: List[str]
|
||||||
|
url: str
|
||||||
|
source: MetaSourceInfo
|
||||||
|
cover: str = os.path.join(constants.STATIC_DIR, 'generic_cover.jpg')
|
||||||
|
description: Optional[str] = ""
|
||||||
|
series: Optional[str] = None
|
||||||
|
series_index: Optional[Union[int, float]] = 0
|
||||||
|
identifiers: Dict[str, Union[str, int]] = dataclasses.field(default_factory=dict)
|
||||||
|
publisher: Optional[str] = None
|
||||||
|
publishedDate: Optional[str] = None
|
||||||
|
rating: Optional[int] = 0
|
||||||
|
languages: Optional[List[str]] = dataclasses.field(default_factory=list)
|
||||||
|
tags: Optional[List[str]] = dataclasses.field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
class Metadata:
|
||||||
__name__ = "Generic"
|
__name__ = "Generic"
|
||||||
|
__id__ = "generic"
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.active = True
|
self.active = True
|
||||||
|
|
||||||
def set_status(self, state):
|
def set_status(self, state):
|
||||||
self.active = state
|
self.active = state
|
||||||
|
|
||||||
|
@abc.abstractmethod
|
||||||
|
def search(
|
||||||
|
self, query: str, generic_cover: str = "", locale: str = "en"
|
||||||
|
) -> Optional[List[MetaRecord]]:
|
||||||
|
pass
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_title_tokens(
|
||||||
|
title: str, strip_joiners: bool = True
|
||||||
|
) -> Generator[str, None, None]:
|
||||||
|
"""
|
||||||
|
Taken from calibre source code
|
||||||
|
"""
|
||||||
|
title_patterns = [
|
||||||
|
(re.compile(pat, re.IGNORECASE), repl)
|
||||||
|
for pat, repl in [
|
||||||
|
# Remove things like: (2010) (Omnibus) etc.
|
||||||
|
(
|
||||||
|
r"(?i)[({\[](\d{4}|omnibus|anthology|hardcover|"
|
||||||
|
r"audiobook|audio\scd|paperback|turtleback|"
|
||||||
|
r"mass\s*market|edition|ed\.)[\])}]",
|
||||||
|
"",
|
||||||
|
),
|
||||||
|
# Remove any strings that contain the substring edition inside
|
||||||
|
# parentheses
|
||||||
|
(r"(?i)[({\[].*?(edition|ed.).*?[\]})]", ""),
|
||||||
|
# Remove commas used a separators in numbers
|
||||||
|
(r"(\d+),(\d+)", r"\1\2"),
|
||||||
|
# Remove hyphens only if they have whitespace before them
|
||||||
|
(r"(\s-)", " "),
|
||||||
|
# Replace other special chars with a space
|
||||||
|
(r"""[:,;!@$%^&*(){}.`~"\s\[\]/]《》「」“”""", " "),
|
||||||
|
]
|
||||||
|
]
|
||||||
|
|
||||||
|
for pat, repl in title_patterns:
|
||||||
|
title = pat.sub(repl, title)
|
||||||
|
|
||||||
|
tokens = title.split()
|
||||||
|
for token in tokens:
|
||||||
|
token = token.strip().strip('"').strip("'")
|
||||||
|
if token and (
|
||||||
|
not strip_joiners or token.lower() not in ("a", "and", "the", "&")
|
||||||
|
):
|
||||||
|
yield token
|
||||||
|
|
|
@ -26,19 +26,26 @@ $(function () {
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
function getUniqueValues(attribute_name, book){
|
||||||
|
var presentArray = $.map($("#"+attribute_name).val().split(","), $.trim);
|
||||||
|
if ( presentArray.length === 1 && presentArray[0] === "") {
|
||||||
|
presentArray = [];
|
||||||
|
}
|
||||||
|
$.each(book[attribute_name], function(i, el) {
|
||||||
|
if ($.inArray(el, presentArray) === -1) presentArray.push(el);
|
||||||
|
});
|
||||||
|
return presentArray
|
||||||
|
}
|
||||||
|
|
||||||
function populateForm (book) {
|
function populateForm (book) {
|
||||||
tinymce.get("description").setContent(book.description);
|
tinymce.get("description").setContent(book.description);
|
||||||
var uniqueTags = $.map($("#tags").val().split(","), $.trim);
|
var uniqueTags = getUniqueValues('tags', book)
|
||||||
if ( uniqueTags.length == 1 && uniqueTags[0] == "") {
|
var uniqueLanguages = getUniqueValues('languages', book)
|
||||||
uniqueTags = [];
|
|
||||||
}
|
|
||||||
$.each(book.tags, function(i, el) {
|
|
||||||
if ($.inArray(el, uniqueTags) === -1) uniqueTags.push(el);
|
|
||||||
});
|
|
||||||
var ampSeparatedAuthors = (book.authors || []).join(" & ");
|
var ampSeparatedAuthors = (book.authors || []).join(" & ");
|
||||||
$("#bookAuthor").val(ampSeparatedAuthors);
|
$("#bookAuthor").val(ampSeparatedAuthors);
|
||||||
$("#book_title").val(book.title);
|
$("#book_title").val(book.title);
|
||||||
$("#tags").val(uniqueTags.join(", "));
|
$("#tags").val(uniqueTags.join(", "));
|
||||||
|
$("#languages").val(uniqueLanguages.join(", "));
|
||||||
$("#rating").data("rating").setValue(Math.round(book.rating));
|
$("#rating").data("rating").setValue(Math.round(book.rating));
|
||||||
if(book.cover && $("#cover_url").length){
|
if(book.cover && $("#cover_url").length){
|
||||||
$(".cover img").attr("src", book.cover);
|
$(".cover img").attr("src", book.cover);
|
||||||
|
@ -48,7 +55,32 @@ $(function () {
|
||||||
$("#publisher").val(book.publisher);
|
$("#publisher").val(book.publisher);
|
||||||
if (typeof book.series !== "undefined") {
|
if (typeof book.series !== "undefined") {
|
||||||
$("#series").val(book.series);
|
$("#series").val(book.series);
|
||||||
|
$("#series_index").val(book.series_index);
|
||||||
}
|
}
|
||||||
|
if (typeof book.identifiers !== "undefined") {
|
||||||
|
populateIdentifiers(book.identifiers)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function populateIdentifiers(identifiers){
|
||||||
|
for (const property in identifiers) {
|
||||||
|
console.log(`${property}: ${identifiers[property]}`);
|
||||||
|
if ($('input[name="identifier-type-'+property+'"]').length) {
|
||||||
|
$('input[name="identifier-val-'+property+'"]').val(identifiers[property])
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
addIdentifier(property, identifiers[property])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function addIdentifier(name, value){
|
||||||
|
var line = '<tr>';
|
||||||
|
line += '<td><input type="text" class="form-control" name="identifier-type-'+ name +'" required="required" placeholder="' + _("Identifier Type") +'" value="'+ name +'"></td>';
|
||||||
|
line += '<td><input type="text" class="form-control" name="identifier-val-'+ name +'" required="required" placeholder="' + _("Identifier Value") +'" value="'+ value +'"></td>';
|
||||||
|
line += '<td><a class="btn btn-default" onclick="removeIdentifierLine(this)">'+_("Remove")+'</a></td>';
|
||||||
|
line += '</tr>';
|
||||||
|
$("#identifier-table").append(line);
|
||||||
}
|
}
|
||||||
|
|
||||||
function doSearch (keyword) {
|
function doSearch (keyword) {
|
||||||
|
|
|
@ -40,35 +40,35 @@
|
||||||
{% if entries and entries[0] %}
|
{% if entries and entries[0] %}
|
||||||
{% for entry in entries %}
|
{% for entry in entries %}
|
||||||
<entry>
|
<entry>
|
||||||
<title>{{entry[0].title}}</title>
|
<title>{{entry.title}}</title>
|
||||||
<id>urn:uuid:{{entry[0].uuid}}</id>
|
<id>urn:uuid:{{entry.uuid}}</id>
|
||||||
<updated>{{entry[0].atom_timestamp}}</updated>
|
<updated>{{entry.atom_timestamp}}</updated>
|
||||||
{% if entry[0].authors.__len__() > 0 %}
|
{% if entry.authors.__len__() > 0 %}
|
||||||
<author>
|
<author>
|
||||||
<name>{{entry[0].authors[0].name}}</name>
|
<name>{{entry.authors[0].name}}</name>
|
||||||
</author>
|
</author>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if entry[0].publishers.__len__() > 0 %}
|
{% if entry.publishers.__len__() > 0 %}
|
||||||
<publisher>
|
<publisher>
|
||||||
<name>{{entry[0].publishers[0].name}}</name>
|
<name>{{entry.publishers[0].name}}</name>
|
||||||
</publisher>
|
</publisher>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% for lang in entry[0].languages %}
|
{% for lang in entry.languages %}
|
||||||
<dcterms:language>{{lang.lang_code}}</dcterms:language>
|
<dcterms:language>{{lang.lang_code}}</dcterms:language>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% for tag in entry[0].tags %}
|
{% for tag in entry.tags %}
|
||||||
<category scheme="http://www.bisg.org/standards/bisac_subject/index.html"
|
<category scheme="http://www.bisg.org/standards/bisac_subject/index.html"
|
||||||
term="{{tag.name}}"
|
term="{{tag.name}}"
|
||||||
label="{{tag.name}}"/>
|
label="{{tag.name}}"/>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% if entry[0].comments[0] %}<summary>{{entry[0].comments[0].text|striptags}}</summary>{% endif %}
|
{% if entry.comments[0] %}<summary>{{entry.comments[0].text|striptags}}</summary>{% endif %}
|
||||||
{% if entry[0].has_cover %}
|
{% if entry.has_cover %}
|
||||||
<link type="image/jpeg" href="{{url_for('opds.feed_get_cover', book_id=entry[0].id)}}" rel="http://opds-spec.org/image"/>
|
<link type="image/jpeg" href="{{url_for('opds.feed_get_cover', book_id=entry.id)}}" rel="http://opds-spec.org/image"/>
|
||||||
<link type="image/jpeg" href="{{url_for('opds.feed_get_cover', book_id=entry[0].id)}}" rel="http://opds-spec.org/image/thumbnail"/>
|
<link type="image/jpeg" href="{{url_for('opds.feed_get_cover', book_id=entry.id)}}" rel="http://opds-spec.org/image/thumbnail"/>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% for format in entry[0].data %}
|
{% for format in entry.data %}
|
||||||
<link rel="http://opds-spec.org/acquisition" href="{{ url_for('opds.opds_download_link', book_id=entry[0].id, book_format=format.format|lower)}}"
|
<link rel="http://opds-spec.org/acquisition" href="{{ url_for('opds.opds_download_link', book_id=entry.id, book_format=format.format|lower)}}"
|
||||||
length="{{format.uncompressed_size}}" mtime="{{entry[0].atom_timestamp}}" type="{{format.format|lower|mimetype}}"/>
|
length="{{format.uncompressed_size}}" mtime="{{entry.atom_timestamp}}" type="{{format.format|lower|mimetype}}"/>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</entry>
|
</entry>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
|
@ -31,6 +31,9 @@ SQLAlchemy-Utils>=0.33.5,<0.39.0
|
||||||
# metadata extraction
|
# metadata extraction
|
||||||
rarfile>=2.7
|
rarfile>=2.7
|
||||||
scholarly>=1.2.0,<1.6
|
scholarly>=1.2.0,<1.6
|
||||||
|
markdown2==2.4.2
|
||||||
|
html2text==2020.1.16
|
||||||
|
python-dateutil==2.8.2
|
||||||
|
|
||||||
# Comics
|
# Comics
|
||||||
natsort>=2.2.0,<8.1.0
|
natsort>=2.2.0,<8.1.0
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user