Merge remote-tracking branch 'lubimyczytac/add_lubimyczytac.pl_meta_provider' into Develop
# Conflicts: # optional-requirements.txt
This commit is contained in:
		
						commit
						4f3c396450
					
				| 
						 | 
				
			
			@ -17,49 +17,68 @@
 | 
			
		|||
#  along with this program. If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
 | 
			
		||||
# ComicVine api document: https://comicvine.gamespot.com/api/documentation
 | 
			
		||||
from typing import Dict, List, Optional
 | 
			
		||||
from urllib.parse import quote
 | 
			
		||||
 | 
			
		||||
import requests
 | 
			
		||||
from cps.services.Metadata import Metadata
 | 
			
		||||
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ComicVine(Metadata):
 | 
			
		||||
    __name__ = "ComicVine"
 | 
			
		||||
    __id__ = "comicvine"
 | 
			
		||||
    DESCRIPTION = "ComicVine Books"
 | 
			
		||||
    META_URL = "https://comicvine.gamespot.com/"
 | 
			
		||||
    API_KEY = "57558043c53943d5d1e96a9ad425b0eb85532ee6"
 | 
			
		||||
    BASE_URL = (
 | 
			
		||||
        f"https://comicvine.gamespot.com/api/search?api_key={API_KEY}"
 | 
			
		||||
        f"&resources=issue&query="
 | 
			
		||||
    )
 | 
			
		||||
    QUERY_PARAMS = "&sort=name:desc&format=json"
 | 
			
		||||
    HEADERS = {"User-Agent": "Not Evil Browser"}
 | 
			
		||||
 | 
			
		||||
    def search(self, query, generic_cover=""):
 | 
			
		||||
    def search(
 | 
			
		||||
        self, query: str, generic_cover: str = "", locale: str = "en"
 | 
			
		||||
    ) -> Optional[List[MetaRecord]]:
 | 
			
		||||
        val = list()
 | 
			
		||||
        apikey = "57558043c53943d5d1e96a9ad425b0eb85532ee6"
 | 
			
		||||
        if self.active:
 | 
			
		||||
            headers = {
 | 
			
		||||
                'User-Agent': 'Not Evil Browser'
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            result = requests.get("https://comicvine.gamespot.com/api/search?api_key="
 | 
			
		||||
                                  + apikey + "&resources=issue&query=" + query + "&sort=name:desc&format=json", headers=headers)
 | 
			
		||||
            for r in result.json().get('results'):
 | 
			
		||||
                seriesTitle = r['volume'].get('name', "")
 | 
			
		||||
                if r.get('store_date'):
 | 
			
		||||
                    dateFomers = r.get('store_date')
 | 
			
		||||
                else:
 | 
			
		||||
                    dateFomers = r.get('date_added')
 | 
			
		||||
                v = dict()
 | 
			
		||||
                v['id'] = r['id']
 | 
			
		||||
                v['title'] = seriesTitle + " #" + r.get('issue_number', "0") + " - " + ( r.get('name', "") or "")
 | 
			
		||||
                v['authors'] = r.get('authors', [])
 | 
			
		||||
                v['description'] = r.get('description', "")
 | 
			
		||||
                v['publisher'] = ""
 | 
			
		||||
                v['publishedDate'] = dateFomers
 | 
			
		||||
                v['tags'] = ["Comics", seriesTitle]
 | 
			
		||||
                v['rating'] = 0
 | 
			
		||||
                v['series'] = seriesTitle
 | 
			
		||||
                v['cover'] = r['image'].get('original_url')
 | 
			
		||||
                v['source'] = {
 | 
			
		||||
                    "id": self.__id__,
 | 
			
		||||
                    "description": "ComicVine Books",
 | 
			
		||||
                    "link": "https://comicvine.gamespot.com/"
 | 
			
		||||
                }
 | 
			
		||||
                v['url'] = r.get('site_detail_url', "")
 | 
			
		||||
                val.append(v)
 | 
			
		||||
            title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
 | 
			
		||||
            if title_tokens:
 | 
			
		||||
                tokens = [quote(t.encode("utf-8")) for t in title_tokens]
 | 
			
		||||
                query = "%20".join(tokens)
 | 
			
		||||
            result = requests.get(
 | 
			
		||||
                f"{ComicVine.BASE_URL}{query}{ComicVine.QUERY_PARAMS}",
 | 
			
		||||
                headers=ComicVine.HEADERS,
 | 
			
		||||
            )
 | 
			
		||||
            for result in result.json()["results"]:
 | 
			
		||||
                match = self._parse_search_result(
 | 
			
		||||
                    result=result, generic_cover=generic_cover, locale=locale
 | 
			
		||||
                )
 | 
			
		||||
                val.append(match)
 | 
			
		||||
        return val
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def _parse_search_result(
        self, result: Dict, generic_cover: str, locale: str
    ) -> MetaRecord:
        """Convert one ComicVine search hit into a MetaRecord.

        Args:
            result: One entry of the ``results`` list of the search response.
                The ``id``, ``volume`` and ``image`` keys are read directly
                and must be present.
            generic_cover: Cover URL used when the hit carries no
                ``original_url`` image.
            locale: Not used by this method.

        Returns:
            The record built from ``result``.

        Raises:
            KeyError: If ``id``, ``volume`` or ``image`` is missing.
        """
        series = result["volume"].get("name", "")
        series_index = result.get("issue_number", 0)
        # ``.get(key, default)`` only covers a missing key; a key that is
        # present with a null value would be rendered as the text "None".
        issue_name = result.get("name") or ""
        match = MetaRecord(
            id=result["id"],
            title=f"{series}#{series_index} - {issue_name}",
            authors=result.get("authors", []),
            url=result.get("site_detail_url", ""),
            source=MetaSourceInfo(
                id=self.__id__,
                description=ComicVine.DESCRIPTION,
                link=ComicVine.META_URL,
            ),
            series=series,
        )
        match.cover = result["image"].get("original_url", generic_cover)
        match.description = result.get("description", "")
        # Fall back to ``date_added`` when ``store_date`` is missing *or* null.
        match.publishedDate = result.get("store_date") or result.get("date_added")
        match.series_index = series_index
        match.tags = ["Comics", series]
        match.identifiers = {"comicvine": match.id}
        return match
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -17,39 +17,93 @@
 | 
			
		|||
#  along with this program. If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
 | 
			
		||||
# Google Books api document: https://developers.google.com/books/docs/v1/using
 | 
			
		||||
 | 
			
		||||
from typing import Dict, List, Optional
 | 
			
		||||
from urllib.parse import quote
 | 
			
		||||
 | 
			
		||||
import requests
 | 
			
		||||
from cps.services.Metadata import Metadata
 | 
			
		||||
 | 
			
		||||
from cps.isoLanguages import get_lang3, get_language_name
 | 
			
		||||
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Google(Metadata):
 | 
			
		||||
    __name__ = "Google"
 | 
			
		||||
    __id__ = "google"
 | 
			
		||||
    DESCRIPTION = "Google Books"
 | 
			
		||||
    META_URL = "https://books.google.com/"
 | 
			
		||||
    BOOK_URL = "https://books.google.com/books?id="
 | 
			
		||||
    SEARCH_URL = "https://www.googleapis.com/books/v1/volumes?q="
 | 
			
		||||
    ISBN_TYPE = "ISBN_13"
 | 
			
		||||
 | 
			
		||||
    def search(self, query, generic_cover=""):
 | 
			
		||||
        if self.active:
 | 
			
		||||
    def search(
 | 
			
		||||
        self, query: str, generic_cover: str = "", locale: str = "en"
 | 
			
		||||
    ) -> Optional[List[MetaRecord]]:
 | 
			
		||||
        val = list()    
 | 
			
		||||
            result = requests.get("https://www.googleapis.com/books/v1/volumes?q="+query.replace(" ","+"))
 | 
			
		||||
            for r in result.json().get('items'):
 | 
			
		||||
                v = dict()
 | 
			
		||||
                v['id'] = r['id']
 | 
			
		||||
                v['title'] = r['volumeInfo'].get('title',"")
 | 
			
		||||
                v['authors'] = r['volumeInfo'].get('authors', [])
 | 
			
		||||
                v['description'] = r['volumeInfo'].get('description', "")
 | 
			
		||||
                v['publisher'] = r['volumeInfo'].get('publisher', "")
 | 
			
		||||
                v['publishedDate'] = r['volumeInfo'].get('publishedDate', "")
 | 
			
		||||
                v['tags'] = r['volumeInfo'].get('categories', [])
 | 
			
		||||
                v['rating'] = r['volumeInfo'].get('averageRating', 0)
 | 
			
		||||
                if r['volumeInfo'].get('imageLinks'):
 | 
			
		||||
                    v['cover'] = r['volumeInfo']['imageLinks']['thumbnail'].replace("http://", "https://")
 | 
			
		||||
                else:
 | 
			
		||||
                    v['cover'] = "/../../../static/generic_cover.jpg"
 | 
			
		||||
                v['source'] = {
 | 
			
		||||
                    "id": self.__id__,
 | 
			
		||||
                    "description": "Google Books",
 | 
			
		||||
                    "link": "https://books.google.com/"}
 | 
			
		||||
                v['url'] = "https://books.google.com/books?id=" + r['id']
 | 
			
		||||
                val.append(v)
 | 
			
		||||
        if self.active:
 | 
			
		||||
 | 
			
		||||
            title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
 | 
			
		||||
            if title_tokens:
 | 
			
		||||
                tokens = [quote(t.encode("utf-8")) for t in title_tokens]
 | 
			
		||||
                query = "+".join(tokens)
 | 
			
		||||
            results = requests.get(Google.SEARCH_URL + query)
 | 
			
		||||
            for result in results.json()["items"]:
 | 
			
		||||
                val.append(
 | 
			
		||||
                    self._parse_search_result(
 | 
			
		||||
                        result=result, generic_cover=generic_cover, locale=locale
 | 
			
		||||
                    )
 | 
			
		||||
                )
 | 
			
		||||
        return val
 | 
			
		||||
 | 
			
		||||
    def _parse_search_result(
        self, result: Dict, generic_cover: str, locale: str
    ) -> MetaRecord:
        """Build a MetaRecord from a single Google Books ``items`` entry.

        Args:
            result: One volume of the search response; ``id`` and
                ``volumeInfo`` (with a ``title``) are read directly.
            generic_cover: Cover URL handed to ``_parse_cover`` as fallback.
            locale: Locale handed to ``_parse_languages``.

        Returns:
            The record filled from ``result``.
        """
        volume_id = result["id"]
        volume_info = result["volumeInfo"]
        source_info = None  # placeholder, assigned below once the title was read
        title = volume_info["title"]
        authors = volume_info.get("authors", [])
        source_info = MetaSourceInfo(
            id=self.__id__,
            description=Google.DESCRIPTION,
            link=Google.META_URL,
        )
        match = MetaRecord(
            id=volume_id,
            title=title,
            authors=authors,
            url=Google.BOOK_URL + volume_id,
            source=source_info,
        )

        match.cover = self._parse_cover(result=result, generic_cover=generic_cover)
        match.description = volume_info.get("description", "")
        match.languages = self._parse_languages(result=result, locale=locale)
        match.publisher = volume_info.get("publisher", "")
        match.publishedDate = volume_info.get("publishedDate", "")
        match.rating = volume_info.get("averageRating", 0)
        # Fixed placeholders: empty series name, index 1.
        match.series = ""
        match.series_index = 1
        match.tags = volume_info.get("categories", [])

        match.identifiers = {"google": match.id}
        return self._parse_isbn(result=result, match=match)
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def _parse_isbn(result: Dict, match: MetaRecord) -> MetaRecord:
 | 
			
		||||
        identifiers = result["volumeInfo"].get("industryIdentifiers", [])
 | 
			
		||||
        for identifier in identifiers:
 | 
			
		||||
            if identifier.get("type") == Google.ISBN_TYPE:
 | 
			
		||||
                match.identifiers["isbn"] = identifier.get("identifier")
 | 
			
		||||
                break
 | 
			
		||||
        return match
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def _parse_cover(result: Dict, generic_cover: str) -> str:
 | 
			
		||||
        if result["volumeInfo"].get("imageLinks"):
 | 
			
		||||
            cover_url = result["volumeInfo"]["imageLinks"]["thumbnail"]
 | 
			
		||||
            return cover_url.replace("http://", "https://")
 | 
			
		||||
        return generic_cover
 | 
			
		||||
 | 
			
		||||
    @staticmethod
    def _parse_languages(result: Dict, locale: str) -> List[str]:
        """Return the volume's language as a one-element list of display names.

        Args:
            result: One volume of the search response.
            locale: Locale passed to ``get_language_name``.

        Returns:
            ``[name]`` for the volume's ``language`` code, or an empty list
            when the volume declares no language.
        """
        language_code = result["volumeInfo"].get("language", "")
        if not language_code:
            return []
        return [get_language_name(locale, get_lang3(language_code))]
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										337
									
								
								cps/metadata_provider/lubimyczytac.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										337
									
								
								cps/metadata_provider/lubimyczytac.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,337 @@
 | 
			
		|||
# -*- coding: utf-8 -*-
 | 
			
		||||
#  This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
 | 
			
		||||
#    Copyright (C) 2021 OzzieIsaacs
 | 
			
		||||
#
 | 
			
		||||
#  This program is free software: you can redistribute it and/or modify
 | 
			
		||||
#  it under the terms of the GNU General Public License as published by
 | 
			
		||||
#  the Free Software Foundation, either version 3 of the License, or
 | 
			
		||||
#  (at your option) any later version.
 | 
			
		||||
#
 | 
			
		||||
#  This program is distributed in the hope that it will be useful,
 | 
			
		||||
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
#  GNU General Public License for more details.
 | 
			
		||||
#
 | 
			
		||||
#  You should have received a copy of the GNU General Public License
 | 
			
		||||
#  along with this program. If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
import datetime
 | 
			
		||||
import json
 | 
			
		||||
import re
 | 
			
		||||
from multiprocessing.pool import ThreadPool
 | 
			
		||||
from typing import List, Optional, Tuple, Union
 | 
			
		||||
from urllib.parse import quote
 | 
			
		||||
 | 
			
		||||
import requests
 | 
			
		||||
from dateutil import parser
 | 
			
		||||
from html2text import HTML2Text
 | 
			
		||||
from lxml.html import HtmlElement, fromstring, tostring
 | 
			
		||||
from markdown2 import Markdown
 | 
			
		||||
 | 
			
		||||
from cps.isoLanguages import get_language_name
 | 
			
		||||
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
 | 
			
		||||
 | 
			
		||||
# Accented characters (first string) and their plain replacements (second
# string); the two strings are paired position by position.
SYMBOLS_TO_TRANSLATE = (
    "öÖüÜóÓőŐúÚéÉáÁűŰíÍąĄćĆęĘłŁńŃóÓśŚźŹżŻ",
    "oOuUoOoOuUeEaAuUiIaAcCeElLnNoOsSzZzZ",
)
# Code point -> code point table, the form ``str.translate`` consumes.
SYMBOL_TRANSLATION_MAP = {
    ord(accented): ord(plain) for accented, plain in zip(*SYMBOLS_TO_TRANSLATE)
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_int_or_float(value: str) -> Union[int, float]:
    """Parse ``value`` as a number, preferring ``int`` for whole values.

    Args:
        value: Text accepted by ``float()``.

    Returns:
        An ``int`` when the parsed number has no fractional part, otherwise
        the ``float``.
    """
    as_float = float(value)
    as_int = int(as_float)
    if as_int == as_float:
        return as_int
    return as_float
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def strip_accents(s: Optional[str]) -> Optional[str]:
    """Replace the characters listed in SYMBOL_TRANSLATION_MAP; pass None through."""
    if s is None:
        return s
    return s.translate(SYMBOL_TRANSLATION_MAP)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def sanitize_comments_html(html: str) -> str:
    """Round-trip ``html`` through html2text and Markdown and return the HTML."""
    markdown_source = html2text(html)
    return Markdown().convert(markdown_source)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def html2text(html: str) -> str:
    """Convert an HTML fragment to text with ``HTML2Text``.

    Args:
        html: The markup; a ``bytes`` value is decoded as UTF-8 first.

    Returns:
        The output of ``HTML2Text.handle`` with line wrapping disabled,
        single line breaks enabled and ``*`` as emphasis mark.
    """
    markup = html.decode("utf-8") if isinstance(html, bytes) else html
    # <u> becomes emphasis in html2text, so rewrite <u>/</u> into <span>.
    markup = re.sub(
        r"<\s*(?P<solidus>/?)\s*[uU]\b(?P<rest>[^>]*)>",
        r"<\g<solidus>span\g<rest>>",
        markup,
    )
    converter = HTML2Text()
    converter.body_width = 0
    converter.single_line_break = True
    converter.emphasis_mark = "*"
    return converter.handle(markup)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class LubimyCzytac(Metadata):
    """Metadata provider that searches lubimyczytac.pl and parses its pages."""

    __name__ = "LubimyCzytac.pl"
    __id__ = "lubimyczytac"

    BASE_URL = "https://lubimyczytac.pl"

    # XPath expressions evaluated against the search-result page.
    BOOK_SEARCH_RESULT_XPATH = (
        "*//div[@class='listSearch']//div[@class='authorAllBooks__single']"
    )
    SINGLE_BOOK_RESULT_XPATH = ".//div[contains(@class,'authorAllBooks__singleText')]"
    TITLE_PATH = "/div/a[contains(@class,'authorAllBooks__singleTextTitle')]"
    TITLE_TEXT_PATH = f"{TITLE_PATH}//text()"
    URL_PATH = f"{TITLE_PATH}/@href"
    AUTHORS_PATH = "/div/a[contains(@href,'autor')]//text()"

    SIBLINGS = "/following-sibling::dd"

    # XPath expressions evaluated against a single book page.
    CONTAINER = "//section[@class='container book']"
    PUBLISHER = f"{CONTAINER}//dt[contains(text(),'Wydawnictwo:')]{SIBLINGS}/a/text()"
    LANGUAGES = f"{CONTAINER}//dt[contains(text(),'Język:')]{SIBLINGS}/text()"
    DESCRIPTION = f"{CONTAINER}//div[@class='collapse-content']"
    SERIES = f"{CONTAINER}//span/a[contains(@href,'/cykl/')]/text()"

    DETAILS = "//div[@id='book-details']"
    # Deliberately unterminated: the two expressions below append the rest.
    PUBLISH_DATE = "//dt[contains(@title,'Data pierwszego wydania"
    FIRST_PUBLISH_DATE = f"{DETAILS}{PUBLISH_DATE} oryginalnego')]{SIBLINGS}[1]/text()"
    FIRST_PUBLISH_DATE_PL = f"{DETAILS}{PUBLISH_DATE} polskiego')]{SIBLINGS}[1]/text()"
    TAGS = "//nav[@aria-label='breadcrumb']//a[contains(@href,'/ksiazki/k/')]/text()"

    RATING = "//meta[@property='books:rating:value']/@content"
    COVER = "//meta[@property='og:image']/@content"
    ISBN = "//meta[@property='books:isbn']/@content"
    META_TITLE = "//meta[@property='og:description']/@content"

    SUMMARY = "//script[@type='application/ld+json']//text()"

    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List[MetaRecord]]:
        """Search lubimyczytac.pl for ``query`` and return the parsed books.

        Args:
            query: Free-text title query.
            generic_cover: Cover URL used for books without a cover image.
            locale: Locale used to render language names.

        Returns:
            ``None`` when the provider is not active; otherwise the list of
            records (empty when the query yields no usable search term or
            the result page contains no book).
        """
        if not self.active:
            return None
        search_url = self._prepare_query(title=query)
        if not search_url:
            # _prepare_query returns "" when no token survives the cleanup;
            # requests.get("") would raise instead of reporting "no results".
            return []
        result = requests.get(search_url)
        root = fromstring(result.text)
        lc_parser = LubimyCzytacParser(root=root, metadata=self)
        matches = lc_parser.parse_search_results()
        if not matches:
            return matches
        # Every match needs its own page download, so fetch them in parallel.
        with ThreadPool(processes=10) as pool:
            final_matches = pool.starmap(
                lc_parser.parse_single_book,
                [(match, generic_cover, locale) for match in matches],
            )
        return final_matches

    def _prepare_query(self, title: str) -> str:
        """Build the search URL for ``title``.

        Args:
            title: Raw title as entered by the user.

        Returns:
            The search URL with the percent-encoded tokens joined by
            ``%20``, or ``""`` when no token is left after the cleanup.
        """
        # Raw string: "\?" and "\/" are invalid escapes in a normal literal.
        characters_to_remove = r"\?()\/"
        pattern = "[" + characters_to_remove + "]"
        title = re.sub(pattern, "", title)
        title = title.replace("_", " ")
        if '"' in title or ",," in title:
            title = title.split('"')[0].split(",,")[0]

        # "/" has already been stripped by the substitution above, so the
        # title is always tokenised through get_title_tokens.
        title_tokens = list(self.get_title_tokens(title, strip_joiners=False))
        if not title_tokens:
            return ""
        query = "%20".join(quote(t.encode("utf-8")) for t in title_tokens)
        if not query:
            return ""
        return f"{LubimyCzytac.BASE_URL}/szukaj/ksiazki?phrase={query}"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class LubimyCzytacParser:
    """Extracts MetaRecord data from parsed lubimyczytac.pl pages."""

    PAGES_TEMPLATE = "<p id='strony'>Książka ma {0} stron(y).</p>"
    PUBLISH_DATE_TEMPLATE = "<p id='pierwsze_wydanie'>Data pierwszego wydania: {0}</p>"
    PUBLISH_DATE_PL_TEMPLATE = (
        "<p id='pierwsze_wydanie'>Data pierwszego wydania w Polsce: {0}</p>"
    )

    def __init__(self, root: HtmlElement, metadata: Metadata) -> None:
        """Store the page tree to query and the provider the records belong to."""
        self.root = root
        self.metadata = metadata

    def parse_search_results(self) -> List[MetaRecord]:
        """Return one record per search hit that has a title, URL and authors."""
        matches = []
        results = self.root.xpath(LubimyCzytac.BOOK_SEARCH_RESULT_XPATH)
        for result in results:
            title = self._parse_xpath_node(
                root=result,
                xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
                f"{LubimyCzytac.TITLE_TEXT_PATH}",
            )

            book_url = self._parse_xpath_node(
                root=result,
                xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
                f"{LubimyCzytac.URL_PATH}",
            )
            authors = self._parse_xpath_node(
                root=result,
                xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
                f"{LubimyCzytac.AUTHORS_PATH}",
                take_first=False,
            )
            # Incomplete hits cannot be turned into a usable record.
            if not all([title, book_url, authors]):
                continue
            matches.append(
                MetaRecord(
                    # First path segment after "/ksiazka/" is used as the id.
                    id=book_url.replace("/ksiazka/", "").split("/")[0],
                    title=title,
                    authors=[strip_accents(author) for author in authors],
                    url=LubimyCzytac.BASE_URL + book_url,
                    source=MetaSourceInfo(
                        id=self.metadata.__id__,
                        description=self.metadata.__name__,
                        link=LubimyCzytac.BASE_URL,
                    ),
                )
            )
        return matches

    def parse_single_book(
        self, match: MetaRecord, generic_cover: str, locale: str
    ) -> MetaRecord:
        """Download the page at ``match.url`` and fill ``match`` from it.

        Args:
            match: Record produced by ``parse_search_results``; updated in
                place.
            generic_cover: Cover URL used when the page declares no cover.
            locale: Locale used to render language names.

        Returns:
            The same ``match`` object with the book details filled in.
        """
        response = requests.get(match.url)
        # LubimyCzytac.search runs this method on ONE parser from a thread
        # pool. Storing the downloaded tree on ``self.root`` let concurrent
        # calls overwrite each other's page, so each call parses through its
        # own parser instance and leaves ``self.root`` untouched.
        book = LubimyCzytacParser(
            root=fromstring(response.text), metadata=self.metadata
        )
        match.cover = book._parse_cover(generic_cover=generic_cover)
        match.description = book._parse_description()
        match.languages = book._parse_languages(locale=locale)
        match.publisher = book._parse_publisher()
        match.publishedDate = book._parse_from_summary(attribute_name="datePublished")
        match.rating = book._parse_rating()
        match.series, match.series_index = book._parse_series()
        match.tags = book._parse_tags()
        match.identifiers = {
            "isbn": book._parse_isbn(),
            "lubimyczytac": match.id,
        }
        return match

    def _parse_xpath_node(
        self,
        xpath: str,
        root: Optional[HtmlElement] = None,
        take_first: bool = True,
        strip_element: bool = True,
    ) -> Optional[Union[str, List[str]]]:
        """Evaluate ``xpath`` and return the first hit or all hits.

        Args:
            xpath: Expression to evaluate.
            root: Element to evaluate against; defaults to ``self.root``.
            take_first: Return only the first hit instead of a list.
            strip_element: Call ``strip()`` on the first hit; only honoured
                when ``take_first`` is true (list results are always
                stripped).

        Returns:
            ``None`` when nothing matches, otherwise the first hit or the
            list of stripped hits.
        """
        root = root if root is not None else self.root
        node = root.xpath(xpath)
        if not node:
            return None
        if not take_first:
            return [x.strip() for x in node]
        return node[0].strip() if strip_element else node[0]

    def _parse_cover(self, generic_cover) -> Optional[str]:
        """Return the page's cover URL, or ``generic_cover`` when absent."""
        return (
            self._parse_xpath_node(xpath=LubimyCzytac.COVER, take_first=True)
            or generic_cover
        )

    def _parse_publisher(self) -> Optional[str]:
        """Return the publisher name, or ``None`` when the page lists none."""
        return self._parse_xpath_node(xpath=LubimyCzytac.PUBLISHER, take_first=True)

    def _parse_languages(self, locale: str) -> List[str]:
        """Return display names for the recognised languages of the book.

        Only the words "polski" and "angielski" are recognised in the
        page's language field.
        """
        languages = []
        lang = self._parse_xpath_node(xpath=LubimyCzytac.LANGUAGES, take_first=True)
        if lang:
            if "polski" in lang:
                languages.append("pol")
            if "angielski" in lang:
                languages.append("eng")
        return [get_language_name(locale, language) for language in languages]

    def _parse_series(self) -> Tuple[Optional[str], Optional[Union[float, int]]]:
        """Split a "<name> (tom <n>)" series label into name and index.

        Returns:
            ``(name, index)`` when the label contains " (tom "; the index is
            0 when the volume part is not a plain number. ``(None, None)``
            when the page has no such label.
        """
        series = self._parse_xpath_node(xpath=LubimyCzytac.SERIES, take_first=True)
        # Test for the exact separator that is split on: a label that merely
        # contains "tom " (e.g. inside a word) would otherwise fail to
        # unpack into two parts.
        if not series or " (tom " not in series:
            return None, None
        series_name, series_info = series.split(" (tom ", 1)
        series_info = series_info.replace(" ", "").replace(")", "")
        # A bundle such as "1-3" is filed under its first volume.
        if "-" in series_info:
            series_info = series_info.split("-", 1)[0]
        series_index = 0
        if series_info.replace(".", "").isdigit():
            try:
                series_index = get_int_or_float(series_info)
            except ValueError:
                # e.g. "1.2.3" passes the digit check but is not a number.
                series_index = 0
        return series_name, series_index

    def _parse_tags(self) -> List[str]:
        """Return the breadcrumb categories of the book with accents stripped."""
        # _parse_xpath_node yields None (not an empty list) without matches.
        tags = self._parse_xpath_node(xpath=LubimyCzytac.TAGS, take_first=False) or []
        return [
            strip_accents(w.replace(", itd.", " itd."))
            for w in tags
            if isinstance(w, str)
        ]

    def _parse_from_summary(self, attribute_name: str) -> Optional[str]:
        """Read one attribute from the page's JSON-LD script block.

        Args:
            attribute_name: Top-level key to look up.

        Returns:
            The stripped value as text, or ``None`` when the block is
            missing, is not valid JSON, is not an object, or lacks the key.
        """
        summary_text = self._parse_xpath_node(xpath=LubimyCzytac.SUMMARY)
        if not summary_text:
            return None
        try:
            data = json.loads(summary_text)
        except ValueError:
            # json.JSONDecodeError is a ValueError; treat a broken block as
            # "no summary" instead of failing the whole book.
            return None
        if not isinstance(data, dict):
            return None
        value = data.get(attribute_name)
        if value is None:
            return None
        # JSON numbers arrive as int/float and have no strip().
        return str(value).strip()

    def _parse_rating(self) -> Optional[int]:
        """Return half of the page's rating value, rounded, or ``None``."""
        rating = self._parse_xpath_node(xpath=LubimyCzytac.RATING)
        return round(float(rating.replace(",", ".")) / 2) if rating else rating

    def _parse_date(self, xpath="first_publish") -> Optional[datetime.datetime]:
        """Parse one of the first-publication dates of the book page.

        Args:
            xpath: ``"first_publish"`` or ``"first_publish_pl"``.

        Returns:
            The parsed date, or ``None`` when the page has no such entry or
            its text cannot be parsed as a date.
        """
        options = {
            "first_publish": LubimyCzytac.FIRST_PUBLISH_DATE,
            "first_publish_pl": LubimyCzytac.FIRST_PUBLISH_DATE_PL,
        }
        date = self._parse_xpath_node(xpath=options.get(xpath))
        if not date:
            return None
        try:
            return parser.parse(date)
        except (ValueError, OverflowError):
            # The date only decorates the description; skip it if unparsable.
            return None

    def _parse_isbn(self) -> Optional[str]:
        """Return the ISBN declared in the page's meta tags, if any."""
        return self._parse_xpath_node(xpath=LubimyCzytac.ISBN)

    def _parse_description(self) -> str:
        """Return the sanitised book description plus the extra info lines.

        The description block of the page is preferred; the
        ``og:description`` meta tag is the fallback.
        """
        description = ""
        description_node = self._parse_xpath_node(
            xpath=LubimyCzytac.DESCRIPTION, strip_element=False
        )
        if description_node is not None:
            # Drop the "source" paragraphs before serialising the block.
            for source in self.root.xpath('//p[@class="source"]'):
                source.getparent().remove(source)
            description = tostring(description_node, method="html")
            description = sanitize_comments_html(description)

        else:
            description_node = self._parse_xpath_node(xpath=LubimyCzytac.META_TITLE)
            if description_node is not None:
                description = description_node
                description = sanitize_comments_html(description)
        description = self._add_extra_info_to_description(description=description)
        return description

    def _add_extra_info_to_description(self, description: str) -> str:
        """Append page count and first-publication dates to ``description``."""
        pages = self._parse_from_summary(attribute_name="numberOfPages")
        if pages:
            description += LubimyCzytacParser.PAGES_TEMPLATE.format(pages)

        first_publish_date = self._parse_date()
        if first_publish_date:
            description += LubimyCzytacParser.PUBLISH_DATE_TEMPLATE.format(
                first_publish_date.strftime("%d.%m.%Y")
            )

        first_publish_date_pl = self._parse_date(xpath="first_publish_pl")
        if first_publish_date_pl:
            description += LubimyCzytacParser.PUBLISH_DATE_PL_TEMPLATE.format(
                first_publish_date_pl.strftime("%d.%m.%Y")
            )

        return description
 | 
			
		||||
| 
						 | 
				
			
			@ -15,46 +15,52 @@
 | 
			
		|||
#
 | 
			
		||||
#  You should have received a copy of the GNU General Public License
 | 
			
		||||
#  along with this program. If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
import itertools
 | 
			
		||||
from typing import Dict, List, Optional
 | 
			
		||||
from urllib.parse import quote
 | 
			
		||||
 | 
			
		||||
from scholarly import scholarly
 | 
			
		||||
 | 
			
		||||
from cps.services.Metadata import Metadata
 | 
			
		||||
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
 | 
			
		||||
 | 
			
		||||
class scholar(Metadata):
    """Metadata provider that queries Google Scholar through ``scholarly``."""

    __name__ = "Google Scholar"
    __id__ = "googlescholar"
    META_URL = "https://scholar.google.com/"

    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List[MetaRecord]]:
        """Return at most ten records matching ``query``.

        An empty list is returned when the provider is not active.
        ``generic_cover`` and ``locale`` are forwarded to
        :meth:`_parse_search_result`.
        """
        val = list()
        if self.active:
            title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
            if title_tokens:
                # NOTE(review): tokens are percent-encoded before being handed
                # to scholarly.search_pubs - confirm scholarly does not encode
                # the query a second time.
                tokens = [quote(t.encode("utf-8")) for t in title_tokens]
                query = " ".join(tokens)
            # search_pubs is consumed lazily; islice caps it at ten results.
            scholar_gen = itertools.islice(scholarly.search_pubs(query), 10)
            for result in scholar_gen:
                match = self._parse_search_result(
                    result=result, generic_cover=generic_cover, locale=locale
                )
                val.append(match)
        return val

    def _parse_search_result(
        self, result: Dict, generic_cover: str, locale: str
    ) -> MetaRecord:
        """Convert one ``scholarly`` publication dict into a ``MetaRecord``.

        ``result["bib"]`` supplies title, authors, abstract, venue and
        publication year; ``pub_url`` (falling back to ``eprint_url``) is used
        both as record id and url. ``locale`` is currently unused.
        """
        bib = result["bib"]
        record_url = result.get("pub_url", result.get("eprint_url", ""))
        match = MetaRecord(
            id=record_url,
            title=bib.get("title"),
            authors=bib.get("author", []),
            url=record_url,
            source=MetaSourceInfo(
                id=self.__id__, description=self.__name__, link=scholar.META_URL
            ),
        )

        match.cover = result.get("image", {}).get("original_url", generic_cover)
        match.description = bib.get("abstract", "")
        match.publisher = bib.get("venue", "")
        # Not every publication carries a year; concatenating None with a str
        # would raise TypeError, so fall back to an empty date instead.
        pub_year = bib.get("pub_year")
        match.publishedDate = "{}-01-01".format(pub_year) if pub_year else ""
        match.identifiers = {"scholar": match.id}
        return match
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										11
									
								
								cps/opds.py
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								cps/opds.py
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -432,17 +432,9 @@ def feed_languagesindex():
 | 
			
		|||
    if current_user.filter_language() == u"all":
 | 
			
		||||
        languages = calibre_db.speaking_language()
 | 
			
		||||
    else:
 | 
			
		||||
        #try:
 | 
			
		||||
        #    cur_l = LC.parse(current_user.filter_language())
 | 
			
		||||
        #except UnknownLocaleError:
 | 
			
		||||
        #    cur_l = None
 | 
			
		||||
        languages = calibre_db.session.query(db.Languages).filter(
 | 
			
		||||
            db.Languages.lang_code == current_user.filter_language()).all()
 | 
			
		||||
        languages[0].name = isoLanguages.get_language_name(get_locale(), languages[0].lang_code)
 | 
			
		||||
        #if cur_l:
 | 
			
		||||
        #    languages[0].name = cur_l.get_language_name(get_locale())
 | 
			
		||||
        #else:
 | 
			
		||||
        #    languages[0].name = _(isoLanguages.get(part3=languages[0].lang_code).name)
 | 
			
		||||
    pagination = Pagination((int(off) / (int(config.config_books_per_page)) + 1), config.config_books_per_page,
 | 
			
		||||
                            len(languages))
 | 
			
		||||
    return render_xml_template('feed.xml', listelements=languages, folder='opds.feed_languages', pagination=pagination)
 | 
			
		||||
| 
						 | 
				
			
			@ -530,7 +522,8 @@ def feed_search(term):
 | 
			
		|||
        entries, __, ___ = calibre_db.get_search_results(term, config_read_column=config.config_read_column)
 | 
			
		||||
        entries_count = len(entries) if len(entries) > 0 else 1
 | 
			
		||||
        pagination = Pagination(1, entries_count, entries_count)
 | 
			
		||||
        return render_xml_template('feed.xml', searchterm=term, entries=entries, pagination=pagination)
 | 
			
		||||
        items = [entry[0] for entry in entries]
 | 
			
		||||
        return render_xml_template('feed.xml', searchterm=term, entries=items, pagination=pagination)
 | 
			
		||||
    else:
 | 
			
		||||
        return render_xml_template('feed.xml', searchterm="")
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -16,25 +16,27 @@
 | 
			
		|||
#  You should have received a copy of the GNU General Public License
 | 
			
		||||
#  along with this program. If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import json
 | 
			
		||||
import importlib
 | 
			
		||||
import sys
 | 
			
		||||
import inspect
 | 
			
		||||
import datetime
 | 
			
		||||
import concurrent.futures
 | 
			
		||||
import importlib
 | 
			
		||||
import inspect
 | 
			
		||||
import json
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
# from time import time
 | 
			
		||||
from dataclasses import asdict
 | 
			
		||||
 | 
			
		||||
from flask import Blueprint, request, Response, url_for
 | 
			
		||||
from flask import Blueprint, Response, request, url_for
 | 
			
		||||
from flask_login import current_user
 | 
			
		||||
from flask_login import login_required
 | 
			
		||||
from sqlalchemy.exc import InvalidRequestError, OperationalError
 | 
			
		||||
from sqlalchemy.orm.attributes import flag_modified
 | 
			
		||||
from sqlalchemy.exc import OperationalError, InvalidRequestError
 | 
			
		||||
 | 
			
		||||
from . import constants, logger, ub
 | 
			
		||||
from cps.services.Metadata import Metadata
 | 
			
		||||
from . import constants, get_locale, logger, ub
 | 
			
		||||
 | 
			
		||||
# current_milli_time = lambda: int(round(time() * 1000))
 | 
			
		||||
 | 
			
		||||
meta = Blueprint('metadata', __name__)
 | 
			
		||||
meta = Blueprint("metadata", __name__)
 | 
			
		||||
 | 
			
		||||
log = logger.create()
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -42,7 +44,7 @@ new_list = list()
 | 
			
		|||
meta_dir = os.path.join(constants.BASE_DIR, "cps", "metadata_provider")
 | 
			
		||||
modules = os.listdir(os.path.join(constants.BASE_DIR, "cps", "metadata_provider"))
 | 
			
		||||
for f in modules:
 | 
			
		||||
    if os.path.isfile(os.path.join(meta_dir, f)) and not f.endswith('__init__.py'):
 | 
			
		||||
    if os.path.isfile(os.path.join(meta_dir, f)) and not f.endswith("__init__.py"):
 | 
			
		||||
        a = os.path.basename(f)[:-3]
 | 
			
		||||
        try:
 | 
			
		||||
            importlib.import_module("cps.metadata_provider." + a)
 | 
			
		||||
| 
						 | 
				
			
			@ -51,34 +53,46 @@ for f in modules:
 | 
			
		|||
            log.error("Import error for metadata source: {}".format(a))
 | 
			
		||||
            pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def list_classes(provider_list):
    """Instantiate the provider classes found in the given provider modules.

    For every name in ``provider_list`` the already imported module
    ``cps.metadata_provider.<name>`` is inspected; each class that subclasses
    ``Metadata`` (other than the member named ``"Metadata"``) is instantiated
    without arguments. Raises ``KeyError`` if a module was never imported.
    """

    def _is_provider_class(member):
        return inspect.isclass(member) and issubclass(member, Metadata)

    instances = []
    for module_name in provider_list:
        module = sys.modules["cps.metadata_provider." + module_name]
        for member_name, provider_cls in inspect.getmembers(
            module, _is_provider_class
        ):
            if member_name == "Metadata":
                # the imported base class itself is not a provider
                continue
            instances.append(provider_cls())
    return instances
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
cl = list_classes(new_list)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@meta.route("/metadata/provider")
@login_required
def metadata_provider():
    """Return all loaded metadata providers as a JSON list.

    Each entry carries the provider name and id plus the current user's
    enabled flag (reported as both ``active`` and ``initial``); providers
    without a stored flag count as enabled.
    """
    user_flags = current_user.view_settings.get("metadata", {})
    providers = []
    for source in cl:
        enabled = user_flags.get(source.__id__, True)
        entry = {"name": source.__name__}
        entry["active"] = enabled
        entry["initial"] = enabled
        entry["id"] = source.__id__
        providers.append(entry)
    body = json.dumps(providers)
    return Response(body, mimetype="application/json")
 | 
			
		||||
 | 
			
		||||
@meta.route("/metadata/provider", methods=['POST'])
 | 
			
		||||
@meta.route("/metadata/provider/<prov_name>", methods=['POST'])
 | 
			
		||||
 | 
			
		||||
@meta.route("/metadata/provider", methods=["POST"])
 | 
			
		||||
@meta.route("/metadata/provider/<prov_name>", methods=["POST"])
 | 
			
		||||
@login_required
 | 
			
		||||
def metadata_change_active_provider(prov_name):
 | 
			
		||||
    new_state = request.get_json()
 | 
			
		||||
    active = current_user.view_settings.get('metadata', {})
 | 
			
		||||
    active[new_state['id']] = new_state['value']
 | 
			
		||||
    current_user.view_settings['metadata'] = active
 | 
			
		||||
    active = current_user.view_settings.get("metadata", {})
 | 
			
		||||
    active[new_state["id"]] = new_state["value"]
 | 
			
		||||
    current_user.view_settings["metadata"] = active
 | 
			
		||||
    try:
 | 
			
		||||
        try:
 | 
			
		||||
            flag_modified(current_user, "view_settings")
 | 
			
		||||
| 
						 | 
				
			
			@ -89,29 +103,33 @@ def metadata_change_active_provider(prov_name):
 | 
			
		|||
        log.error("Invalid request received: {}".format(request))
 | 
			
		||||
        return "Invalid request", 400
 | 
			
		||||
    if "initial" in new_state and prov_name:
 | 
			
		||||
        for c in cl:
 | 
			
		||||
            if c.__id__ == prov_name:
 | 
			
		||||
                data = c.search(new_state.get('query', ""))
 | 
			
		||||
                break
 | 
			
		||||
        return Response(json.dumps(data), mimetype='application/json')
 | 
			
		||||
        data = []
 | 
			
		||||
        provider = next((c for c in cl if c.__id__ == prov_name), None)
 | 
			
		||||
        if provider is not None:
 | 
			
		||||
            data = provider.search(new_state.get("query", ""))
 | 
			
		||||
        return Response(
 | 
			
		||||
            json.dumps([asdict(x) for x in data]), mimetype="application/json"
 | 
			
		||||
        )
 | 
			
		||||
    return ""
 | 
			
		||||
 | 
			
		||||
@meta.route("/metadata/search", methods=['POST'])
 | 
			
		||||
 | 
			
		||||
@meta.route("/metadata/search", methods=["POST"])
@login_required
def metadata_search():
    """Query every provider enabled for the current user and return JSON.

    Reads ``query`` from the posted form. When it is non-empty, each active
    provider's ``search`` runs in a thread pool with the query, the URL of
    the static generic cover and the current locale; the resulting records
    are converted with ``dataclasses.asdict`` and returned as one JSON list.
    An empty or missing query yields an empty list.
    """
    query = request.form.to_dict().get("query")
    data = list()
    active = current_user.view_settings.get("metadata", {})
    locale = get_locale()
    if query:
        static_cover = url_for("static", filename="generic_cover.jpg")
        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
            # Named "pending" so the module-level Blueprint ``meta`` is not
            # shadowed inside this view.
            pending = {
                executor.submit(c.search, query, static_cover, locale): c
                for c in cl
                if active.get(c.__id__, True)
            }
            for future in concurrent.futures.as_completed(pending):
                # search() is declared Optional[List[MetaRecord]]; treat a
                # None result as "no hits" instead of failing on iteration.
                records = future.result() or []
                data.extend([asdict(x) for x in records])
    return Response(json.dumps(data), mimetype="application/json")
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -15,13 +15,93 @@
 | 
			
		|||
#
 | 
			
		||||
#  You should have received a copy of the GNU General Public License
 | 
			
		||||
#  along with this program. If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
import abc
 | 
			
		||||
import dataclasses
 | 
			
		||||
import os
 | 
			
		||||
import re
 | 
			
		||||
from typing import Dict, Generator, List, Optional, Union
 | 
			
		||||
 | 
			
		||||
from cps import constants
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Metadata():
 | 
			
		||||
@dataclasses.dataclass
class MetaSourceInfo:
    """Describes the metadata source a record originates from."""

    # All three fields are required and positional, in this order.
    id: str
    description: str
    link: str
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@dataclasses.dataclass
class MetaRecord:
    """A single search hit returned by a metadata provider."""

    # --- required fields (positional order matters for callers) ---
    id: Union[str, int]
    title: str
    authors: List[str]
    url: str
    source: MetaSourceInfo

    # --- optional fields ---
    # Defaults to the generic cover file inside the static directory.
    cover: str = os.path.join(constants.STATIC_DIR, 'generic_cover.jpg')
    description: Optional[str] = ""
    series: Optional[str] = None
    series_index: Optional[Union[int, float]] = 0
    # Mutable defaults go through default_factory so instances never share them.
    identifiers: Dict[str, Union[str, int]] = dataclasses.field(
        default_factory=dict
    )
    publisher: Optional[str] = None
    publishedDate: Optional[str] = None
    rating: Optional[int] = 0
    languages: Optional[List[str]] = dataclasses.field(default_factory=list)
    tags: Optional[List[str]] = dataclasses.field(default_factory=list)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Metadata:
    """Base class for metadata providers.

    Subclasses override ``__name__``/``__id__`` and implement :meth:`search`.
    """

    __name__ = "Generic"
    __id__ = "generic"

    def __init__(self):
        self.active = True

    def set_status(self, state):
        """Set the provider's ``active`` flag to ``state``."""
        self.active = state

    # NOTE: the class does not use ABCMeta, so this decorator only marks the
    # method; instantiating a subclass without ``search`` is not prevented.
    @abc.abstractmethod
    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List[MetaRecord]]:
        """Search the provider for ``query``; implemented by subclasses."""
        pass

    @staticmethod
    def get_title_tokens(
        title: str, strip_joiners: bool = True
    ) -> Generator[str, None, None]:
        """
        Taken from calibre source code

        Yield the search-relevant words of ``title``: edition/format markers
        in brackets are dropped, thousands separators are removed, punctuation
        becomes whitespace and, when ``strip_joiners`` is true, the joiners
        "a", "and", "the" and "&" are skipped.
        """
        title_patterns = [
            (re.compile(pat, re.IGNORECASE), repl)
            for pat, repl in [
                # Remove things like: (2010) (Omnibus) etc.
                (
                    r"(?i)[({\[](\d{4}|omnibus|anthology|hardcover|"
                    r"audiobook|audio\scd|paperback|turtleback|"
                    r"mass\s*market|edition|ed\.)[\])}]",
                    "",
                ),
                # Remove any strings that contain the substring edition inside
                # parentheses
                # NOTE(review): the dot in "ed." is unescaped, so it matches
                # "ed" followed by any character - confirm this is intended.
                (r"(?i)[({\[].*?(edition|ed.).*?[\]})]", ""),
                # Remove commas used a separators in numbers
                (r"(\d+),(\d+)", r"\1\2"),
                # Remove hyphens only if they have whitespace before them
                (r"(\s-)", " "),
                # Replace other special chars with a space. The CJK brackets
                # and curly quotes belong inside the character class; placed
                # after the closing "]" they turned the pattern into one that
                # practically never matched.
                (r"""[:,;!@$%^&*(){}.`~"\s\[\]/《》「」“”]""", " "),
            ]
        ]

        for pat, repl in title_patterns:
            title = pat.sub(repl, title)

        tokens = title.split()
        for token in tokens:
            token = token.strip().strip('"').strip("'")
            if token and (
                not strip_joiners or token.lower() not in ("a", "and", "the", "&")
            ):
                yield token
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -26,19 +26,26 @@ $(function () {
 | 
			
		|||
       )
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    function getUniqueValues(attribute_name, book){
        // Merge the comma-separated entries of the "#<attribute_name>" input with
        // book[attribute_name]; entries already present are not added again.
        var fieldText = $("#" + attribute_name).val();
        var merged = $.map(fieldText.split(","), $.trim);
        var fieldWasEmpty = merged.length === 1 && merged[0] === "";
        if (fieldWasEmpty) {
            // splitting an empty input yields [""], which is not a real entry
            merged = [];
        }
        $.each(book[attribute_name], function(index, candidate) {
            if ($.inArray(candidate, merged) !== -1) {
                return;
            }
            merged.push(candidate);
        });
        return merged;
    }
 | 
			
		||||
 | 
			
		||||
    function populateForm (book) {
 | 
			
		||||
        tinymce.get("description").setContent(book.description);
 | 
			
		||||
        var uniqueTags = $.map($("#tags").val().split(","), $.trim);
 | 
			
		||||
        if ( uniqueTags.length == 1 && uniqueTags[0] == "") {
 | 
			
		||||
            uniqueTags = [];
 | 
			
		||||
        }
 | 
			
		||||
        $.each(book.tags, function(i, el) {
 | 
			
		||||
            if ($.inArray(el, uniqueTags) === -1) uniqueTags.push(el);
 | 
			
		||||
        });
 | 
			
		||||
        var uniqueTags = getUniqueValues('tags', book)
 | 
			
		||||
        var uniqueLanguages = getUniqueValues('languages', book)
 | 
			
		||||
        var ampSeparatedAuthors = (book.authors || []).join(" & ");
 | 
			
		||||
        $("#bookAuthor").val(ampSeparatedAuthors);
 | 
			
		||||
        $("#book_title").val(book.title);
 | 
			
		||||
        $("#tags").val(uniqueTags.join(", "));
 | 
			
		||||
        $("#languages").val(uniqueLanguages.join(", "));
 | 
			
		||||
        $("#rating").data("rating").setValue(Math.round(book.rating));
 | 
			
		||||
        if(book.cover && $("#cover_url").length){
 | 
			
		||||
            $(".cover img").attr("src", book.cover);
 | 
			
		||||
| 
						 | 
				
			
			@ -48,7 +55,32 @@ $(function () {
 | 
			
		|||
        $("#publisher").val(book.publisher);
 | 
			
		||||
        if (typeof book.series !== "undefined") {
 | 
			
		||||
            $("#series").val(book.series);
 | 
			
		||||
            $("#series_index").val(book.series_index);
 | 
			
		||||
        }
 | 
			
		||||
        if (typeof book.identifiers !== "undefined") {
 | 
			
		||||
            populateIdentifiers(book.identifiers)
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    function populateIdentifiers(identifiers){
        // For every identifier type in `identifiers`: overwrite the value input of
        // a row that already exists for that type, otherwise append a new row.
        for (var property in identifiers) {
            // skip enumerable properties inherited through the prototype chain
            if (!Object.prototype.hasOwnProperty.call(identifiers, property)) {
                continue;
            }
            if ($('input[name="identifier-type-'+property+'"]').length) {
                $('input[name="identifier-val-'+property+'"]').val(identifiers[property]);
            } else {
                addIdentifier(property, identifiers[property]);
            }
        }
    }
 | 
			
		||||
 | 
			
		||||
    function addIdentifier(name, value){
        // Append a row for identifier type `name` with `value` to "#identifier-table".
        // The row is assembled from DOM nodes instead of a concatenated HTML string:
        // name and value come from external metadata and must never be parsed as
        // markup (a quote or "<" in them would break out of the attribute).
        var typeInput = $("<input>").attr({
            "type": "text",
            "class": "form-control",
            "name": "identifier-type-" + name,
            "required": "required",
            "placeholder": _("Identifier Type"),
            "value": name
        });
        var valueInput = $("<input>").attr({
            "type": "text",
            "class": "form-control",
            "name": "identifier-val-" + name,
            "required": "required",
            "placeholder": _("Identifier Value"),
            "value": value
        });
        var removeLink = $("<a>").attr({
            "class": "btn btn-default",
            "onclick": "removeIdentifierLine(this)"
        }).text(_("Remove"));

        var row = $("<tr>")
            .append($("<td>").append(typeInput))
            .append($("<td>").append(valueInput))
            .append($("<td>").append(removeLink));
        $("#identifier-table").append(row);
    }
 | 
			
		||||
 | 
			
		||||
    function doSearch (keyword) {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -40,35 +40,35 @@
 | 
			
		|||
  {% if entries and entries[0] %}
 | 
			
		||||
  {% for entry in entries %}
 | 
			
		||||
  <entry>
 | 
			
		||||
    <title>{{entry[0].title}}</title>
 | 
			
		||||
    <id>urn:uuid:{{entry[0].uuid}}</id>
 | 
			
		||||
    <updated>{{entry[0].atom_timestamp}}</updated>
 | 
			
		||||
    {% if entry[0].authors.__len__() > 0 %}
 | 
			
		||||
    <title>{{entry.title}}</title>
 | 
			
		||||
    <id>urn:uuid:{{entry.uuid}}</id>
 | 
			
		||||
    <updated>{{entry.atom_timestamp}}</updated>
 | 
			
		||||
    {% if entry.authors.__len__() > 0 %}
 | 
			
		||||
      <author>
 | 
			
		||||
        <name>{{entry[0].authors[0].name}}</name>
 | 
			
		||||
        <name>{{entry.authors[0].name}}</name>
 | 
			
		||||
      </author>
 | 
			
		||||
    {% endif %}
 | 
			
		||||
    {% if entry[0].publishers.__len__() > 0 %}
 | 
			
		||||
    {% if entry.publishers.__len__() > 0 %}
 | 
			
		||||
      <publisher>
 | 
			
		||||
        <name>{{entry[0].publishers[0].name}}</name>
 | 
			
		||||
        <name>{{entry.publishers[0].name}}</name>
 | 
			
		||||
      </publisher>
 | 
			
		||||
    {% endif %}
 | 
			
		||||
    {% for lang in entry[0].languages %}
 | 
			
		||||
    {% for lang in entry.languages %}
 | 
			
		||||
      <dcterms:language>{{lang.lang_code}}</dcterms:language>
 | 
			
		||||
    {% endfor %}
 | 
			
		||||
    {% for tag in entry[0].tags %}
 | 
			
		||||
    {% for tag in entry.tags %}
 | 
			
		||||
    <category scheme="http://www.bisg.org/standards/bisac_subject/index.html"
 | 
			
		||||
              term="{{tag.name}}"
 | 
			
		||||
              label="{{tag.name}}"/>
 | 
			
		||||
    {% endfor %}
 | 
			
		||||
    {% if entry[0].comments[0] %}<summary>{{entry[0].comments[0].text|striptags}}</summary>{% endif %}
 | 
			
		||||
    {% if entry[0].has_cover %}
 | 
			
		||||
    <link type="image/jpeg" href="{{url_for('opds.feed_get_cover', book_id=entry[0].id)}}" rel="http://opds-spec.org/image"/>
 | 
			
		||||
    <link type="image/jpeg" href="{{url_for('opds.feed_get_cover', book_id=entry[0].id)}}" rel="http://opds-spec.org/image/thumbnail"/>
 | 
			
		||||
    {% if entry.comments[0] %}<summary>{{entry.comments[0].text|striptags}}</summary>{% endif %}
 | 
			
		||||
    {% if entry.has_cover %}
 | 
			
		||||
    <link type="image/jpeg" href="{{url_for('opds.feed_get_cover', book_id=entry.id)}}" rel="http://opds-spec.org/image"/>
 | 
			
		||||
    <link type="image/jpeg" href="{{url_for('opds.feed_get_cover', book_id=entry.id)}}" rel="http://opds-spec.org/image/thumbnail"/>
 | 
			
		||||
    {% endif %}
 | 
			
		||||
    {% for format in entry[0].data %}
 | 
			
		||||
    <link rel="http://opds-spec.org/acquisition" href="{{ url_for('opds.opds_download_link', book_id=entry[0].id, book_format=format.format|lower)}}"
 | 
			
		||||
          length="{{format.uncompressed_size}}" mtime="{{entry[0].atom_timestamp}}" type="{{format.format|lower|mimetype}}"/>
 | 
			
		||||
    {% for format in entry.data %}
 | 
			
		||||
    <link rel="http://opds-spec.org/acquisition" href="{{ url_for('opds.opds_download_link', book_id=entry.id, book_format=format.format|lower)}}"
 | 
			
		||||
          length="{{format.uncompressed_size}}" mtime="{{entry.atom_timestamp}}" type="{{format.format|lower|mimetype}}"/>
 | 
			
		||||
    {% endfor %}
 | 
			
		||||
  </entry>
 | 
			
		||||
  {% endfor %}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -31,6 +31,9 @@ SQLAlchemy-Utils>=0.33.5,<0.39.0
 | 
			
		|||
# metadata extraction
 | 
			
		||||
rarfile>=2.7
 | 
			
		||||
scholarly>=1.2.0,<1.6
 | 
			
		||||
markdown2==2.4.2
 | 
			
		||||
html2text==2020.1.16
 | 
			
		||||
python-dateutil==2.8.2
 | 
			
		||||
 | 
			
		||||
# Comics
 | 
			
		||||
natsort>=2.2.0,<8.1.0
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Loading…
	
		Reference in New Issue
	
	Block a user