Merge remote-tracking branch 'lubimyczytac/add_lubimyczytac.pl_meta_provider' into Develop

# Conflicts: # optional-requirements.txt
2022-01-27 18:37:02 +01:00 · 2022-01-27 18:37:02 +01:00 · 4f3c396450
commit 4f3c396450
parent 6339d25af0 20b5a9a2c0
11 changed files with 1163 additions and 1173 deletions
--- a/cps/metadata_provider/comicvine.py
+++ b/cps/metadata_provider/comicvine.py
@ -17,49 +17,68 @@
 #  along with this program. If not, see <http://www.gnu.org/licenses/>.
 # ComicVine api document: https://comicvine.gamespot.com/api/documentation
 from typing import Dict, List, Optional
 from urllib.parse import quote
 import requests
-from cps.services.Metadata import Metadata
+from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
 class ComicVine(Metadata):
    __name__ = "ComicVine"
    __id__ = "comicvine"
    DESCRIPTION = "ComicVine Books"
    META_URL = "https://comicvine.gamespot.com/"
    API_KEY = "57558043c53943d5d1e96a9ad425b0eb85532ee6"
    BASE_URL = (
        f"https://comicvine.gamespot.com/api/search?api_key={API_KEY}"
        f"&resources=issue&query="
    )
    QUERY_PARAMS = "&sort=name:desc&format=json"
    HEADERS = {"User-Agent": "Not Evil Browser"}
-    def search(self, query, generic_cover=""):
+    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List[MetaRecord]]:
        val = list()
        apikey = "57558043c53943d5d1e96a9ad425b0eb85532ee6"
        if self.active:
-            headers = {
+            title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
-                'User-Agent': 'Not Evil Browser'
+            if title_tokens:
-            }
+                tokens = [quote(t.encode("utf-8")) for t in title_tokens]
-
+                query = "%20".join(tokens)
-            result = requests.get("https://comicvine.gamespot.com/api/search?api_key="
+            result = requests.get(
-                                  + apikey + "&resources=issue&query=" + query + "&sort=name:desc&format=json", headers=headers)
+                f"{ComicVine.BASE_URL}{query}{ComicVine.QUERY_PARAMS}",
-            for r in result.json().get('results'):
+                headers=ComicVine.HEADERS,
-                seriesTitle = r['volume'].get('name', "")
+            )
-                if r.get('store_date'):
+            for result in result.json()["results"]:
-                    dateFomers = r.get('store_date')
+                match = self._parse_search_result(
-                else:
+                    result=result, generic_cover=generic_cover, locale=locale
-                    dateFomers = r.get('date_added')
+                )
-                v = dict()
+                val.append(match)
                v['id'] = r['id']
                v['title'] = seriesTitle + " #" + r.get('issue_number', "0") + " - " + ( r.get('name', "") or "")
                v['authors'] = r.get('authors', [])
                v['description'] = r.get('description', "")
                v['publisher'] = ""
                v['publishedDate'] = dateFomers
                v['tags'] = ["Comics", seriesTitle]
                v['rating'] = 0
                v['series'] = seriesTitle
                v['cover'] = r['image'].get('original_url')
                v['source'] = {
                    "id": self.__id__,
                    "description": "ComicVine Books",
                    "link": "https://comicvine.gamespot.com/"
                }
                v['url'] = r.get('site_detail_url', "")
                val.append(v)
        return val
-
+    def _parse_search_result(
        self, result: Dict, generic_cover: str, locale: str
    ) -> MetaRecord:
        series = result["volume"].get("name", "")
        series_index = result.get("issue_number", 0)
        issue_name = result.get("name", "")
        match = MetaRecord(
            id=result["id"],
            title=f"{series}#{series_index} - {issue_name}",
            authors=result.get("authors", []),
            url=result.get("site_detail_url", ""),
            source=MetaSourceInfo(
                id=self.__id__,
                description=ComicVine.DESCRIPTION,
                link=ComicVine.META_URL,
            ),
            series=series,
        )
        match.cover = result["image"].get("original_url", generic_cover)
        match.description = result.get("description", "")
        match.publishedDate = result.get("store_date", result.get("date_added"))
        match.series_index = series_index
        match.tags = ["Comics", series]
        match.identifiers = {"comicvine": match.id}
        return match
--- a/cps/metadata_provider/google.py
+++ b/cps/metadata_provider/google.py
@ -17,39 +17,93 @@
 #  along with this program. If not, see <http://www.gnu.org/licenses/>.
 # Google Books api document: https://developers.google.com/books/docs/v1/using
-
+from typing import Dict, List, Optional
 from urllib.parse import quote
 import requests
-from cps.services.Metadata import Metadata
+
 from cps.isoLanguages import get_lang3, get_language_name
 from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
 class Google(Metadata):
    __name__ = "Google"
    __id__ = "google"
    DESCRIPTION = "Google Books"
    META_URL = "https://books.google.com/"
    BOOK_URL = "https://books.google.com/books?id="
    SEARCH_URL = "https://www.googleapis.com/books/v1/volumes?q="
    ISBN_TYPE = "ISBN_13"
-    def search(self, query, generic_cover=""):
+    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List[MetaRecord]]:
        val = list()    
        if self.active:
            val = list()
            result = requests.get("https://www.googleapis.com/books/v1/volumes?q="+query.replace(" ","+"))
            for r in result.json().get('items'):
                v = dict()
                v['id'] = r['id']
                v['title'] = r['volumeInfo'].get('title',"")
                v['authors'] = r['volumeInfo'].get('authors', [])
                v['description'] = r['volumeInfo'].get('description', "")
                v['publisher'] = r['volumeInfo'].get('publisher', "")
                v['publishedDate'] = r['volumeInfo'].get('publishedDate', "")
                v['tags'] = r['volumeInfo'].get('categories', [])
                v['rating'] = r['volumeInfo'].get('averageRating', 0)
                if r['volumeInfo'].get('imageLinks'):
                    v['cover'] = r['volumeInfo']['imageLinks']['thumbnail'].replace("http://", "https://")
                else:
                    v['cover'] = "/../../../static/generic_cover.jpg"
                v['source'] = {
                    "id": self.__id__,
                    "description": "Google Books",
                    "link": "https://books.google.com/"}
                v['url'] = "https://books.google.com/books?id=" + r['id']
                val.append(v)
            return val
            title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
            if title_tokens:
                tokens = [quote(t.encode("utf-8")) for t in title_tokens]
                query = "+".join(tokens)
            results = requests.get(Google.SEARCH_URL + query)
            for result in results.json()["items"]:
                val.append(
                    self._parse_search_result(
                        result=result, generic_cover=generic_cover, locale=locale
                    )
                )
        return val
    def _parse_search_result(
        self, result: Dict, generic_cover: str, locale: str
    ) -> MetaRecord:
        """Convert one Google Books volume entry into a MetaRecord.

        :param result: a single item of the API response; must contain "id"
            and "volumeInfo".
        :param generic_cover: cover URL used when the volume has no image links.
        :param locale: locale passed on to the language-name lookup.
        :return: the populated MetaRecord.
        """
        volume_info = result["volumeInfo"]
        match = MetaRecord(
            id=result["id"],
            # .get() instead of indexing: a volume without a title must not
            # abort the whole search with a KeyError.
            title=volume_info.get("title", ""),
            authors=volume_info.get("authors", []),
            url=Google.BOOK_URL + result["id"],
            source=MetaSourceInfo(
                id=self.__id__,
                description=Google.DESCRIPTION,
                link=Google.META_URL,
            ),
        )
        match.cover = self._parse_cover(result=result, generic_cover=generic_cover)
        match.description = volume_info.get("description", "")
        match.languages = self._parse_languages(result=result, locale=locale)
        match.publisher = volume_info.get("publisher", "")
        match.publishedDate = volume_info.get("publishedDate", "")
        match.rating = volume_info.get("averageRating", 0)
        # This method never reads series data from the response, so fixed
        # placeholder values are stored.
        match.series, match.series_index = "", 1
        match.tags = volume_info.get("categories", [])
        match.identifiers = {"google": match.id}
        match = self._parse_isbn(result=result, match=match)
        return match
    @staticmethod
    def _parse_isbn(result: Dict, match: MetaRecord) -> MetaRecord:
        """Store the first identifier of type Google.ISBN_TYPE as match.identifiers["isbn"].

        *match* is modified in place and also returned; it is left untouched
        when the volume lists no identifier of that type.
        """
        industry_ids = result["volumeInfo"].get("industryIdentifiers", [])
        isbn_entry = next(
            (entry for entry in industry_ids if entry.get("type") == Google.ISBN_TYPE),
            None,
        )
        if isbn_entry is not None:
            match.identifiers["isbn"] = isbn_entry.get("identifier")
        return match
    @staticmethod
    def _parse_cover(result: Dict, generic_cover: str) -> str:
        if result["volumeInfo"].get("imageLinks"):
            cover_url = result["volumeInfo"]["imageLinks"]["thumbnail"]
            return cover_url.replace("http://", "https://")
        return generic_cover
    @staticmethod
    def _parse_languages(result: Dict, locale: str) -> List[str]:
        language_iso2 = result["volumeInfo"].get("language", "")
        languages = (
            [get_language_name(locale, get_lang3(language_iso2))]
            if language_iso2
            else []
        )
        return languages
--- a/cps/metadata_provider/lubimyczytac.py
+++ b/cps/metadata_provider/lubimyczytac.py
@ -0,0 +1,337 @@
 # -*- coding: utf-8 -*-
 #  This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
 #    Copyright (C) 2021 OzzieIsaacs
 #
 #  This program is free software: you can redistribute it and/or modify
 #  it under the terms of the GNU General Public License as published by
 #  the Free Software Foundation, either version 3 of the License, or
 #  (at your option) any later version.
 #
 #  This program is distributed in the hope that it will be useful,
 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 #  GNU General Public License for more details.
 #
 #  You should have received a copy of the GNU General Public License
 #  along with this program. If not, see <http://www.gnu.org/licenses/>.
 import datetime
 import json
 import re
 from multiprocessing.pool import ThreadPool
 from typing import List, Optional, Tuple, Union
 from urllib.parse import quote
 import requests
 from dateutil import parser
 from html2text import HTML2Text
 from lxml.html import HtmlElement, fromstring, tostring
 from markdown2 import Markdown
 from cps.isoLanguages import get_language_name
 from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
 # Two parallel strings: the character at position i of the first one is
 # replaced by the character at position i of the second one.
 SYMBOLS_TO_TRANSLATE = (
    "öÖüÜóÓőŐúÚéÉáÁűŰíÍąĄćĆęĘłŁńŃóÓśŚźŹżŻ",
    "oOuUoOoOuUeEaAuUiIaAcCeElLnNoOsSzZzZ",
 )
 # Code-point -> code-point table in the form expected by str.translate().
 SYMBOL_TRANSLATION_MAP = {
    ord(accented): ord(plain) for accented, plain in zip(*SYMBOLS_TO_TRANSLATE)
 }
 def get_int_or_float(value: str) -> Union[int, float]:
    """Parse *value* as a number, returning an int when it has no fractional part."""
    as_float = float(value)
    as_int = int(as_float)
    if as_float == as_int:
        return as_int
    return as_float
 def strip_accents(s: Optional[str]) -> Optional[str]:
    """Translate *s* through SYMBOL_TRANSLATION_MAP; None is passed through unchanged."""
    if s is None:
        return s
    return s.translate(SYMBOL_TRANSLATION_MAP)
 def sanitize_comments_html(html: str) -> str:
    """Normalise *html* by converting it to text with html2text() and back to HTML with Markdown."""
    plain_text = html2text(html)
    return Markdown().convert(plain_text)
 def html2text(html: str) -> str:
    """Convert an HTML fragment to text using HTML2Text.

    Bytes input is accepted as well and is decoded as UTF-8 first.
    """
    if isinstance(html, bytes):
        html = html.decode("utf-8")
    # <u> becomes emphasis in html2text, so opening and closing <u> tags are
    # rewritten to <span> (attributes kept) before conversion.
    underline_tag = r"<\s*(?P<solidus>/?)\s*[uU]\b(?P<rest>[^>]*)>"
    html = re.sub(underline_tag, r"<\g<solidus>span\g<rest>>", html)
    converter = HTML2Text()
    converter.body_width = 0
    converter.single_line_break = True
    converter.emphasis_mark = "*"
    return converter.handle(html)
 class LubimyCzytac(Metadata):
    """Metadata provider that scrapes search results and book pages of lubimyczytac.pl."""

    __name__ = "LubimyCzytac.pl"
    __id__ = "lubimyczytac"

    BASE_URL = "https://lubimyczytac.pl"

    # XPath expressions used on the search-result page.
    BOOK_SEARCH_RESULT_XPATH = (
        "*//div[@class='listSearch']//div[@class='authorAllBooks__single']"
    )
    SINGLE_BOOK_RESULT_XPATH = ".//div[contains(@class,'authorAllBooks__singleText')]"
    TITLE_PATH = "/div/a[contains(@class,'authorAllBooks__singleTextTitle')]"
    TITLE_TEXT_PATH = f"{TITLE_PATH}//text()"
    URL_PATH = f"{TITLE_PATH}/@href"
    AUTHORS_PATH = "/div/a[contains(@href,'autor')]//text()"

    # XPath expressions used on a single book page.
    SIBLINGS = "/following-sibling::dd"
    CONTAINER = "//section[@class='container book']"
    PUBLISHER = f"{CONTAINER}//dt[contains(text(),'Wydawnictwo:')]{SIBLINGS}/a/text()"
    LANGUAGES = f"{CONTAINER}//dt[contains(text(),'Język:')]{SIBLINGS}/text()"
    DESCRIPTION = f"{CONTAINER}//div[@class='collapse-content']"
    SERIES = f"{CONTAINER}//span/a[contains(@href,'/cykl/')]/text()"
    DETAILS = "//div[@id='book-details']"
    # Deliberately unterminated: the two FIRST_PUBLISH_DATE* expressions below
    # append the rest of the title text and close the predicate.
    PUBLISH_DATE = "//dt[contains(@title,'Data pierwszego wydania"
    FIRST_PUBLISH_DATE = f"{DETAILS}{PUBLISH_DATE} oryginalnego')]{SIBLINGS}[1]/text()"
    FIRST_PUBLISH_DATE_PL = f"{DETAILS}{PUBLISH_DATE} polskiego')]{SIBLINGS}[1]/text()"
    TAGS = "//nav[@aria-label='breadcrumb']//a[contains(@href,'/ksiazki/k/')]/text()"
    RATING = "//meta[@property='books:rating:value']/@content"
    COVER = "//meta[@property='og:image']/@content"
    ISBN = "//meta[@property='books:isbn']/@content"
    META_TITLE = "//meta[@property='og:description']/@content"
    SUMMARY = "//script[@type='application/ld+json']//text()"

    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List[MetaRecord]]:
        """Search lubimyczytac.pl for *query* and return one MetaRecord per hit.

        :param query: free-text search phrase (typically a book title).
        :param generic_cover: cover URL used for books without a cover image.
        :param locale: locale used to render language names.
        :return: list of records; empty when the provider is inactive, the
            query contains nothing searchable, or the site returns no hits.
        """
        if not self.active:
            # Same result as the other providers: an empty list rather than
            # None, so callers can iterate the result unconditionally.
            return []
        search_url = self._prepare_query(title=query)
        if not search_url:
            # _prepare_query() yields "" when no token survives sanitising;
            # requesting an empty URL would raise inside requests.
            return []
        result = requests.get(search_url)
        root = fromstring(result.text)
        lc_parser = LubimyCzytacParser(root=root, metadata=self)
        matches = lc_parser.parse_search_results()
        if not matches:
            return matches
        # Every hit needs its own detail-page download; fetch them in parallel.
        with ThreadPool(processes=10) as pool:
            final_matches = pool.starmap(
                lc_parser.parse_single_book,
                [(match, generic_cover, locale) for match in matches],
            )
        return final_matches

    def _prepare_query(self, title: str) -> str:
        """Build the search URL for *title*.

        :return: the full search URL, or "" when the sanitised title yields
            no usable token.
        """
        # Remove "?", parentheses and "/" (raw string: the former non-raw
        # literal relied on invalid escape sequences).
        title = re.sub(r"[?()/]", "", title)
        title = title.replace("_", " ")
        # Keep only the part in front of the first '"' or ',,'.
        title = title.split('"')[0].split(",,")[0]
        # NOTE(review): a former '"/" in title' branch was dropped here; it
        # could never run because "/" has already been stripped above.
        # get_title_tokens() comes from the Metadata base class.
        title_tokens = list(self.get_title_tokens(title, strip_joiners=False))
        query = "%20".join(quote(token.encode("utf-8")) for token in title_tokens)
        if not query:
            return ""
        return f"{LubimyCzytac.BASE_URL}/szukaj/ksiazki?phrase={query}"
 class LubimyCzytacParser:
    """Extracts MetaRecord data from parsed lubimyczytac.pl HTML pages.

    An instance wraps one parsed page (``root``) plus the provider
    (``metadata``) whose ``__id__`` / ``__name__`` label the produced records.
    """

    # HTML snippets appended to the description by _add_extra_info_to_description().
    PAGES_TEMPLATE = "<p id='strony'>Książka ma {0} stron(y).</p>"
    PUBLISH_DATE_TEMPLATE = "<p id='pierwsze_wydanie'>Data pierwszego wydania: {0}</p>"
    PUBLISH_DATE_PL_TEMPLATE = (
        "<p id='pierwsze_wydanie'>Data pierwszego wydania w Polsce: {0}</p>"
    )

    def __init__(self, root: HtmlElement, metadata: Metadata) -> None:
        """Store the parsed page and the owning metadata provider."""
        self.root = root
        self.metadata = metadata

    def parse_search_results(self) -> List[MetaRecord]:
        """Build a basic MetaRecord for every book listed on the search page.

        Entries lacking a title, a URL or authors are skipped.
        """
        matches = []
        results = self.root.xpath(LubimyCzytac.BOOK_SEARCH_RESULT_XPATH)
        for result in results:
            title = self._parse_xpath_node(
                root=result,
                xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
                f"{LubimyCzytac.TITLE_TEXT_PATH}",
            )
            book_url = self._parse_xpath_node(
                root=result,
                xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
                f"{LubimyCzytac.URL_PATH}",
            )
            authors = self._parse_xpath_node(
                root=result,
                xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
                f"{LubimyCzytac.AUTHORS_PATH}",
                take_first=False,
            )
            if not all([title, book_url, authors]):
                continue
            matches.append(
                MetaRecord(
                    # The id is the first path segment after "/ksiazka/".
                    id=book_url.replace("/ksiazka/", "").split("/")[0],
                    title=title,
                    authors=[strip_accents(author) for author in authors],
                    url=LubimyCzytac.BASE_URL + book_url,
                    source=MetaSourceInfo(
                        id=self.metadata.__id__,
                        description=self.metadata.__name__,
                        link=LubimyCzytac.BASE_URL,
                    ),
                )
            )
        return matches

    def parse_single_book(
        self, match: MetaRecord, generic_cover: str, locale: str
    ) -> MetaRecord:
        """Download the page at ``match.url`` and fill in the remaining fields.

        :param match: record produced by parse_search_results(); updated in
            place and returned.
        :param generic_cover: cover URL used when the page has no cover image.
        :param locale: locale used to render language names.
        """
        response = requests.get(match.url)
        # LubimyCzytac.search() runs this method from a thread pool on ONE
        # shared parser. Writing the downloaded page to self.root would let
        # concurrent calls read each other's page, so every book gets its own
        # parser instance instead.
        page = type(self)(root=fromstring(response.text), metadata=self.metadata)
        match.cover = page._parse_cover(generic_cover=generic_cover)
        match.description = page._parse_description()
        match.languages = page._parse_languages(locale=locale)
        match.publisher = page._parse_publisher()
        match.publishedDate = page._parse_from_summary(attribute_name="datePublished")
        match.rating = page._parse_rating()
        match.series, match.series_index = page._parse_series()
        match.tags = page._parse_tags()
        match.identifiers = {
            "isbn": page._parse_isbn(),
            "lubimyczytac": match.id,
        }
        return match

    def _parse_xpath_node(
        self,
        xpath: str,
        root: Optional[HtmlElement] = None,
        take_first: bool = True,
        strip_element: bool = True,
    ) -> Optional[Union[str, List[str]]]:
        """Evaluate *xpath* and return its result in a convenient shape.

        :param xpath: expression to evaluate.
        :param root: node to evaluate against; defaults to ``self.root``.
        :param take_first: return only the first hit instead of all hits.
        :param strip_element: with *take_first*, call ``.strip()`` on the hit;
            pass False when the hit is an element rather than a string.
        :return: None when nothing matched, otherwise the first hit or the
            list of stripped hits.
        """
        root = root if root is not None else self.root
        node = root.xpath(xpath)
        if not node:
            return None
        if not take_first:
            return [x.strip() for x in node]
        return node[0].strip() if strip_element else node[0]

    def _parse_cover(self, generic_cover) -> Optional[str]:
        """Return the cover URL from the og:image meta tag, else *generic_cover*."""
        return (
            self._parse_xpath_node(xpath=LubimyCzytac.COVER, take_first=True)
            or generic_cover
        )

    def _parse_publisher(self) -> Optional[str]:
        """Return the publisher name, or None when the page does not list one."""
        return self._parse_xpath_node(xpath=LubimyCzytac.PUBLISHER, take_first=True)

    def _parse_languages(self, locale: str) -> List[str]:
        """Return the language names for the page, rendered for *locale*.

        Only "polski" and "angielski" are recognised in the page text.
        """
        languages = []
        lang = self._parse_xpath_node(xpath=LubimyCzytac.LANGUAGES, take_first=True)
        if lang:
            if "polski" in lang:
                languages.append("pol")
            if "angielski" in lang:
                languages.append("eng")
        return [get_language_name(locale, language) for language in languages]

    def _parse_series(self) -> Tuple[Optional[str], Optional[Union[float, int]]]:
        """Return ``(series name, volume index)`` parsed from "<name> (tom <n>)".

        ``(None, None)`` is returned when the page has no series entry or the
        entry carries no " (tom " part. The index is 0 when the volume text is
        not a plain number.
        """
        series = self._parse_xpath_node(xpath=LubimyCzytac.SERIES, take_first=True)
        # Test for the exact separator used by split(): a bare "tom " check
        # also matched names such as "Phantom ..." and broke the unpacking.
        if not series or " (tom " not in series:
            return None, None
        series_name, series_info = series.split(" (tom ", 1)
        series_info = series_info.replace(" ", "").replace(")", "")
        # Check if book is not a bundle, i.e. chapter 1-3
        if "-" in series_info:
            series_info = series_info.split("-", 1)[0]
        series_index = 0
        if series_info.replace(".", "").isdigit():
            try:
                series_index = get_int_or_float(series_info)
            except ValueError:
                # Text such as "1.2.3" passes the digit check but is no number.
                series_index = 0
        return series_name, series_index

    def _parse_tags(self) -> List[str]:
        """Return the breadcrumb category names with accents stripped."""
        # The xpath helper returns None when nothing matches.
        tags = self._parse_xpath_node(xpath=LubimyCzytac.TAGS, take_first=False) or []
        return [
            strip_accents(w.replace(", itd.", " itd."))
            for w in tags
            if isinstance(w, str)
        ]

    def _parse_from_summary(self, attribute_name: str) -> Optional[str]:
        """Read *attribute_name* from the page's JSON-LD ``<script>`` block.

        :return: the value (strings are stripped, other JSON values are
            returned unchanged), or None when the block is missing, is not
            valid JSON, is not a JSON object, or lacks the attribute.
        """
        summary_text = self._parse_xpath_node(xpath=LubimyCzytac.SUMMARY)
        if not summary_text:
            return None
        try:
            data = json.loads(summary_text)
        except ValueError:
            # json.JSONDecodeError is a ValueError; a broken block must not
            # abort parsing of the whole book.
            return None
        if not isinstance(data, dict):
            return None
        value = data.get(attribute_name)
        # JSON numbers have no .strip(); only strings are stripped.
        return value.strip() if isinstance(value, str) else value

    def _parse_rating(self) -> Optional[int]:
        """Return the page rating halved and rounded, or None when absent.

        NOTE(review): the halving presumably maps a 10-point scale to 5 stars
        - confirm against the site.
        """
        rating = self._parse_xpath_node(xpath=LubimyCzytac.RATING)
        return round(float(rating.replace(",", ".")) / 2) if rating else rating

    def _parse_date(self, xpath="first_publish") -> Optional[datetime.datetime]:
        """Parse one of the first-publication dates shown in the book details.

        :param xpath: "first_publish" (original edition) or
            "first_publish_pl" (Polish edition).
        :return: the parsed date, or None when it is missing or unparseable.
        """
        options = {
            "first_publish": LubimyCzytac.FIRST_PUBLISH_DATE,
            "first_publish_pl": LubimyCzytac.FIRST_PUBLISH_DATE_PL,
        }
        date = self._parse_xpath_node(xpath=options.get(xpath))
        if not date:
            return None
        try:
            return parser.parse(date)
        except (ValueError, OverflowError):
            # dateutil signals an unparseable string with ParserError (a
            # ValueError) or OverflowError; treat that as "no date".
            return None

    def _parse_isbn(self) -> Optional[str]:
        """Return the ISBN from the books:isbn meta tag, or None."""
        return self._parse_xpath_node(xpath=LubimyCzytac.ISBN)

    def _parse_description(self) -> str:
        """Return the sanitised description HTML plus page-count and date notes.

        The description block of the page is preferred; the og:description
        meta tag is the fallback.
        """
        description = ""
        description_node = self._parse_xpath_node(
            xpath=LubimyCzytac.DESCRIPTION, strip_element=False
        )
        if description_node is not None:
            # Drop <p class="source"> elements from the page before serialising.
            for source in self.root.xpath('//p[@class="source"]'):
                source.getparent().remove(source)
            description = tostring(description_node, method="html")
            description = sanitize_comments_html(description)
        else:
            description_node = self._parse_xpath_node(xpath=LubimyCzytac.META_TITLE)
            if description_node is not None:
                description = sanitize_comments_html(description_node)
        return self._add_extra_info_to_description(description=description)

    def _add_extra_info_to_description(self, description: str) -> str:
        """Append page count and first-publication dates, when known, to *description*."""
        pages = self._parse_from_summary(attribute_name="numberOfPages")
        if pages:
            description += LubimyCzytacParser.PAGES_TEMPLATE.format(pages)

        first_publish_date = self._parse_date()
        if first_publish_date:
            description += LubimyCzytacParser.PUBLISH_DATE_TEMPLATE.format(
                first_publish_date.strftime("%d.%m.%Y")
            )

        first_publish_date_pl = self._parse_date(xpath="first_publish_pl")
        if first_publish_date_pl:
            description += LubimyCzytacParser.PUBLISH_DATE_PL_TEMPLATE.format(
                first_publish_date_pl.strftime("%d.%m.%Y")
            )
        return description
--- a/cps/metadata_provider/scholar.py
+++ b/cps/metadata_provider/scholar.py
@ -15,46 +15,52 @@
 #
 #  You should have received a copy of the GNU General Public License
 #  along with this program. If not, see <http://www.gnu.org/licenses/>.
 import itertools
 from typing import Dict, List, Optional
 from urllib.parse import quote
 from scholarly import scholarly
-from cps.services.Metadata import Metadata
+from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
 class scholar(Metadata):
    __name__ = "Google Scholar"
    __id__ = "googlescholar"
    META_URL = "https://scholar.google.com/"
-    def search(self, query, generic_cover=""):
+    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List[MetaRecord]]:
        val = list()
        if self.active:
-            scholar_gen = scholarly.search_pubs(' '.join(query.split('+')))
+            title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
-            i = 0
+            if title_tokens:
-            for publication in scholar_gen:
+                tokens = [quote(t.encode("utf-8")) for t in title_tokens]
-                v = dict()
+                query = " ".join(tokens)
-                v['id'] = publication['url_scholarbib'].split(':')[1]
+            scholar_gen = itertools.islice(scholarly.search_pubs(query), 10)
-                v['title'] = publication['bib'].get('title')
+            for result in scholar_gen:
-                v['authors'] = publication['bib'].get('author', [])
+                match = self._parse_search_result(
-                v['description'] = publication['bib'].get('abstract', "")
+                    result=result, generic_cover=generic_cover, locale=locale
-                v['publisher'] = publication['bib'].get('venue', "")
+                )
-                if publication['bib'].get('pub_year'):
+                val.append(match)
                    v['publishedDate'] = publication['bib'].get('pub_year')+"-01-01"
                else:
                    v['publishedDate'] = ""
                v['tags'] = []
                v['rating'] = 0
                v['series'] = ""
                v['cover'] = ""
                v['url'] = publication.get('pub_url') or publication.get('eprint_url') or "",
                v['source'] = {
                    "id": self.__id__,
                    "description": "Google Scholar",
                    "link": "https://scholar.google.com/"
                }
                val.append(v)
                i += 1
                if (i >= 10):
                    break
        return val
    def _parse_search_result(
        self, result: Dict, generic_cover: str, locale: str
    ) -> MetaRecord:
        match = MetaRecord(
            id=result.get("pub_url", result.get("eprint_url", "")),
            title=result["bib"].get("title"),
            authors=result["bib"].get("author", []),
            url=result.get("pub_url", result.get("eprint_url", "")),
            source=MetaSourceInfo(
                id=self.__id__, description=self.__name__, link=scholar.META_URL
            ),
        )
-
+        match.cover = result.get("image", {}).get("original_url", generic_cover)
        match.description = result["bib"].get("abstract", "")
        match.publisher = result["bib"].get("venue", "")
        match.publishedDate = result["bib"].get("pub_year") + "-01-01"
        match.identifiers = {"scholar": match.id}
        return match
--- a/cps/opds.py
+++ b/cps/opds.py
@ -432,17 +432,9 @@ def feed_languagesindex():
    if current_user.filter_language() == u"all":
        languages = calibre_db.speaking_language()
    else:
        #try:
        #    cur_l = LC.parse(current_user.filter_language())
        #except UnknownLocaleError:
        #    cur_l = None
        languages = calibre_db.session.query(db.Languages).filter(
            db.Languages.lang_code == current_user.filter_language()).all()
        languages[0].name = isoLanguages.get_language_name(get_locale(), languages[0].lang_code)
        #if cur_l:
        #    languages[0].name = cur_l.get_language_name(get_locale())
        #else:
        #    languages[0].name = _(isoLanguages.get(part3=languages[0].lang_code).name)
    pagination = Pagination((int(off) / (int(config.config_books_per_page)) + 1), config.config_books_per_page,
                            len(languages))
    return render_xml_template('feed.xml', listelements=languages, folder='opds.feed_languages', pagination=pagination)
@ -530,7 +522,8 @@ def feed_search(term):
        entries, __, ___ = calibre_db.get_search_results(term, config_read_column=config.config_read_column)
        entries_count = len(entries) if len(entries) > 0 else 1
        pagination = Pagination(1, entries_count, entries_count)
-        return render_xml_template('feed.xml', searchterm=term, entries=entries, pagination=pagination)
+        items = [entry[0] for entry in entries]
        return render_xml_template('feed.xml', searchterm=term, entries=items, pagination=pagination)
    else:
        return render_xml_template('feed.xml', searchterm="")
--- a/cps/search_metadata.py
+++ b/cps/search_metadata.py
@ -16,25 +16,27 @@
 #  You should have received a copy of the GNU General Public License
 #  along with this program. If not, see <http://www.gnu.org/licenses/>.
 import os
 import json
 import importlib
 import sys
 import inspect
 import datetime
 import concurrent.futures
 import importlib
 import inspect
 import json
 import os
 import sys
 # from time import time
 from dataclasses import asdict
-from flask import Blueprint, request, Response, url_for
+from flask import Blueprint, Response, request, url_for
 from flask_login import current_user
 from flask_login import login_required
 from sqlalchemy.exc import InvalidRequestError, OperationalError
 from sqlalchemy.orm.attributes import flag_modified
 from sqlalchemy.exc import OperationalError, InvalidRequestError
 from . import constants, logger, ub
 from cps.services.Metadata import Metadata
 from . import constants, get_locale, logger, ub
 # current_milli_time = lambda: int(round(time() * 1000))
-meta = Blueprint('metadata', __name__)
+meta = Blueprint("metadata", __name__)
 log = logger.create()
@ -42,7 +44,7 @@ new_list = list()
 meta_dir = os.path.join(constants.BASE_DIR, "cps", "metadata_provider")
 modules = os.listdir(os.path.join(constants.BASE_DIR, "cps", "metadata_provider"))
 for f in modules:
-    if os.path.isfile(os.path.join(meta_dir, f)) and not f.endswith('__init__.py'):
+    if os.path.isfile(os.path.join(meta_dir, f)) and not f.endswith("__init__.py"):
        a = os.path.basename(f)[:-3]
        try:
            importlib.import_module("cps.metadata_provider." + a)
@ -51,34 +53,46 @@ for f in modules:
            log.error("Import error for metadata source: {}".format(a))
            pass
 def list_classes(provider_list):
    classes = list()
    for element in provider_list:
-        for name, obj in inspect.getmembers(sys.modules["cps.metadata_provider." + element]):
+        for name, obj in inspect.getmembers(
-            if inspect.isclass(obj) and name != "Metadata" and issubclass(obj, Metadata):
+            sys.modules["cps.metadata_provider." + element]
        ):
            if (
                inspect.isclass(obj)
                and name != "Metadata"
                and issubclass(obj, Metadata)
            ):
                classes.append(obj())
    return classes
 cl = list_classes(new_list)
@meta.route("/metadata/provider")
@login_required
 def metadata_provider():
-    active = current_user.view_settings.get('metadata', {})
+    active = current_user.view_settings.get("metadata", {})
    provider = list()
    for c in cl:
        ac = active.get(c.__id__, True)
-        provider.append({"name": c.__name__, "active": ac, "initial": ac, "id": c.__id__})
+        provider.append(
-    return Response(json.dumps(provider), mimetype='application/json')
+            {"name": c.__name__, "active": ac, "initial": ac, "id": c.__id__}
        )
    return Response(json.dumps(provider), mimetype="application/json")
-@meta.route("/metadata/provider", methods=['POST'])
+
-@meta.route("/metadata/provider/<prov_name>", methods=['POST'])
+@meta.route("/metadata/provider", methods=["POST"])
@meta.route("/metadata/provider/<prov_name>", methods=["POST"])
@login_required
 def metadata_change_active_provider(prov_name):
    new_state = request.get_json()
-    active = current_user.view_settings.get('metadata', {})
+    active = current_user.view_settings.get("metadata", {})
-    active[new_state['id']] = new_state['value']
+    active[new_state["id"]] = new_state["value"]
-    current_user.view_settings['metadata'] = active
+    current_user.view_settings["metadata"] = active
    try:
        try:
            flag_modified(current_user, "view_settings")
@ -89,29 +103,33 @@ def metadata_change_active_provider(prov_name):
        log.error("Invalid request received: {}".format(request))
        return "Invalid request", 400
    if "initial" in new_state and prov_name:
-        for c in cl:
+        data = []
-            if c.__id__ == prov_name:
+        provider = next((c for c in cl if c.__id__ == prov_name), None)
-                data = c.search(new_state.get('query', ""))
+        if provider is not None:
-                break
+            data = provider.search(new_state.get("query", ""))
-        return Response(json.dumps(data), mimetype='application/json')
+        return Response(
            json.dumps([asdict(x) for x in data]), mimetype="application/json"
        )
    return ""
-@meta.route("/metadata/search", methods=['POST'])
+
# Run the posted ``query`` against every provider the current user has not
# disabled and return the combined results as a JSON list.
#
# Providers are queried concurrently on a thread pool of at most five workers;
# their records are converted to dicts and appended in completion order, so the
# order of the output is not fixed. A missing or empty query returns "[]".
@meta.route("/metadata/search", methods=["POST"])
@login_required
 def metadata_search():
-    query = request.form.to_dict().get('query')
+    query = request.form.to_dict().get("query")
    data = list()
-    active = current_user.view_settings.get('metadata', {})
+    active = current_user.view_settings.get("metadata", {})
    locale = get_locale()
    if query:
-        generic_cover = ""
+        static_cover = url_for("static", filename="generic_cover.jpg")
        # start = current_milli_time()
        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
            # Map each submitted future back to the provider it belongs to.
-            meta = {executor.submit(c.search, query, generic_cover): c for c in cl if active.get(c.__id__, True)}
+            meta = {
                executor.submit(c.search, query, static_cover, locale): c
                for c in cl
                if active.get(c.__id__, True)
            }
            for future in concurrent.futures.as_completed(meta):
                # NOTE(review): future.result() re-raises any exception from the
                # provider's search(), and search() is annotated as Optional, so a
                # None result would fail the iteration below -- confirm providers
                # always return a list.
-                data.extend(future.result())
+                data.extend([asdict(x) for x in future.result()])
-    return Response(json.dumps(data), mimetype='application/json')
+    # log.info({'Time elapsed {}'.format(current_milli_time()-start)})
-
+    return Response(json.dumps(data), mimetype="application/json")
--- a/cps/services/Metadata.py
+++ b/cps/services/Metadata.py
@ -15,13 +15,93 @@
 #
 #  You should have received a copy of the GNU General Public License
 #  along with this program. If not, see <http://www.gnu.org/licenses/>.
 import abc
 import dataclasses
 import os
 import re
 from typing import Dict, Generator, List, Optional, Union
 from cps import constants
-class Metadata():
# Describes the metadata source a record came from; stored on
# ``MetaRecord.source``. All three fields are required.
+@dataclasses.dataclass
 class MetaSourceInfo:
    # presumably the provider's ``__id__`` -- confirm against the providers
    id: str
    # presumably the provider's human-readable description -- confirm
    description: str
    # presumably the provider's home page URL -- confirm
    link: str
# One search result as returned by a provider's ``search`` method.
# ``id``, ``title``, ``authors``, ``url`` and ``source`` are required; every
# other field has a default. Field order defines the generated ``__init__``.
@dataclasses.dataclass
 class MetaRecord:
    id: Union[str, int]
    title: str
    authors: List[str]
    url: str
    source: MetaSourceInfo
    # NOTE(review): the default is a filesystem path under constants.STATIC_DIR,
    # while the search route passes providers a URL for the generic cover --
    # confirm which form consumers of ``cover`` expect.
    cover: str = os.path.join(constants.STATIC_DIR, 'generic_cover.jpg')
    description: Optional[str] = ""
    series: Optional[str] = None
    series_index: Optional[Union[int, float]] = 0
    # default_factory gives every record its own dict/list instead of a shared one
    identifiers: Dict[str, Union[str, int]] = dataclasses.field(default_factory=dict)
    publisher: Optional[str] = None
    publishedDate: Optional[str] = None
    rating: Optional[int] = 0
    languages: Optional[List[str]] = dataclasses.field(default_factory=list)
    tags: Optional[List[str]] = dataclasses.field(default_factory=list)
 class Metadata:
    """Base class for metadata providers.

    Subclasses override ``__name__`` and ``__id__`` and implement ``search``.
    ``active`` starts as ``True`` and can be changed with ``set_status``.
    """

    __name__ = "Generic"
    __id__ = "generic"

    def __init__(self):
        self.active = True

    def set_status(self, state):
        """Set the provider's ``active`` flag to ``state``."""
        self.active = state

    # NOTE: this class does not use ``abc.ABCMeta``, so the decorator below
    # documents intent only; instantiation is not blocked by it.
    @abc.abstractmethod
    def search(
        self, query: str, generic_cover: str = "", locale: str = "en"
    ) -> Optional[List["MetaRecord"]]:
        """Search the provider for ``query``; implemented by subclasses."""
        pass

    @staticmethod
    def get_title_tokens(
        title: str, strip_joiners: bool = True
    ) -> Generator[str, None, None]:
        """Yield the search-relevant words of ``title``.

        Taken from calibre source code.

        Bracketed edition/format markers such as ``(2010)`` or ``[Omnibus]``
        are removed, thousands separators inside numbers are dropped, and
        punctuation is replaced by spaces before the title is split on
        whitespace. Surrounding quotes are stripped from each token.

        :param title: the raw title or query string
        :param strip_joiners: when true, skip the words "a", "and", "the"
            and "&" (compared case-insensitively)
        :return: a generator over the remaining tokens, in order
        """
        title_patterns = [
            (re.compile(pat, re.IGNORECASE), repl)
            for pat, repl in [
                # Remove things like: (2010) (Omnibus) etc.
                (
                    r"(?i)[({\[](\d{4}|omnibus|anthology|hardcover|"
                    r"audiobook|audio\scd|paperback|turtleback|"
                    r"mass\s*market|edition|ed\.)[\])}]",
                    "",
                ),
                # Remove any strings that contain the substring edition inside
                # parentheses
                # NOTE(review): the dot in "ed." is unescaped here (unlike the
                # pattern above), so it matches "ed" plus any character.
                (r"(?i)[({\[].*?(edition|ed.).*?[\]})]", ""),
                # Remove commas used as separators in numbers
                (r"(\d+),(\d+)", r"\1\2"),
                # Remove hyphens only if they have whitespace before them
                (r"(\s-)", " "),
                # Replace other special chars with a space. The CJK and curly
                # quotes must sit inside the character class; left outside it
                # they become a literal suffix and the pattern never matches.
                (r"""[:,;!@$%^&*(){}.`~"\s\[\]/《》「」“”]""", " "),
            ]
        ]
        for pat, repl in title_patterns:
            title = pat.sub(repl, title)
        tokens = title.split()
        for token in tokens:
            token = token.strip().strip('"').strip("'")
            if token and (
                not strip_joiners or token.lower() not in ("a", "and", "the", "&")
            ):
                yield token
--- a/cps/static/js/get_meta.js
+++ b/cps/static/js/get_meta.js
@ -26,19 +26,26 @@ $(function () {
       )
    };
    /**
     * Merge the comma-separated values currently in the form field
     * `#<attribute_name>` with the values in `book[attribute_name]`.
     * Existing entries keep their order; new values are appended only if
     * they are not already present.
     *
     * @param {string} attribute_name  id of the form field and key on `book`
     * @param {Object} book            metadata record chosen by the user
     * @returns {string[]} the merged list of values
     */
    function getUniqueValues(attribute_name, book){
        // .val() returns undefined when the field is not on the page; treat
        // that like an empty field instead of throwing on .split().
        var fieldValue = $("#" + attribute_name).val() || "";
        var presentArray = $.map(fieldValue.split(","), $.trim);
        // Splitting an empty string yields [""], which is not a real entry.
        if ( presentArray.length === 1 && presentArray[0] === "") {
            presentArray = [];
        }
        $.each(book[attribute_name] || [], function(i, el) {
            if ($.inArray(el, presentArray) === -1) presentArray.push(el);
        });
        return presentArray;
    }
    function populateForm (book) {
        tinymce.get("description").setContent(book.description);
-        var uniqueTags = $.map($("#tags").val().split(","), $.trim);
+        var uniqueTags = getUniqueValues('tags', book)
-        if ( uniqueTags.length == 1 && uniqueTags[0] == "") {
+        var uniqueLanguages = getUniqueValues('languages', book)
            uniqueTags = [];
        }
        $.each(book.tags, function(i, el) {
            if ($.inArray(el, uniqueTags) === -1) uniqueTags.push(el);
        });
        var ampSeparatedAuthors = (book.authors || []).join(" & ");
        $("#bookAuthor").val(ampSeparatedAuthors);
        $("#book_title").val(book.title);
        $("#tags").val(uniqueTags.join(", "));
        $("#languages").val(uniqueLanguages.join(", "));
        $("#rating").data("rating").setValue(Math.round(book.rating));
        if(book.cover && $("#cover_url").length){
            $(".cover img").attr("src", book.cover);
@ -48,7 +55,32 @@ $(function () {
        $("#publisher").val(book.publisher);
        if (typeof book.series !== "undefined") {
            $("#series").val(book.series);
            $("#series_index").val(book.series_index);
        }
        if (typeof book.identifiers !== "undefined") {
            populateIdentifiers(book.identifiers)
        }
    }
    /**
     * Write identifiers into the identifier table: when a row for an
     * identifier type already exists its value field is updated, otherwise
     * a new row is appended with addIdentifier().
     *
     * @param {Object} identifiers  map of identifier type -> value
     */
    function populateIdentifiers(identifiers){
        // Compare the name property directly instead of concatenating the
        // type into a selector string: a type containing quotes or brackets
        // would otherwise produce an invalid selector.
        function inputsNamed(name) {
            return $("input").filter(function () {
                return this.name === name;
            });
        }
        for (const property in identifiers) {
            if (!Object.prototype.hasOwnProperty.call(identifiers, property)) {
                continue;
            }
            if (inputsNamed("identifier-type-" + property).length) {
                inputsNamed("identifier-val-" + property).val(identifiers[property]);
            }
            else {
                addIdentifier(property, identifiers[property]);
            }
        }
    }
    /**
     * Append a row with a type field, a value field and a "Remove" button
     * to `#identifier-table`.
     *
     * @param {string} name          identifier type; also used in the field names
     * @param {string|number} value  identifier value
     */
    function addIdentifier(name, value){
        // Only static markup and translated labels are assembled as HTML.
        var line = '<tr>';
        line += '<td><input type="text" class="form-control" required="required" placeholder="' + _("Identifier Type") +'"></td>';
        line += '<td><input type="text" class="form-control" required="required" placeholder="' + _("Identifier Value") +'"></td>';
        line += '<td><a class="btn btn-default" onclick="removeIdentifierLine(this)">'+_("Remove")+'</a></td>';
        line += '</tr>';
        var row = $(line);
        var inputs = row.find("input");
        // name and value may originate from metadata search results, so they
        // are set as attributes rather than spliced into the markup, where a
        // quote or "<" would break out of the attribute.
        inputs.eq(0).attr({name: "identifier-type-" + name, value: String(name)});
        inputs.eq(1).attr({name: "identifier-val-" + name, value: String(value)});
        $("#identifier-table").append(row);
    }
    function doSearch (keyword) {
--- a/cps/templates/feed.xml
+++ b/cps/templates/feed.xml
@ -40,35 +40,35 @@
  {% if entries and entries[0] %}
  {% for entry in entries %}
  <entry>
-    <title>{{entry[0].title}}</title>
+    <title>{{entry.title}}</title>
-    <id>urn:uuid:{{entry[0].uuid}}</id>
+    <id>urn:uuid:{{entry.uuid}}</id>
-    <updated>{{entry[0].atom_timestamp}}</updated>
+    <updated>{{entry.atom_timestamp}}</updated>
-    {% if entry[0].authors.__len__() > 0 %}
+    {% if entry.authors.__len__() > 0 %}
      <author>
-        <name>{{entry[0].authors[0].name}}</name>
+        <name>{{entry.authors[0].name}}</name>
      </author>
    {% endif %}
-    {% if entry[0].publishers.__len__() > 0 %}
+    {% if entry.publishers.__len__() > 0 %}
      <publisher>
-        <name>{{entry[0].publishers[0].name}}</name>
+        <name>{{entry.publishers[0].name}}</name>
      </publisher>
    {% endif %}
-    {% for lang in entry[0].languages %}
+    {% for lang in entry.languages %}
      <dcterms:language>{{lang.lang_code}}</dcterms:language>
    {% endfor %}
-    {% for tag in entry[0].tags %}
+    {% for tag in entry.tags %}
    <category scheme="http://www.bisg.org/standards/bisac_subject/index.html"
              term="{{tag.name}}"
              label="{{tag.name}}"/>
    {% endfor %}
-    {% if entry[0].comments[0] %}<summary>{{entry[0].comments[0].text|striptags}}</summary>{% endif %}
+    {% if entry.comments[0] %}<summary>{{entry.comments[0].text|striptags}}</summary>{% endif %}
-    {% if entry[0].has_cover %}
+    {% if entry.has_cover %}
-    <link type="image/jpeg" href="{{url_for('opds.feed_get_cover', book_id=entry[0].id)}}" rel="http://opds-spec.org/image"/>
+    <link type="image/jpeg" href="{{url_for('opds.feed_get_cover', book_id=entry.id)}}" rel="http://opds-spec.org/image"/>
-    <link type="image/jpeg" href="{{url_for('opds.feed_get_cover', book_id=entry[0].id)}}" rel="http://opds-spec.org/image/thumbnail"/>
+    <link type="image/jpeg" href="{{url_for('opds.feed_get_cover', book_id=entry.id)}}" rel="http://opds-spec.org/image/thumbnail"/>
    {% endif %}
-    {% for format in entry[0].data %}
+    {% for format in entry.data %}
-    <link rel="http://opds-spec.org/acquisition" href="{{ url_for('opds.opds_download_link', book_id=entry[0].id, book_format=format.format|lower)}}"
+    <link rel="http://opds-spec.org/acquisition" href="{{ url_for('opds.opds_download_link', book_id=entry.id, book_format=format.format|lower)}}"
-          length="{{format.uncompressed_size}}" mtime="{{entry[0].atom_timestamp}}" type="{{format.format|lower|mimetype}}"/>
+          length="{{format.uncompressed_size}}" mtime="{{entry.atom_timestamp}}" type="{{format.format|lower|mimetype}}"/>
    {% endfor %}
  </entry>
  {% endfor %}
--- a/optional-requirements.txt
+++ b/optional-requirements.txt
@ -31,6 +31,9 @@ SQLAlchemy-Utils>=0.33.5,<0.39.0
 # metadata extraction
 rarfile>=2.7
 scholarly>=1.2.0,<1.6
 markdown2==2.4.2
 html2text==2020.1.16
 python-dateutil==2.8.2
 # Comics
 natsort>=2.2.0,<8.1.0
--- a/TestSummary_Linux.html
+++ b/TestSummary_Linux.html