diff --git a/cps/metadata_provider/lubimyczytac.py b/cps/metadata_provider/lubimyczytac.py
index ee66d1b4..1d4e18e1 100644
--- a/cps/metadata_provider/lubimyczytac.py
+++ b/cps/metadata_provider/lubimyczytac.py
@@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
-
# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
# Copyright (C) 2021 OzzieIsaacs
#
@@ -18,7 +17,8 @@
import datetime
import json
import re
-from typing import Dict, Generator, List, Optional, Tuple, Union
+from multiprocessing.pool import ThreadPool
+from typing import Dict, List, Optional, Tuple, Union
from urllib.parse import quote
import requests
@@ -114,13 +114,14 @@ class LubimyCzytac(Metadata):
lc_parser = LubimyCzytacParser(root=root, metadata=self)
matches = lc_parser.parse_search_results()
if matches:
- final_matches = []
- for match in matches:
- response = requests.get(match.get("url"))
- match = lc_parser.parse_single_book(
- match=match, response=response, generic_cover=generic_cover
+ with ThreadPool(processes=10) as pool:
+ final_matches = pool.starmap(
+ lc_parser.parse_single_book,
+ [
+ (match, generic_cover)
+ for match in matches
+ ],
)
- final_matches.append(match)
return final_matches
return matches
@@ -146,46 +147,6 @@ class LubimyCzytac(Metadata):
return ""
return f"{LubimyCzytac.BASE_URL}/szukaj/ksiazki?phrase={query}"
- @staticmethod
- def get_title_tokens(
- title: str, strip_joiners: bool = True
- ) -> Generator[str, None, None]:
- """
- Taken from calibre source code
- """
- title_patterns = [
- (re.compile(pat, re.IGNORECASE), repl)
- for pat, repl in [
- # Remove things like: (2010) (Omnibus) etc.
- (
- r"(?i)[({\[](\d{4}|omnibus|anthology|hardcover|"
- r"audiobook|audio\scd|paperback|turtleback|"
- r"mass\s*market|edition|ed\.)[\])}]",
- "",
- ),
- # Remove any strings that contain the substring edition inside
- # parentheses
- (r"(?i)[({\[].*?(edition|ed.).*?[\]})]", ""),
- # Remove commas used a separators in numbers
- (r"(\d+),(\d+)", r"\1\2"),
- # Remove hyphens only if they have whitespace before them
- (r"(\s-)", " "),
- # Replace other special chars with a space
- (r"""[:,;!@$%^&*(){}.`~"\s\[\]/]《》「」“”""", " "),
- ]
- ]
-
- for pat, repl in title_patterns:
- title = pat.sub(repl, title)
-
- tokens = title.split()
- for token in tokens:
- token = token.strip().strip('"').strip("'")
- if token and (
- not strip_joiners or token.lower() not in ("a", "and", "the", "&")
- ):
- yield token
-
class LubimyCzytacParser:
PAGES_TEMPLATE = "
Książka ma {0} stron(y).
"
@@ -232,8 +193,9 @@ class LubimyCzytacParser:
return matches
def parse_single_book(
- self, match: Dict, response, generic_cover: str
+ self, match: Dict, generic_cover: str
) -> MetaRecord:
+ response = requests.get(match.get("url"))
self.root = fromstring(response.text)
match["series"], match["series_index"] = self._parse_series()
match["tags"] = self._parse_tags()
diff --git a/cps/search_metadata.py b/cps/search_metadata.py
index e837fe21..7d9b6e05 100644
--- a/cps/search_metadata.py
+++ b/cps/search_metadata.py
@@ -16,25 +16,23 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-import os
-import json
-import importlib
-import sys
-import inspect
-import datetime
import concurrent.futures
+import importlib
+import inspect
+import json
+import os
+import sys
-from flask import Blueprint, request, Response, url_for
+from flask import Blueprint, Response, request, url_for
from flask_login import current_user
from flask_login import login_required
+from sqlalchemy.exc import InvalidRequestError, OperationalError
from sqlalchemy.orm.attributes import flag_modified
-from sqlalchemy.exc import OperationalError, InvalidRequestError
-from . import constants, logger, ub
from cps.services.Metadata import Metadata
+from . import constants, logger, ub
-
-meta = Blueprint('metadata', __name__)
+meta = Blueprint("metadata", __name__)
log = logger.create()
@@ -42,7 +40,7 @@ new_list = list()
meta_dir = os.path.join(constants.BASE_DIR, "cps", "metadata_provider")
modules = os.listdir(os.path.join(constants.BASE_DIR, "cps", "metadata_provider"))
for f in modules:
- if os.path.isfile(os.path.join(meta_dir, f)) and not f.endswith('__init__.py'):
+ if os.path.isfile(os.path.join(meta_dir, f)) and not f.endswith("__init__.py"):
a = os.path.basename(f)[:-3]
try:
importlib.import_module("cps.metadata_provider." + a)
@@ -51,34 +49,46 @@ for f in modules:
log.error("Import error for metadata source: {}".format(a))
pass
+
def list_classes(provider_list):
+ """
+ Build one instance of every metadata provider class.
+
+ For each module name in ``provider_list`` the already-imported module
+ ``cps.metadata_provider.<name>`` is looked up in ``sys.modules`` and
+ every class in it that subclasses ``Metadata`` is instantiated with no
+ arguments.
+
+ :param provider_list: iterable of provider module names (no package
+ prefix, no ``.py`` suffix).
+ :return: list of provider instances, in discovery order.
+ """
classes = list()
for element in provider_list:
- for name, obj in inspect.getmembers(sys.modules["cps.metadata_provider." + element]):
- if inspect.isclass(obj) and name != "Metadata" and issubclass(obj, Metadata):
+ for name, obj in inspect.getmembers(
+ sys.modules["cps.metadata_provider." + element]
+ ):
+ # Skip the member named "Metadata" (presumably the base class the
+ # provider module imported) so only subclasses are instantiated.
+ if (
+ inspect.isclass(obj)
+ and name != "Metadata"
+ and issubclass(obj, Metadata)
+ ):
classes.append(obj())
return classes
+
cl = list_classes(new_list)
+
@meta.route("/metadata/provider")
@login_required
def metadata_provider():
+ """
+ Return the known metadata providers as a JSON list.
+
+ Each entry carries the provider's ``name`` and ``id`` plus ``active``
+ and ``initial``, both set to the current user's stored state for that
+ provider.
+ """
- active = current_user.view_settings.get('metadata', {})
+ active = current_user.view_settings.get("metadata", {})
provider = list()
for c in cl:
+ # A provider without a stored per-user setting counts as active.
ac = active.get(c.__id__, True)
- provider.append({"name": c.__name__, "active": ac, "initial": ac, "id": c.__id__})
- return Response(json.dumps(provider), mimetype='application/json')
+ provider.append(
+ {"name": c.__name__, "active": ac, "initial": ac, "id": c.__id__}
+ )
+ return Response(json.dumps(provider), mimetype="application/json")
-@meta.route("/metadata/provider", methods=['POST'])
-@meta.route("/metadata/provider/", methods=['POST'])
+
+@meta.route("/metadata/provider", methods=["POST"])
+@meta.route("/metadata/provider/", methods=["POST"])
@login_required
def metadata_change_active_provider(prov_name):
new_state = request.get_json()
- active = current_user.view_settings.get('metadata', {})
- active[new_state['id']] = new_state['value']
- current_user.view_settings['metadata'] = active
+ active = current_user.view_settings.get("metadata", {})
+ active[new_state["id"]] = new_state["value"]
+ current_user.view_settings["metadata"] = active
try:
try:
flag_modified(current_user, "view_settings")
@@ -91,27 +101,26 @@ def metadata_change_active_provider(prov_name):
if "initial" in new_state and prov_name:
for c in cl:
if c.__id__ == prov_name:
- data = c.search(new_state.get('query', ""))
+ data = c.search(new_state.get("query", ""))
break
- return Response(json.dumps(data), mimetype='application/json')
+ return Response(json.dumps(data), mimetype="application/json")
return ""
-@meta.route("/metadata/search", methods=['POST'])
+
+@meta.route("/metadata/search", methods=["POST"])
@login_required
def metadata_search():
+ """
+ Query every active metadata provider and return the merged results.
+
+ Reads ``query`` from the posted form data. When it is non-empty, each
+ provider in ``cl`` that the current user has not deactivated is
+ searched on a thread pool of at most five workers, and the result
+ lists are concatenated in completion order. The combined list is
+ returned as a JSON response.
+ """
- query = request.form.to_dict().get('query')
+ query = request.form.to_dict().get("query")
data = list()
- active = current_user.view_settings.get('metadata', {})
+ active = current_user.view_settings.get("metadata", {})
if query:
- static_cover = url_for('static', filename='generic_cover.jpg')
+ static_cover = url_for("static", filename="generic_cover.jpg")
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
- meta = {executor.submit(c.search, query, static_cover): c for c in cl if active.get(c.__id__, True)}
+ # NOTE(review): this local ``meta`` shadows the module-level
+ # Blueprint of the same name for the rest of the function.
+ meta = {
+ executor.submit(c.search, query, static_cover): c
+ for c in cl
+ if active.get(c.__id__, True)
+ }
+ # NOTE(review): future.result() propagates an exception raised by
+ # a provider's search(); nothing here catches it -- confirm that
+ # one failing provider should fail the whole request.
for future in concurrent.futures.as_completed(meta):
data.extend(future.result())
- return Response(json.dumps(data), mimetype='application/json')
-
-
-
-
-
-
+ return Response(json.dumps(data), mimetype="application/json")
diff --git a/cps/services/Metadata.py b/cps/services/Metadata.py
index 17a9e38e..1464411a 100644
--- a/cps/services/Metadata.py
+++ b/cps/services/Metadata.py
@@ -16,7 +16,8 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import abc
-from typing import Dict, List, Optional, TypedDict, Union
+import re
+from typing import Dict, Generator, List, Optional, TypedDict, Union
class Metadata:
@@ -30,9 +31,49 @@ class Metadata:
self.active = state
@abc.abstractmethod
- def search(self, query: str, generic_cover: str):
+ def search(self, query: str, generic_cover: str = ""):
pass
+    @staticmethod
+    def get_title_tokens(
+        title: str, strip_joiners: bool = True
+    ) -> Generator[str, None, None]:
+        """
+        Split a book title into search tokens (adapted from calibre).
+
+        Bracketed edition/format markers such as "(2010)" or
+        "[Deluxe Edition]" are dropped, thousands separators are removed
+        from numbers, and punctuation is replaced by whitespace before the
+        title is split on whitespace.
+
+        :param title: raw title to tokenize.
+        :param strip_joiners: when True, the words "a", "and", "the" and
+            "&" are omitted (compared case-insensitively).
+        :return: generator yielding the remaining non-empty tokens in
+            their original order.
+        """
+        # All patterns are compiled with re.IGNORECASE, so no inline (?i)
+        # flag is needed inside the pattern strings.
+        title_patterns = [
+            (re.compile(pat, re.IGNORECASE), repl)
+            for pat, repl in [
+                # Remove things like: (2010) (Omnibus) etc.
+                (
+                    r"[({\[](\d{4}|omnibus|anthology|hardcover|"
+                    r"audiobook|audio\scd|paperback|turtleback|"
+                    r"mass\s*market|edition|ed\.)[\])}]",
+                    "",
+                ),
+                # Remove any bracketed text that contains "edition" or the
+                # abbreviation "ed." (dot escaped so that ordinary words
+                # such as "Red" do not trigger the removal).
+                (r"[({\[].*?(edition|ed\.).*?[\]})]", ""),
+                # Remove commas used as separators in numbers
+                (r"(\d+),(\d+)", r"\1\2"),
+                # Remove hyphens only if they have whitespace before them
+                (r"(\s-)", " "),
+                # Replace other special chars with a space. The CJK and
+                # typographic quote characters must sit inside the
+                # character class; outside it they become a literal suffix
+                # and the pattern never matches ordinary punctuation.
+                (r"""[:,;!@$%^&*(){}.`~"\s\[\]/《》「」“”]""", " "),
+            ]
+        ]
+
+        for pat, repl in title_patterns:
+            title = pat.sub(repl, title)
+
+        for token in title.split():
+            token = token.strip().strip('"').strip("'")
+            if token and (
+                not strip_joiners or token.lower() not in ("a", "and", "the", "&")
+            ):
+                yield token
+
+
class MetaSourceInfo(TypedDict):
id: str