run lubimyczytac detail pages in threadpool
This commit is contained in:
parent
d55626d445
commit
362fdc5716
|
@ -1,5 +1,4 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
|
||||
# Copyright (C) 2021 OzzieIsaacs
|
||||
#
|
||||
|
@ -18,7 +17,8 @@
|
|||
import datetime
|
||||
import json
|
||||
import re
|
||||
from typing import Dict, Generator, List, Optional, Tuple, Union
|
||||
from multiprocessing.pool import ThreadPool
|
||||
from typing import Dict, List, Optional, Tuple, Union
|
||||
from urllib.parse import quote
|
||||
|
||||
import requests
|
||||
|
@ -114,13 +114,14 @@ class LubimyCzytac(Metadata):
|
|||
lc_parser = LubimyCzytacParser(root=root, metadata=self)
|
||||
matches = lc_parser.parse_search_results()
|
||||
if matches:
|
||||
final_matches = []
|
||||
for match in matches:
|
||||
response = requests.get(match.get("url"))
|
||||
match = lc_parser.parse_single_book(
|
||||
match=match, response=response, generic_cover=generic_cover
|
||||
with ThreadPool(processes=10) as pool:
|
||||
final_matches = pool.starmap(
|
||||
lc_parser.parse_single_book,
|
||||
[
|
||||
(match, generic_cover)
|
||||
for match in matches
|
||||
],
|
||||
)
|
||||
final_matches.append(match)
|
||||
return final_matches
|
||||
return matches
|
||||
|
||||
|
@ -146,46 +147,6 @@ class LubimyCzytac(Metadata):
|
|||
return ""
|
||||
return f"{LubimyCzytac.BASE_URL}/szukaj/ksiazki?phrase={query}"
|
||||
|
||||
@staticmethod
|
||||
def get_title_tokens(
|
||||
title: str, strip_joiners: bool = True
|
||||
) -> Generator[str, None, None]:
|
||||
"""
|
||||
Taken from calibre source code
|
||||
"""
|
||||
title_patterns = [
|
||||
(re.compile(pat, re.IGNORECASE), repl)
|
||||
for pat, repl in [
|
||||
# Remove things like: (2010) (Omnibus) etc.
|
||||
(
|
||||
r"(?i)[({\[](\d{4}|omnibus|anthology|hardcover|"
|
||||
r"audiobook|audio\scd|paperback|turtleback|"
|
||||
r"mass\s*market|edition|ed\.)[\])}]",
|
||||
"",
|
||||
),
|
||||
# Remove any strings that contain the substring edition inside
|
||||
# parentheses
|
||||
(r"(?i)[({\[].*?(edition|ed.).*?[\]})]", ""),
|
||||
# Remove commas used a separators in numbers
|
||||
(r"(\d+),(\d+)", r"\1\2"),
|
||||
# Remove hyphens only if they have whitespace before them
|
||||
(r"(\s-)", " "),
|
||||
# Replace other special chars with a space
|
||||
(r"""[:,;!@$%^&*(){}.`~"\s\[\]/]《》「」“”""", " "),
|
||||
]
|
||||
]
|
||||
|
||||
for pat, repl in title_patterns:
|
||||
title = pat.sub(repl, title)
|
||||
|
||||
tokens = title.split()
|
||||
for token in tokens:
|
||||
token = token.strip().strip('"').strip("'")
|
||||
if token and (
|
||||
not strip_joiners or token.lower() not in ("a", "and", "the", "&")
|
||||
):
|
||||
yield token
|
||||
|
||||
|
||||
class LubimyCzytacParser:
|
||||
PAGES_TEMPLATE = "<p id='strony'>Książka ma {0} stron(y).</p>"
|
||||
|
@ -232,8 +193,9 @@ class LubimyCzytacParser:
|
|||
return matches
|
||||
|
||||
def parse_single_book(
|
||||
self, match: Dict, response, generic_cover: str
|
||||
self, match: Dict, generic_cover: str
|
||||
) -> MetaRecord:
|
||||
response = requests.get(match.get("url"))
|
||||
self.root = fromstring(response.text)
|
||||
match["series"], match["series_index"] = self._parse_series()
|
||||
match["tags"] = self._parse_tags()
|
||||
|
|
|
@ -16,25 +16,23 @@
|
|||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import os
|
||||
import json
|
||||
import importlib
|
||||
import sys
|
||||
import inspect
|
||||
import datetime
|
||||
import concurrent.futures
|
||||
import importlib
|
||||
import inspect
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
from flask import Blueprint, request, Response, url_for
|
||||
from flask import Blueprint, Response, request, url_for
|
||||
from flask_login import current_user
|
||||
from flask_login import login_required
|
||||
from sqlalchemy.exc import InvalidRequestError, OperationalError
|
||||
from sqlalchemy.orm.attributes import flag_modified
|
||||
from sqlalchemy.exc import OperationalError, InvalidRequestError
|
||||
|
||||
from . import constants, logger, ub
|
||||
from cps.services.Metadata import Metadata
|
||||
from . import constants, logger, ub
|
||||
|
||||
|
||||
meta = Blueprint('metadata', __name__)
|
||||
meta = Blueprint("metadata", __name__)
|
||||
|
||||
log = logger.create()
|
||||
|
||||
|
@ -42,7 +40,7 @@ new_list = list()
|
|||
meta_dir = os.path.join(constants.BASE_DIR, "cps", "metadata_provider")
|
||||
modules = os.listdir(os.path.join(constants.BASE_DIR, "cps", "metadata_provider"))
|
||||
for f in modules:
|
||||
if os.path.isfile(os.path.join(meta_dir, f)) and not f.endswith('__init__.py'):
|
||||
if os.path.isfile(os.path.join(meta_dir, f)) and not f.endswith("__init__.py"):
|
||||
a = os.path.basename(f)[:-3]
|
||||
try:
|
||||
importlib.import_module("cps.metadata_provider." + a)
|
||||
|
@ -51,34 +49,46 @@ for f in modules:
|
|||
log.error("Import error for metadata source: {}".format(a))
|
||||
pass
|
||||
|
||||
|
||||
def list_classes(provider_list):
|
||||
classes = list()
|
||||
for element in provider_list:
|
||||
for name, obj in inspect.getmembers(sys.modules["cps.metadata_provider." + element]):
|
||||
if inspect.isclass(obj) and name != "Metadata" and issubclass(obj, Metadata):
|
||||
for name, obj in inspect.getmembers(
|
||||
sys.modules["cps.metadata_provider." + element]
|
||||
):
|
||||
if (
|
||||
inspect.isclass(obj)
|
||||
and name != "Metadata"
|
||||
and issubclass(obj, Metadata)
|
||||
):
|
||||
classes.append(obj())
|
||||
return classes
|
||||
|
||||
|
||||
cl = list_classes(new_list)
|
||||
|
||||
|
||||
@meta.route("/metadata/provider")
|
||||
@login_required
|
||||
def metadata_provider():
|
||||
active = current_user.view_settings.get('metadata', {})
|
||||
active = current_user.view_settings.get("metadata", {})
|
||||
provider = list()
|
||||
for c in cl:
|
||||
ac = active.get(c.__id__, True)
|
||||
provider.append({"name": c.__name__, "active": ac, "initial": ac, "id": c.__id__})
|
||||
return Response(json.dumps(provider), mimetype='application/json')
|
||||
provider.append(
|
||||
{"name": c.__name__, "active": ac, "initial": ac, "id": c.__id__}
|
||||
)
|
||||
return Response(json.dumps(provider), mimetype="application/json")
|
||||
|
||||
@meta.route("/metadata/provider", methods=['POST'])
|
||||
@meta.route("/metadata/provider/<prov_name>", methods=['POST'])
|
||||
|
||||
@meta.route("/metadata/provider", methods=["POST"])
|
||||
@meta.route("/metadata/provider/<prov_name>", methods=["POST"])
|
||||
@login_required
|
||||
def metadata_change_active_provider(prov_name):
|
||||
new_state = request.get_json()
|
||||
active = current_user.view_settings.get('metadata', {})
|
||||
active[new_state['id']] = new_state['value']
|
||||
current_user.view_settings['metadata'] = active
|
||||
active = current_user.view_settings.get("metadata", {})
|
||||
active[new_state["id"]] = new_state["value"]
|
||||
current_user.view_settings["metadata"] = active
|
||||
try:
|
||||
try:
|
||||
flag_modified(current_user, "view_settings")
|
||||
|
@ -91,27 +101,26 @@ def metadata_change_active_provider(prov_name):
|
|||
if "initial" in new_state and prov_name:
|
||||
for c in cl:
|
||||
if c.__id__ == prov_name:
|
||||
data = c.search(new_state.get('query', ""))
|
||||
data = c.search(new_state.get("query", ""))
|
||||
break
|
||||
return Response(json.dumps(data), mimetype='application/json')
|
||||
return Response(json.dumps(data), mimetype="application/json")
|
||||
return ""
|
||||
|
||||
@meta.route("/metadata/search", methods=['POST'])
|
||||
|
||||
@meta.route("/metadata/search", methods=["POST"])
|
||||
@login_required
|
||||
def metadata_search():
|
||||
query = request.form.to_dict().get('query')
|
||||
query = request.form.to_dict().get("query")
|
||||
data = list()
|
||||
active = current_user.view_settings.get('metadata', {})
|
||||
active = current_user.view_settings.get("metadata", {})
|
||||
if query:
|
||||
static_cover = url_for('static', filename='generic_cover.jpg')
|
||||
static_cover = url_for("static", filename="generic_cover.jpg")
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
|
||||
meta = {executor.submit(c.search, query, static_cover): c for c in cl if active.get(c.__id__, True)}
|
||||
meta = {
|
||||
executor.submit(c.search, query, static_cover): c
|
||||
for c in cl
|
||||
if active.get(c.__id__, True)
|
||||
}
|
||||
for future in concurrent.futures.as_completed(meta):
|
||||
data.extend(future.result())
|
||||
return Response(json.dumps(data), mimetype='application/json')
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
return Response(json.dumps(data), mimetype="application/json")
|
||||
|
|
|
@ -16,7 +16,8 @@
|
|||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
import abc
|
||||
from typing import Dict, List, Optional, TypedDict, Union
|
||||
import re
|
||||
from typing import Dict, Generator, List, Optional, TypedDict, Union
|
||||
|
||||
|
||||
class Metadata:
|
||||
|
@ -30,9 +31,49 @@ class Metadata:
|
|||
self.active = state
|
||||
|
||||
@abc.abstractmethod
|
||||
def search(self, query: str, generic_cover: str):
|
||||
def search(self, query: str, generic_cover: str = ""):
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
def get_title_tokens(
|
||||
title: str, strip_joiners: bool = True
|
||||
) -> Generator[str, None, None]:
|
||||
"""
|
||||
Taken from calibre source code
|
||||
"""
|
||||
title_patterns = [
|
||||
(re.compile(pat, re.IGNORECASE), repl)
|
||||
for pat, repl in [
|
||||
# Remove things like: (2010) (Omnibus) etc.
|
||||
(
|
||||
r"(?i)[({\[](\d{4}|omnibus|anthology|hardcover|"
|
||||
r"audiobook|audio\scd|paperback|turtleback|"
|
||||
r"mass\s*market|edition|ed\.)[\])}]",
|
||||
"",
|
||||
),
|
||||
# Remove any strings that contain the substring edition inside
|
||||
# parentheses
|
||||
(r"(?i)[({\[].*?(edition|ed.).*?[\]})]", ""),
|
||||
# Remove commas used a separators in numbers
|
||||
(r"(\d+),(\d+)", r"\1\2"),
|
||||
# Remove hyphens only if they have whitespace before them
|
||||
(r"(\s-)", " "),
|
||||
# Replace other special chars with a space
|
||||
(r"""[:,;!@$%^&*(){}.`~"\s\[\]/]《》「」“”""", " "),
|
||||
]
|
||||
]
|
||||
|
||||
for pat, repl in title_patterns:
|
||||
title = pat.sub(repl, title)
|
||||
|
||||
tokens = title.split()
|
||||
for token in tokens:
|
||||
token = token.strip().strip('"').strip("'")
|
||||
if token and (
|
||||
not strip_joiners or token.lower() not in ("a", "and", "the", "&")
|
||||
):
|
||||
yield token
|
||||
|
||||
|
||||
class MetaSourceInfo(TypedDict):
|
||||
id: str
|
||||
|
|
Loading…
Reference in New Issue
Block a user