fix locale for lubimyczytac languages
This commit is contained in:
parent
51bf35c2e4
commit
bea14d1784
|
@ -18,7 +18,7 @@ import datetime
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from multiprocessing.pool import ThreadPool
|
from multiprocessing.pool import ThreadPool
|
||||||
from typing import Dict, List, Optional, Tuple, Union
|
from typing import List, Optional, Tuple, Union
|
||||||
from urllib.parse import quote
|
from urllib.parse import quote
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
@ -27,6 +27,7 @@ from html2text import HTML2Text
|
||||||
from lxml.html import HtmlElement, fromstring, tostring
|
from lxml.html import HtmlElement, fromstring, tostring
|
||||||
from markdown2 import Markdown
|
from markdown2 import Markdown
|
||||||
|
|
||||||
|
from cps.isoLanguages import get_language_name
|
||||||
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
|
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
|
||||||
|
|
||||||
SYMBOLS_TO_TRANSLATE = (
|
SYMBOLS_TO_TRANSLATE = (
|
||||||
|
@ -119,7 +120,7 @@ class LubimyCzytac(Metadata):
|
||||||
with ThreadPool(processes=10) as pool:
|
with ThreadPool(processes=10) as pool:
|
||||||
final_matches = pool.starmap(
|
final_matches = pool.starmap(
|
||||||
lc_parser.parse_single_book,
|
lc_parser.parse_single_book,
|
||||||
[(match, generic_cover) for match in matches],
|
[(match, generic_cover, locale) for match in matches],
|
||||||
)
|
)
|
||||||
return final_matches
|
return final_matches
|
||||||
return matches
|
return matches
|
||||||
|
@ -165,18 +166,18 @@ class LubimyCzytacParser:
|
||||||
title = self._parse_xpath_node(
|
title = self._parse_xpath_node(
|
||||||
root=result,
|
root=result,
|
||||||
xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
|
xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
|
||||||
f"{LubimyCzytac.TITLE_TEXT_PATH}",
|
f"{LubimyCzytac.TITLE_TEXT_PATH}",
|
||||||
)
|
)
|
||||||
|
|
||||||
book_url = self._parse_xpath_node(
|
book_url = self._parse_xpath_node(
|
||||||
root=result,
|
root=result,
|
||||||
xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
|
xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
|
||||||
f"{LubimyCzytac.URL_PATH}",
|
f"{LubimyCzytac.URL_PATH}",
|
||||||
)
|
)
|
||||||
authors = self._parse_xpath_node(
|
authors = self._parse_xpath_node(
|
||||||
root=result,
|
root=result,
|
||||||
xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
|
xpath=f"{LubimyCzytac.SINGLE_BOOK_RESULT_XPATH}"
|
||||||
f"{LubimyCzytac.AUTHORS_PATH}",
|
f"{LubimyCzytac.AUTHORS_PATH}",
|
||||||
take_first=False,
|
take_first=False,
|
||||||
)
|
)
|
||||||
if not all([title, book_url, authors]):
|
if not all([title, book_url, authors]):
|
||||||
|
@ -191,21 +192,21 @@ class LubimyCzytacParser:
|
||||||
id=self.metadata.__id__,
|
id=self.metadata.__id__,
|
||||||
description=self.metadata.__name__,
|
description=self.metadata.__name__,
|
||||||
link=LubimyCzytac.BASE_URL,
|
link=LubimyCzytac.BASE_URL,
|
||||||
)
|
),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
return matches
|
return matches
|
||||||
|
|
||||||
def parse_single_book(self, match: MetaRecord, generic_cover: str) -> MetaRecord:
|
def parse_single_book(
|
||||||
|
self, match: MetaRecord, generic_cover: str, locale: str
|
||||||
|
) -> MetaRecord:
|
||||||
response = requests.get(match.url)
|
response = requests.get(match.url)
|
||||||
self.root = fromstring(response.text)
|
self.root = fromstring(response.text)
|
||||||
match.cover = self._parse_cover(generic_cover=generic_cover)
|
match.cover = self._parse_cover(generic_cover=generic_cover)
|
||||||
match.description = self._parse_description()
|
match.description = self._parse_description()
|
||||||
match.languages = self._parse_languages()
|
match.languages = self._parse_languages(locale=locale)
|
||||||
match.publisher = self._parse_publisher()
|
match.publisher = self._parse_publisher()
|
||||||
match.publishedDate = self._parse_from_summary(
|
match.publishedDate = self._parse_from_summary(attribute_name="datePublished")
|
||||||
attribute_name="datePublished"
|
|
||||||
)
|
|
||||||
match.rating = self._parse_rating()
|
match.rating = self._parse_rating()
|
||||||
match.series, match.series_index = self._parse_series()
|
match.series, match.series_index = self._parse_series()
|
||||||
match.tags = self._parse_tags()
|
match.tags = self._parse_tags()
|
||||||
|
@ -241,15 +242,15 @@ class LubimyCzytacParser:
|
||||||
def _parse_publisher(self) -> Optional[str]:
|
def _parse_publisher(self) -> Optional[str]:
|
||||||
return self._parse_xpath_node(xpath=LubimyCzytac.PUBLISHER, take_first=True)
|
return self._parse_xpath_node(xpath=LubimyCzytac.PUBLISHER, take_first=True)
|
||||||
|
|
||||||
def _parse_languages(self) -> List[str]:
|
def _parse_languages(self, locale: str) -> List[str]:
|
||||||
languages = list()
|
languages = list()
|
||||||
lang = self._parse_xpath_node(xpath=LubimyCzytac.LANGUAGES, take_first=True)
|
lang = self._parse_xpath_node(xpath=LubimyCzytac.LANGUAGES, take_first=True)
|
||||||
if lang:
|
if lang:
|
||||||
if "polski" in lang:
|
if "polski" in lang:
|
||||||
languages.append("Polish")
|
languages.append("pol")
|
||||||
if "angielski" in lang:
|
if "angielski" in lang:
|
||||||
languages.append("English")
|
languages.append("eng")
|
||||||
return languages
|
return [get_language_name(locale, language) for language in languages]
|
||||||
|
|
||||||
def _parse_series(self) -> Tuple[Optional[str], Optional[Union[float, int]]]:
|
def _parse_series(self) -> Tuple[Optional[str], Optional[Union[float, int]]]:
|
||||||
series_index = 0
|
series_index = 0
|
||||||
|
|
Loading…
Reference in New Issue
Block a user