fix locale for lubimyczytac languages

This commit is contained in:
collerek 2021-12-15 15:20:01 +01:00
parent 51bf35c2e4
commit bea14d1784

View File

@@ -18,7 +18,7 @@ import datetime
import json import json
import re import re
from multiprocessing.pool import ThreadPool from multiprocessing.pool import ThreadPool
from typing import Dict, List, Optional, Tuple, Union from typing import List, Optional, Tuple, Union
from urllib.parse import quote from urllib.parse import quote
import requests import requests
@@ -27,6 +27,7 @@ from html2text import HTML2Text
from lxml.html import HtmlElement, fromstring, tostring from lxml.html import HtmlElement, fromstring, tostring
from markdown2 import Markdown from markdown2 import Markdown
from cps.isoLanguages import get_language_name
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
SYMBOLS_TO_TRANSLATE = ( SYMBOLS_TO_TRANSLATE = (
@@ -119,7 +120,7 @@ class LubimyCzytac(Metadata):
with ThreadPool(processes=10) as pool: with ThreadPool(processes=10) as pool:
final_matches = pool.starmap( final_matches = pool.starmap(
lc_parser.parse_single_book, lc_parser.parse_single_book,
[(match, generic_cover) for match in matches], [(match, generic_cover, locale) for match in matches],
) )
return final_matches return final_matches
return matches return matches
@@ -191,21 +192,21 @@ class LubimyCzytacParser:
id=self.metadata.__id__, id=self.metadata.__id__,
description=self.metadata.__name__, description=self.metadata.__name__,
link=LubimyCzytac.BASE_URL, link=LubimyCzytac.BASE_URL,
) ),
) )
) )
return matches return matches
def parse_single_book(self, match: MetaRecord, generic_cover: str) -> MetaRecord: def parse_single_book(
self, match: MetaRecord, generic_cover: str, locale: str
) -> MetaRecord:
response = requests.get(match.url) response = requests.get(match.url)
self.root = fromstring(response.text) self.root = fromstring(response.text)
match.cover = self._parse_cover(generic_cover=generic_cover) match.cover = self._parse_cover(generic_cover=generic_cover)
match.description = self._parse_description() match.description = self._parse_description()
match.languages = self._parse_languages() match.languages = self._parse_languages(locale=locale)
match.publisher = self._parse_publisher() match.publisher = self._parse_publisher()
match.publishedDate = self._parse_from_summary( match.publishedDate = self._parse_from_summary(attribute_name="datePublished")
attribute_name="datePublished"
)
match.rating = self._parse_rating() match.rating = self._parse_rating()
match.series, match.series_index = self._parse_series() match.series, match.series_index = self._parse_series()
match.tags = self._parse_tags() match.tags = self._parse_tags()
@@ -241,15 +242,15 @@ class LubimyCzytacParser:
def _parse_publisher(self) -> Optional[str]: def _parse_publisher(self) -> Optional[str]:
return self._parse_xpath_node(xpath=LubimyCzytac.PUBLISHER, take_first=True) return self._parse_xpath_node(xpath=LubimyCzytac.PUBLISHER, take_first=True)
def _parse_languages(self) -> List[str]: def _parse_languages(self, locale: str) -> List[str]:
languages = list() languages = list()
lang = self._parse_xpath_node(xpath=LubimyCzytac.LANGUAGES, take_first=True) lang = self._parse_xpath_node(xpath=LubimyCzytac.LANGUAGES, take_first=True)
if lang: if lang:
if "polski" in lang: if "polski" in lang:
languages.append("Polish") languages.append("pol")
if "angielski" in lang: if "angielski" in lang:
languages.append("English") languages.append("eng")
return languages return [get_language_name(locale, language) for language in languages]
def _parse_series(self) -> Tuple[Optional[str], Optional[Union[float, int]]]: def _parse_series(self) -> Tuple[Optional[str], Optional[Union[float, int]]]:
series_index = 0 series_index = 0