Fix for #3016 (Parsing lubimyczytac: Tags instead of categories are taken, translator is appended to description)

This commit is contained in:
Ozzie Isaacs 2024-05-11 09:03:22 +02:00
parent 7e85894b3a
commit fc9a9cb9ac

View File

@ -97,12 +97,14 @@ class LubimyCzytac(Metadata):
LANGUAGES = f"{CONTAINER}//dt[contains(text(),'Język:')]{SIBLINGS}/text()" LANGUAGES = f"{CONTAINER}//dt[contains(text(),'Język:')]{SIBLINGS}/text()"
DESCRIPTION = f"{CONTAINER}//div[@class='collapse-content']" DESCRIPTION = f"{CONTAINER}//div[@class='collapse-content']"
SERIES = f"{CONTAINER}//span/a[contains(@href,'/cykl/')]/text()" SERIES = f"{CONTAINER}//span/a[contains(@href,'/cykl/')]/text()"
TRANSLATOR = f"{CONTAINER}//dt[contains(text(),'Tłumacz:')]{SIBLINGS}/a/text()"
DETAILS = "//div[@id='book-details']" DETAILS = "//div[@id='book-details']"
PUBLISH_DATE = "//dt[contains(@title,'Data pierwszego wydania" PUBLISH_DATE = "//dt[contains(@title,'Data pierwszego wydania"
FIRST_PUBLISH_DATE = f"{DETAILS}{PUBLISH_DATE} oryginalnego')]{SIBLINGS}[1]/text()" FIRST_PUBLISH_DATE = f"{DETAILS}{PUBLISH_DATE} oryginalnego')]{SIBLINGS}[1]/text()"
FIRST_PUBLISH_DATE_PL = f"{DETAILS}{PUBLISH_DATE} polskiego')]{SIBLINGS}[1]/text()" FIRST_PUBLISH_DATE_PL = f"{DETAILS}{PUBLISH_DATE} polskiego')]{SIBLINGS}[1]/text()"
TAGS = "//nav[@aria-label='breadcrumbs']//a[contains(@href,'/ksiazki/k/')]/span/text()" TAGS = "//a[contains(@href,'/ksiazki/t/')]/text()" # "//nav[@aria-label='breadcrumbs']//a[contains(@href,'/ksiazki/k/')]/span/text()"
RATING = "//meta[@property='books:rating:value']/@content" RATING = "//meta[@property='books:rating:value']/@content"
COVER = "//meta[@property='og:image']/@content" COVER = "//meta[@property='og:image']/@content"
@ -158,6 +160,7 @@ class LubimyCzytac(Metadata):
class LubimyCzytacParser: class LubimyCzytacParser:
PAGES_TEMPLATE = "<p id='strony'>Książka ma {0} stron(y).</p>" PAGES_TEMPLATE = "<p id='strony'>Książka ma {0} stron(y).</p>"
TRANSLATOR_TEMPLATE = "<p id='translator'>Tłumacz: {0}</p>"
PUBLISH_DATE_TEMPLATE = "<p id='pierwsze_wydanie'>Data pierwszego wydania: {0}</p>" PUBLISH_DATE_TEMPLATE = "<p id='pierwsze_wydanie'>Data pierwszego wydania: {0}</p>"
PUBLISH_DATE_PL_TEMPLATE = ( PUBLISH_DATE_PL_TEMPLATE = (
"<p id='pierwsze_wydanie'>Data pierwszego wydania w Polsce: {0}</p>" "<p id='pierwsze_wydanie'>Data pierwszego wydania w Polsce: {0}</p>"
@ -346,5 +349,9 @@ class LubimyCzytacParser:
description += LubimyCzytacParser.PUBLISH_DATE_PL_TEMPLATE.format( description += LubimyCzytacParser.PUBLISH_DATE_PL_TEMPLATE.format(
first_publish_date_pl.strftime("%d.%m.%Y") first_publish_date_pl.strftime("%d.%m.%Y")
) )
translator = self._parse_xpath_node(xpath=LubimyCzytac.TRANSLATOR)
if translator:
description += LubimyCzytacParser.TRANSLATOR_TEMPLATE.format(translator)
return description return description