Add clean_date method in douban metadata_provider
This commit is contained in:
		
							parent
							
								
									97cf20764b
								
							
						
					
					
						commit
						b54a170a00
					
				| 
						 | 
					@ -88,7 +88,7 @@ class Douban(Metadata):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            results = r.json()
 | 
					            results = r.json()
 | 
				
			||||||
            if results["total"] == 0:
 | 
					            if results["total"] == 0:
 | 
				
			||||||
                return val
 | 
					                return []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            book_id_list = [
 | 
					            book_id_list = [
 | 
				
			||||||
                self.ID_PATTERN.search(item).group("id")
 | 
					                self.ID_PATTERN.search(item).group("id")
 | 
				
			||||||
| 
						 | 
					@ -139,7 +139,7 @@ class Douban(Metadata):
 | 
				
			||||||
        match.cover = html.xpath(self.COVER_XPATH)[0].attrib["href"] or generic_cover
 | 
					        match.cover = html.xpath(self.COVER_XPATH)[0].attrib["href"] or generic_cover
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            rating_num = float(html.xpath(self.RATING_XPATH)[0].text.strip())
 | 
					            rating_num = float(html.xpath(self.RATING_XPATH)[0].text.strip())
 | 
				
			||||||
        except ValueError:
 | 
					        except Exception:
 | 
				
			||||||
            rating_num = 0
 | 
					            rating_num = 0
 | 
				
			||||||
        match.rating = int(-1 * rating_num // 2 * -1) if rating_num else 0
 | 
					        match.rating = int(-1 * rating_num // 2 * -1) if rating_num else 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -166,10 +166,41 @@ class Douban(Metadata):
 | 
				
			||||||
            elif self.SUBTITLE_PATTERN.search(text):
 | 
					            elif self.SUBTITLE_PATTERN.search(text):
 | 
				
			||||||
                match.title = f'{match.title}:' + element.tail.strip()
 | 
					                match.title = f'{match.title}:' + element.tail.strip()
 | 
				
			||||||
            elif self.PUBLISHED_DATE_PATTERN.search(text):
 | 
					            elif self.PUBLISHED_DATE_PATTERN.search(text):
 | 
				
			||||||
                match.publishedDate = element.tail.strip()
 | 
					                match.publishedDate = self._clean_date(element.tail.strip())
 | 
				
			||||||
            elif self.SUBTITLE_PATTERN.search(text):
 | 
					            elif self.SUBTITLE_PATTERN.search(text):
 | 
				
			||||||
                match.series = element.getnext().text
 | 
					                match.series = element.getnext().text
 | 
				
			||||||
            elif i_type := self.IDENTIFIERS_PATTERN.search(text):
 | 
					            elif i_type := self.IDENTIFIERS_PATTERN.search(text):
 | 
				
			||||||
                match.identifiers[i_type.group()] = element.tail.strip()
 | 
					                match.identifiers[i_type.group()] = element.tail.strip()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return match
 | 
					        return match
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    def _clean_date(self, date: str) -> str:
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        Clean up the date string to be in the format YYYY-MM-DD
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        Examples of possible patterns:
 | 
				
			||||||
 | 
					            '2014-7-16', '1988年4月', '1995-04', '2021-8', '2020-12-1', '1996年',
 | 
				
			||||||
 | 
					            '1972', '2004/11/01', '1959年3月北京第1版第1印'
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        year = date[:4]
 | 
				
			||||||
 | 
					        moon = "01"
 | 
				
			||||||
 | 
					        day = "01"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if len(date) > 5:
 | 
				
			||||||
 | 
					            digit = []
 | 
				
			||||||
 | 
					            ls = []
 | 
				
			||||||
 | 
					            for i in range(5, len(date)):
 | 
				
			||||||
 | 
					                if date[i].isdigit():
 | 
				
			||||||
 | 
					                    digit.append(date[i])
 | 
				
			||||||
 | 
					                elif digit:
 | 
				
			||||||
 | 
					                    ls.append("".join(digit) if len(digit)==2 else f"0{digit[0]}")
 | 
				
			||||||
 | 
					                    digit = []
 | 
				
			||||||
 | 
					            if digit:
 | 
				
			||||||
 | 
					                ls.append("".join(digit) if len(digit)==2 else f"0{digit[0]}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            moon = ls[0]
 | 
				
			||||||
 | 
					            if len(ls)>1:
 | 
				
			||||||
 | 
					                day = ls[1]     
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return f"{year}-{moon}-{day}"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user