backup metadata first step

This commit is contained in:
Ozzieisaacs 2022-09-10 18:26:52 +02:00
parent 110d283a50
commit ca0ee5d391
6 changed files with 232 additions and 3 deletions

View File

@ -322,6 +322,15 @@ class Data(Base):
return u"<Data('{0},{1}{2}{3}')>".format(self.book, self.format, self.uncompressed_size, self.name) return u"<Data('{0},{1}{2}{3}')>".format(self.book, self.format, self.uncompressed_size, self.name)
class Metadata_Dirtied(Base):
    """Marker row for a book whose metadata changed and awaits a metadata backup.

    One row exists per flagged book (the ``book`` column is unique). Rows are
    added by ``CalibreDB.set_metadata_dirty`` and removed by
    ``CalibreDB.delete_dirty_metadata``.
    """
    __tablename__ = 'metadata_dirtied'

    # Surrogate primary key, assigned by the database.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Id of the flagged book; unique so a book can only be flagged once.
    book = Column(Integer, ForeignKey('books.id'), nullable=False, unique=True)

    def __init__(self, book):
        """Create a marker for the book with id ``book``."""
        self.book = book

    def __repr__(self):
        # Same representation style as the other models in this file (e.g. Data).
        return u"<Metadata_Dirtied('{0},{1}')>".format(self.id, self.book)
class Books(Base): class Books(Base):
__tablename__ = 'books' __tablename__ = 'books'
@ -390,6 +399,9 @@ class CustomColumns(Base):
display_dict = json.loads(self.display) display_dict = json.loads(self.display)
return display_dict return display_dict
def to_json(self):
    """Placeholder for JSON serialization; not implemented yet, returns ``None``."""
    return None
class AlchemyEncoder(json.JSONEncoder): class AlchemyEncoder(json.JSONEncoder):
@ -642,6 +654,16 @@ class CalibreDB:
def get_book_format(self, book_id, file_format): def get_book_format(self, book_id, file_format):
return self.session.query(Data).filter(Data.book == book_id).filter(Data.format == file_format).first() return self.session.query(Data).filter(Data.book == book_id).filter(Data.format == file_format).first()
def set_metadata_dirty(self, book_id):
    """Flag the book with id ``book_id`` as having changed metadata.

    Adds a ``Metadata_Dirtied`` row to the session unless one already exists
    for this book. The row is only added to the session; committing is left
    to the caller.
    """
    existing_marker = (self.session.query(Metadata_Dirtied)
                       .filter(Metadata_Dirtied.book == book_id)
                       .one_or_none())
    if existing_marker:
        # Already flagged, nothing to do.
        return
    self.session.add(Metadata_Dirtied(book_id))
def delete_dirty_metadata(self, book_id):
    """Remove the dirty-metadata marker of the book with id ``book_id`` and commit.

    An ``OperationalError`` raised by the database is not propagated: the
    session is rolled back and the error is logged.
    """
    try:
        dirty_rows = self.session.query(Metadata_Dirtied).filter(Metadata_Dirtied.book == book_id)
        dirty_rows.delete()
        self.session.commit()
    except OperationalError as e:
        # Undo the pending delete first, then report the failure.
        self.session.rollback()
        log.error("Database error: {}".format(e))
# Language and content filters for displaying in the UI # Language and content filters for displaying in the UI
def common_filters(self, allow_show_archived=False, return_all_languages=False): def common_filters(self, allow_show_archived=False, return_all_languages=False):

View File

@ -203,6 +203,7 @@ def edit_book(book_id):
if modify_date: if modify_date:
book.last_modified = datetime.utcnow() book.last_modified = datetime.utcnow()
kobo_sync_status.remove_synced_book(edited_books_id, all=True) kobo_sync_status.remove_synced_book(edited_books_id, all=True)
calibre_db.set_metadata_dirty(book.id)
calibre_db.session.merge(book) calibre_db.session.merge(book)
calibre_db.session.commit() calibre_db.session.commit()
@ -277,6 +278,8 @@ def upload():
move_coverfile(meta, db_book) move_coverfile(meta, db_book)
if modify_date:
calibre_db.set_metadata_dirty(book_id)
# save data to database, reread data # save data to database, reread data
calibre_db.session.commit() calibre_db.session.commit()
@ -555,6 +558,7 @@ def table_xchange_author_title():
renamed_author=renamed) renamed_author=renamed)
if modify_date: if modify_date:
book.last_modified = datetime.utcnow() book.last_modified = datetime.utcnow()
calibre_db.set_metadata_dirty(book.id)
try: try:
calibre_db.session.commit() calibre_db.session.commit()
except (OperationalError, IntegrityError, StaleDataError) as e: except (OperationalError, IntegrityError, StaleDataError) as e:

View File

@ -578,6 +578,7 @@ def deleteDatabaseEntry(ID):
# Gets cover file from gdrive # Gets cover file from gdrive
# ToDo: Check whether it is right that everyone gets read permission on cover files
def get_cover_via_gdrive(cover_path): def get_cover_via_gdrive(cover_path):
df = getFileFromEbooksFolder(cover_path, 'cover.jpg') df = getFileFromEbooksFolder(cover_path, 'cover.jpg')
if df: if df:
@ -600,6 +601,29 @@ def get_cover_via_gdrive(cover_path):
else: else:
return None return None
# Gets the metadata.opf backup file from gdrive
def get_metadata_backup_via_gdrive(metadata_path):
    """Return the ``webContentLink`` of the ``metadata.opf`` file in ``metadata_path``.

    The file is looked up with ``getFileFromEbooksFolder``. If no
    ``PermissionAdded`` row exists for its Google Drive id yet, an "anyone with
    the link" writer permission is inserted on the file and the id is recorded,
    so the permission is only added once. A database error while recording the
    id is logged and rolled back; the link is still returned.

    Returns ``None`` when the file is not found.
    """
    df = getFileFromEbooksFolder(metadata_path, 'metadata.opf')
    if not df:
        return None

    permission_known = session.query(PermissionAdded).filter(PermissionAdded.gdrive_id == df['id']).first()
    if not permission_known:
        df.GetPermissions()
        link_permission = {
            'type': 'anyone',
            'value': 'anyone',
            'role': 'writer',  # ToDo needs write access
            'withLink': True}
        df.InsertPermission(link_permission)
        # Remember that the permission was granted for this file id.
        permission_record = PermissionAdded()
        permission_record.gdrive_id = df['id']
        session.add(permission_record)
        try:
            session.commit()
        except OperationalError as ex:
            log.error_or_exception('Database error: {}'.format(ex))
            session.rollback()
    return df.metadata.get('webContentLink')
# Creates chunks for downloading big files # Creates chunks for downloading big files
def partial(total_byte_len, part_size_limit): def partial(total_byte_len, part_size_limit):
s = [] s = []

View File

@ -19,7 +19,6 @@
import os import os
import io import io
import sys
import mimetypes import mimetypes
import re import re
import shutil import shutil
@ -686,6 +685,7 @@ def update_dir_structure(book_id,
def delete_book(book, calibrepath, book_format): def delete_book(book, calibrepath, book_format):
if not book_format: if not book_format:
clear_cover_thumbnail_cache(book.id) ## here it breaks clear_cover_thumbnail_cache(book.id) ## here it breaks
calibre_db.delete_dirty_metadata(book.id)
if config.config_use_google_drive: if config.config_use_google_drive:
return delete_book_gdrive(book, book_format) return delete_book_gdrive(book, book_format)
else: else:

View File

@ -23,7 +23,7 @@ from .services.background_scheduler import BackgroundScheduler, use_APScheduler
from .tasks.database import TaskReconnectDatabase from .tasks.database import TaskReconnectDatabase
from .tasks.thumbnail import TaskGenerateCoverThumbnails, TaskGenerateSeriesThumbnails, TaskClearCoverThumbnailCache from .tasks.thumbnail import TaskGenerateCoverThumbnails, TaskGenerateSeriesThumbnails, TaskClearCoverThumbnailCache
from .services.worker import WorkerThread from .services.worker import WorkerThread
from .tasks.metadata_backup import TaskBackupMetadata
def get_scheduled_tasks(reconnect=True): def get_scheduled_tasks(reconnect=True):
tasks = list() tasks = list()
@ -32,6 +32,10 @@ def get_scheduled_tasks(reconnect=True):
if reconnect: if reconnect:
tasks.append([lambda: TaskReconnectDatabase(), 'reconnect', False]) tasks.append([lambda: TaskReconnectDatabase(), 'reconnect', False])
# ToDo make configurable. Generate metadata.opf file for each changed book
if True:
tasks.append([lambda: TaskBackupMetadata(), 'backup metadata', False])
# Generate all missing book cover thumbnails # Generate all missing book cover thumbnails
if config.schedule_generate_book_covers: if config.schedule_generate_book_covers:
tasks.append([lambda: TaskClearCoverThumbnailCache(0), 'delete superfluous book covers', True]) tasks.append([lambda: TaskClearCoverThumbnailCache(0), 'delete superfluous book covers', True])
@ -91,6 +95,7 @@ def should_task_be_running(start, duration):
end_time = start_time + datetime.timedelta(hours=duration // 60, minutes=duration % 60) end_time = start_time + datetime.timedelta(hours=duration // 60, minutes=duration % 60)
return start_time < now < end_time return start_time < now < end_time
def calclulate_end_time(start, duration): def calclulate_end_time(start, duration):
start_time = datetime.datetime.now().replace(hour=start, minute=0) start_time = datetime.datetime.now().replace(hour=start, minute=0)
return start_time + datetime.timedelta(hours=duration // 60, minutes=duration % 60) return start_time + datetime.timedelta(hours=duration // 60, minutes=duration % 60)

View File

@ -0,0 +1,174 @@
# -*- coding: utf-8 -*-
# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
# Copyright (C) 2020 monkey
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import datetime
import os
import json
from urllib.request import urlopen
from lxml import etree
from html import escape
from cps import config, db, fs, gdriveutils, logger, ub
from cps.services.worker import CalibreTask, STAT_CANCELLED, STAT_ENDED
from flask_babel import lazy_gettext as N_
# Namespace URIs used in the generated metadata.opf files.
OPF_NAMESPACE = "http://www.idpf.org/2007/opf"
PURL_NAMESPACE = "http://purl.org/dc/elements/1.1/"

# "{uri}" prefixes for building namespace-qualified tag and attribute names.
OPF = "{" + OPF_NAMESPACE + "}"
PURL = "{" + PURL_NAMESPACE + "}"

# Register the prefixes to use for both namespaces when serializing.
etree.register_namespace("opf", OPF_NAMESPACE)
etree.register_namespace("dc", PURL_NAMESPACE)

# Namespace map for the root element: OPF is the default (unprefixed) namespace.
OPF_NS = {None: OPF_NAMESPACE}
# Namespace map passed to the metadata element and its children.
NSMAP = dict(dc=PURL_NAMESPACE, opf=OPF_NAMESPACE)
class TaskBackupMetadata(CalibreTask):
    """Background task that creates ``metadata.opf`` backups for books flagged as dirty.

    The task reads all ``Metadata_Dirtied`` rows, looks up the corresponding
    book and, for books stored locally without a ``metadata.opf`` file, writes
    a new one next to the book.

    NOTE(review): ``is_cancellable`` is True but ``run`` never checks for a
    cancel request - confirm how cancellation should be honoured.
    """

    def __init__(self, task_message=N_('Backing up Metadata')):
        super(TaskBackupMetadata, self).__init__(task_message)
        self.log = logger.create()
        # Session obtained from a CalibreDB instance created for this task.
        self.db_session = db.CalibreDB(expire_on_commit=False, init=True).session

    def run(self, worker_thread):
        """Process every dirty-metadata marker and report the overall result.

        The task result is reported once after all books were handled (also
        when there is nothing to do), and the database session is released
        exactly once, no matter how the run ends.
        """
        try:
            metadata_backup = self.db_session.query(db.Metadata_Dirtied).all()
            custom_columns = self.db_session.query(db.CustomColumns).all()
            missing_books = []
            for backup in metadata_backup:
                book = self.db_session.query(db.Books).filter(db.Books.id == backup.book).one_or_none()
                if book:
                    self.open_metadata(book, custom_columns)
                else:
                    self.log.error("Book {} not found in database".format(backup.book))
                    missing_books.append(str(backup.book))
            # TODO: processed markers are not removed yet; when that is added,
            # filter on db.Metadata_Dirtied.book == backup.book.
            if missing_books:
                self._handleError("Book {} not found in database".format(", ".join(missing_books)))
            else:
                self._handleSuccess()
        except Exception as ex:
            self.log.debug('Error creating metadata backup: ' + str(ex))
            self._handleError('Error creating metadata backup: ' + str(ex))
            self.db_session.rollback()
        finally:
            self._release_session()

    def _release_session(self):
        """Release the database session held by this task."""
        # NOTE(review): the original code called ``remove()``, which only a
        # scoped_session registry provides; fall back to ``close()`` in case
        # ``CalibreDB.session`` is a plain Session - confirm the actual type.
        release = getattr(self.db_session, "remove", None) or self.db_session.close
        release()

    def open_metadata(self, book, custom_columns):
        """Make sure a ``metadata.opf`` backup exists for ``book``.

        Google Drive: the file's download link is resolved and opened once to
        check that it is readable; nothing is written yet.
        Local storage: a missing ``metadata.opf`` is created; an existing one
        is only parsed (updating it is not implemented yet).

        Returns the path of a newly created local backup file, otherwise None.
        Raises an Exception if Google Drive is not ready or the link cannot be
        resolved; errors from opening the link or parsing the file propagate.
        """
        if config.config_use_google_drive:
            if not gdriveutils.is_gdrive_ready():
                raise Exception('Google Drive is configured but not ready')

            web_content_link = gdriveutils.get_metadata_backup_via_gdrive(book.path)
            if not web_content_link:
                raise Exception('Google Drive metadata.opf url not found')

            try:
                # ToDo: the remote file is only opened and closed again so far.
                with urlopen(web_content_link):
                    pass
            except Exception as ex:
                # Bubble exception to calling function, keeping the traceback
                self.log.debug('Error reading metadata.opf: ' + str(ex))
                raise
            return None

        book_metadata_filepath = os.path.join(config.config_calibre_dir, book.path, 'metadata.opf')
        if not os.path.isfile(book_metadata_filepath):
            # Do not reopen the file for writing afterwards: that would
            # truncate the backup that was just written.
            self.create_new_metadata_backup(book, custom_columns, book_metadata_filepath)
            return book_metadata_filepath

        # ToDo: an existing backup is only parsed, not updated yet.
        etree.parse(book_metadata_filepath)
        return None

    def create_new_metadata_backup(self, book, custom_columns, book_metadata_filepath):
        """Build an OPF 2.0 document for ``book`` and write it to ``book_metadata_filepath``.

        The document contains the identifiers, title, authors, dates, language,
        tags, series information and one ``meta`` element per custom column,
        followed by a guide entry pointing to ``cover.jpg``.
        """
        timestamp_format = "%Y-%m-%dT%H:%M:%S+00:00"

        def add_meta(parent, name, content):
            # Attribute values are escaped by lxml on serialization, so they
            # must not be HTML-escaped beforehand.
            etree.SubElement(parent, "meta", name=name, content=content, nsmap=NSMAP)

        # generate root package element
        package = etree.Element(OPF + "package", nsmap=OPF_NS)
        package.set("unique-identifier", "uuid_id")
        package.set("version", "2.0")

        # generate metadata element and all subelements of it
        metadata = etree.SubElement(package, "metadata", nsmap=NSMAP)
        identifier = etree.SubElement(metadata, PURL + "identifier", id="calibre_id", nsmap=NSMAP)
        identifier.set(OPF + "scheme", "calibre")
        identifier.text = str(book.id)
        identifier2 = etree.SubElement(metadata, PURL + "identifier", id="uuid_id", nsmap=NSMAP)
        identifier2.set(OPF + "scheme", "uuid")
        identifier2.text = book.uuid
        title = etree.SubElement(metadata, PURL + "title", nsmap=NSMAP)
        title.text = book.title
        # NOTE(review): str() of authors, languages, tags and series is kept
        # from the original; verify it yields the plain names and not the
        # objects' repr.
        for author in book.authors:
            creator = etree.SubElement(metadata, PURL + "creator", nsmap=NSMAP)
            creator.text = str(author)
            creator.set(OPF + "file-as", book.author_sort)  # ToDo Check
            creator.set(OPF + "role", "aut")
        contributor = etree.SubElement(metadata, PURL + "contributor", nsmap=NSMAP)
        contributor.text = "calibre (5.7.2) [https://calibre-ebook.com]"
        contributor.set(OPF + "file-as", "calibre")  # ToDo Check
        contributor.set(OPF + "role", "bpk")
        date = etree.SubElement(metadata, PURL + "date", nsmap=NSMAP)
        date.text = datetime.datetime.strftime(book.pubdate, timestamp_format)
        language = etree.SubElement(metadata, PURL + "language", nsmap=NSMAP)
        if book.languages:
            language.text = str(book.languages)
        else:
            language.text = ""  # ToDo: insert locale (2 letter code)
        if book.tags:
            subject = etree.SubElement(metadata, PURL + "subject", nsmap=NSMAP)
            subject.text = str(book.tags)

        # Author link map as a JSON object: author -> (empty) link.
        author_links = {str(a): "" for a in book.authors}
        add_meta(metadata, "calibre:author_link_map", json.dumps(author_links, ensure_ascii=False))
        add_meta(metadata, "calibre:series", str(book.series))
        add_meta(metadata, "calibre:series_index", str(book.series_index))
        add_meta(metadata, "calibre:timestamp", datetime.datetime.strftime(book.timestamp, timestamp_format))
        add_meta(metadata, "calibre:title_sort", book.sort)
        for cc in custom_columns:
            # get_display_dict() returns parsed JSON, so serialize it back to a
            # string for use as an attribute value.
            add_meta(metadata, "calibre:user_metadata:#{}".format(cc.label),
                     json.dumps(cc.get_display_dict(), ensure_ascii=False))

        # generate guide element and all sub elements of it
        guide = etree.SubElement(package, "guide")
        etree.SubElement(guide, "reference", type="cover", title="Titelbild", href="cover.jpg")

        # prepare finalize everything and output
        doc = etree.ElementTree(package)
        with open(book_metadata_filepath, 'wb') as f:
            doc.write(f, xml_declaration=True, encoding='utf-8', pretty_print=True)

    @property
    def name(self):
        """Name of this task."""
        return "Backing up Metadata"

    @property
    def is_cancellable(self):
        """This task declares itself as cancellable."""
        return True