From 541fc7e14ebe3716fce58593a2ff3f5b23403e3d Mon Sep 17 00:00:00 2001 From: mmonkey Date: Tue, 22 Dec 2020 17:49:21 -0600 Subject: [PATCH] fixed thumbnail generate tasks, added thumbnail cleanup task, added reconnect db scheduled job --- cps/fs.py | 61 ++++++++++++++ cps/helper.py | 7 +- cps/schedule.py | 18 ++++- cps/services/worker.py | 17 +++- cps/tasks/thumbnail.py | 177 +++++++++++++++++++++++++++++++---------- cps/thumbnails.py | 54 ------------- cps/ub.py | 28 ++++--- 7 files changed, 245 insertions(+), 117 deletions(-) create mode 100644 cps/fs.py delete mode 100644 cps/thumbnails.py diff --git a/cps/fs.py b/cps/fs.py new file mode 100644 index 00000000..699d5991 --- /dev/null +++ b/cps/fs.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- + +# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web) +# Copyright (C) 2020 mmonkey +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from __future__ import division, print_function, unicode_literals +from .constants import CACHE_DIR +from os import listdir, makedirs, remove +from os.path import isdir, isfile, join +from shutil import rmtree + +CACHE_TYPE_THUMBNAILS = 'thumbnails' + + +class FileSystem: + _instance = None + _cache_dir = CACHE_DIR + + def __new__(cls): + if cls._instance is None: + cls._instance = super(FileSystem, cls).__new__(cls) + return cls._instance + + def get_cache_dir(self, cache_type=None): + if not isdir(self._cache_dir): + makedirs(self._cache_dir) + + if cache_type and not isdir(join(self._cache_dir, cache_type)): + makedirs(join(self._cache_dir, cache_type)) + + return join(self._cache_dir, cache_type) if cache_type else self._cache_dir + + def get_cache_file_path(self, filename, cache_type=None): + return join(self.get_cache_dir(cache_type), filename) if filename else None + + def list_cache_files(self, cache_type=None): + path = self.get_cache_dir(cache_type) + return [file for file in listdir(path) if isfile(join(path, file))] + + def delete_cache_dir(self, cache_type=None): + if not cache_type and isdir(self._cache_dir): + rmtree(self._cache_dir) + if cache_type and isdir(join(self._cache_dir, cache_type)): + rmtree(join(self._cache_dir, cache_type)) + + def delete_cache_file(self, filename, cache_type=None): + if isfile(join(self.get_cache_dir(cache_type), filename)): + remove(join(self.get_cache_dir(cache_type), filename)) diff --git a/cps/helper.py b/cps/helper.py index d3420a11..271ab3e9 100644 --- a/cps/helper.py +++ b/cps/helper.py @@ -52,7 +52,7 @@ except ImportError: from . import calibre_db from .tasks.convert import TaskConvert -from . import logger, config, get_locale, db, thumbnails, ub +from . import logger, config, get_locale, db, fs, ub from . import gdriveutils as gd from .constants import STATIC_DIR as _STATIC_DIR from .subproc_wrapper import process_wait @@ -555,8 +555,9 @@ def get_book_cover_internal(book, use_generic_cover_on_failure, resolution=1, di if not disable_thumbnail: thumbnail = get_book_cover_thumbnail(book, resolution) if thumbnail: - if os.path.isfile(thumbnails.get_thumbnail_cache_path(thumbnail)): - return send_from_directory(thumbnails.get_thumbnail_cache_dir(), thumbnail.filename) + cache = fs.FileSystem() + if cache.get_cache_file_path(thumbnail.filename, fs.CACHE_TYPE_THUMBNAILS): + return send_from_directory(cache.get_cache_dir(fs.CACHE_TYPE_THUMBNAILS), thumbnail.filename) # Send the book cover from Google Drive if configured if config.config_use_google_drive: diff --git a/cps/schedule.py b/cps/schedule.py index 5d2c94b9..5c658e41 100644 --- a/cps/schedule.py +++ b/cps/schedule.py @@ -18,9 +18,9 @@ from __future__ import division, print_function, unicode_literals -from . import logger +from . import config, db, logger, ub from .services.background_scheduler import BackgroundScheduler -from .tasks.thumbnail import TaskThumbnail +from .tasks.thumbnail import TaskCleanupCoverThumbnailCache, TaskGenerateCoverThumbnails log = logger.create() @@ -29,6 +29,16 @@ def register_jobs(): scheduler = BackgroundScheduler() # Generate 100 book cover thumbnails every 5 minutes - scheduler.add_task(user=None, task=lambda: TaskThumbnail(limit=100), trigger='interval', minutes=5) + scheduler.add_task(user=None, task=lambda: TaskGenerateCoverThumbnails(limit=100), trigger='interval', minutes=5) - # TODO: validate thumbnail scheduled task + # Cleanup book cover cache every day at 4am + scheduler.add_task(user=None, task=lambda: TaskCleanupCoverThumbnailCache(), trigger='cron', hour=4) + + # Reconnect metadata.db every 4 hours + scheduler.add(func=reconnect_db_job, trigger='interval', hours=4) + + +def reconnect_db_job(): + log.info('Running background task: reconnect to calibre database') + calibre_db = db.CalibreDB() + calibre_db.reconnect_db(config, ub.app_DB_path) diff --git a/cps/services/worker.py b/cps/services/worker.py index 072674a0..2b6816db 100644 --- a/cps/services/worker.py +++ b/cps/services/worker.py @@ -35,7 +35,6 @@ def _get_main_thread(): raise Exception("main thread not found?!") - class ImprovedQueue(queue.Queue): def to_list(self): """ @@ -45,7 +44,8 @@ class ImprovedQueue(queue.Queue): with self.mutex: return list(self.queue) -#Class for all worker tasks in the background + +# Class for all worker tasks in the background class WorkerThread(threading.Thread): _instance = None @@ -127,6 +127,10 @@ class WorkerThread(threading.Thread): # CalibreTask.start() should wrap all exceptions in it's own error handling item.task.start(self) + # remove self_cleanup tasks from list + if item.task.self_cleanup: + self.dequeued.remove(item) + self.queue.task_done() @@ -141,6 +145,7 @@ class CalibreTask: self.end_time = None self.message = message self.id = uuid.uuid4() + self.self_cleanup = False @abc.abstractmethod def run(self, worker_thread): @@ -209,6 +214,14 @@ class CalibreTask: # todo: throw error if outside of [0,1] self._progress = x + @property + def self_cleanup(self): + return self._self_cleanup + + @self_cleanup.setter + def self_cleanup(self, is_self_cleanup): + self._self_cleanup = is_self_cleanup + def _handleError(self, error_message): self.stat = STAT_FAIL self.progress = 1 diff --git a/cps/tasks/thumbnail.py b/cps/tasks/thumbnail.py index 4e0c6db4..f61eb4a7 100644 --- a/cps/tasks/thumbnail.py +++ b/cps/tasks/thumbnail.py @@ -19,13 +19,15 @@ from __future__ import division, print_function, unicode_literals import os -from cps import config, db, gdriveutils, logger, ub -from cps.constants import CACHE_DIR as _CACHE_DIR +from cps import config, db, fs, gdriveutils, logger, ub from cps.services.worker import CalibreTask -from cps.thumbnails import THUMBNAIL_RESOLUTION_1X, THUMBNAIL_RESOLUTION_2X from datetime import datetime, timedelta from sqlalchemy import func -from urllib.request import urlopen + +try: + from urllib.request import urlopen +except ImportError as e: + from urllib2 import urlopen try: from wand.image import Image @@ -33,73 +35,92 @@ try: except (ImportError, RuntimeError) as e: use_IM = False +THUMBNAIL_RESOLUTION_1X = 1 +THUMBNAIL_RESOLUTION_2X = 2 -class TaskThumbnail(CalibreTask): + +class TaskGenerateCoverThumbnails(CalibreTask): def __init__(self, limit=100, task_message=u'Generating cover thumbnails'): - super(TaskThumbnail, self).__init__(task_message) + super(TaskGenerateCoverThumbnails, self).__init__(task_message) + self.self_cleanup = True self.limit = limit self.log = logger.create() self.app_db_session = ub.get_new_session_instance() - self.worker_db = db.CalibreDB(expire_on_commit=False) + self.calibre_db = db.CalibreDB(expire_on_commit=False) + self.cache = fs.FileSystem() + self.resolutions = [ + THUMBNAIL_RESOLUTION_1X, + THUMBNAIL_RESOLUTION_2X + ] def run(self, worker_thread): - if self.worker_db.session and use_IM: - thumbnails = self.get_thumbnail_book_ids() - thumbnail_book_ids = list(map(lambda t: t.book_id, thumbnails)) + if self.calibre_db.session and use_IM: + expired_thumbnails = self.get_expired_thumbnails() + thumbnail_book_ids = self.get_thumbnail_book_ids() books_without_thumbnails = self.get_books_without_thumbnails(thumbnail_book_ids) count = len(books_without_thumbnails) for i, book in enumerate(books_without_thumbnails): - thumbnails = self.get_thumbnails_for_book(thumbnails, book) - if thumbnails: - for thumbnail in thumbnails: - self.update_book_thumbnail(book, thumbnail) - - else: - self.create_book_thumbnail(book, THUMBNAIL_RESOLUTION_1X) - self.create_book_thumbnail(book, THUMBNAIL_RESOLUTION_2X) + for resolution in self.resolutions: + expired_thumbnail = self.get_expired_thumbnail_for_book_and_resolution( + book, + resolution, + expired_thumbnails + ) + if expired_thumbnail: + self.update_book_thumbnail(book, expired_thumbnail) + else: + self.create_book_thumbnail(book, resolution) self.progress = (1.0 / count) * i self._handleSuccess() - self.app_db_session.close() + self.app_db_session.remove() + + def get_expired_thumbnails(self): + return self.app_db_session\ + .query(ub.Thumbnail)\ + .filter(ub.Thumbnail.expiration < datetime.utcnow())\ + .all() def get_thumbnail_book_ids(self): return self.app_db_session\ - .query(ub.Thumbnail)\ + .query(ub.Thumbnail.book_id)\ .group_by(ub.Thumbnail.book_id)\ .having(func.min(ub.Thumbnail.expiration) > datetime.utcnow())\ - .all() + .distinct() def get_books_without_thumbnails(self, thumbnail_book_ids): - return self.worker_db.session\ + return self.calibre_db.session\ .query(db.Books)\ .filter(db.Books.has_cover == 1)\ .filter(db.Books.id.notin_(thumbnail_book_ids))\ .limit(self.limit)\ .all() - def get_thumbnails_for_book(self, thumbnails, book): - results = list() - for thumbnail in thumbnails: - if thumbnail.book_id == book.id: - results.append(thumbnail) + def get_expired_thumbnail_for_book_and_resolution(self, book, resolution, expired_thumbnails): + for thumbnail in expired_thumbnails: + if thumbnail.book_id == book.id and thumbnail.resolution == resolution: + return thumbnail - return results + return None def update_book_thumbnail(self, book, thumbnail): + thumbnail.generated_at = datetime.utcnow() thumbnail.expiration = datetime.utcnow() + timedelta(days=30) try: self.app_db_session.commit() self.generate_book_thumbnail(book, thumbnail) except Exception as ex: + self.log.info(u'Error updating book thumbnail: ' + str(ex)) self._handleError(u'Error updating book thumbnail: ' + str(ex)) self.app_db_session.rollback() def create_book_thumbnail(self, book, resolution): thumbnail = ub.Thumbnail() thumbnail.book_id = book.id + thumbnail.format = 'jpeg' thumbnail.resolution = resolution self.app_db_session.add(thumbnail) @@ -107,6 +128,7 @@ class TaskThumbnail(CalibreTask): self.app_db_session.commit() self.generate_book_thumbnail(book, thumbnail) except Exception as ex: + self.log.info(u'Error creating book thumbnail: ' + str(ex)) self._handleError(u'Error creating book thumbnail: ' + str(ex)) self.app_db_session.rollback() @@ -128,9 +150,12 @@ class TaskThumbnail(CalibreTask): if img.height > height: width = self.get_thumbnail_width(height, img) img.resize(width=width, height=height, filter='lanczos') - img.save(filename=self.get_thumbnail_cache_path(thumbnail)) + img.format = thumbnail.format + filename = self.cache.get_cache_file_path(thumbnail.filename, fs.CACHE_TYPE_THUMBNAILS) + img.save(filename=filename) except Exception as ex: # Bubble exception to calling function + self.log.info(u'Error generating thumbnail file: ' + str(ex)) raise ex finally: stream.close() @@ -144,7 +169,9 @@ class TaskThumbnail(CalibreTask): if img.height > height: width = self.get_thumbnail_width(height, img) img.resize(width=width, height=height, filter='lanczos') - img.save(filename=self.get_thumbnail_cache_path(thumbnail)) + img.format = thumbnail.format + filename = self.cache.get_cache_file_path(thumbnail.filename, fs.CACHE_TYPE_THUMBNAILS) + img.save(filename=filename) def get_thumbnail_height(self, thumbnail): return int(225 * thumbnail.resolution) @@ -153,20 +180,88 @@ class TaskThumbnail(CalibreTask): percent = (height / float(img.height)) return int((float(img.width) * float(percent))) - def get_thumbnail_cache_dir(self): - if not os.path.isdir(_CACHE_DIR): - os.makedirs(_CACHE_DIR) + @property + def name(self): + return "GenerateCoverThumbnails" - if not os.path.isdir(os.path.join(_CACHE_DIR, 'thumbnails')): - os.makedirs(os.path.join(_CACHE_DIR, 'thumbnails')) - return os.path.join(_CACHE_DIR, 'thumbnails') +class TaskCleanupCoverThumbnailCache(CalibreTask): + def __init__(self, task_message=u'Validating cover thumbnail cache'): + super(TaskCleanupCoverThumbnailCache, self).__init__(task_message) + self.log = logger.create() + self.app_db_session = ub.get_new_session_instance() + self.calibre_db = db.CalibreDB(expire_on_commit=False) + self.cache = fs.FileSystem() - def get_thumbnail_cache_path(self, thumbnail): - if thumbnail: - return os.path.join(self.get_thumbnail_cache_dir(), thumbnail.filename) - return None + def run(self, worker_thread): + cached_thumbnail_files = self.cache.list_cache_files(fs.CACHE_TYPE_THUMBNAILS) + + # Expire thumbnails in the database if the cached file is missing + # This case will happen if a user deletes the cache dir or cached files + if self.app_db_session: + self.expire_missing_thumbnails(cached_thumbnail_files) + self.progress = 0.33 + + # Delete thumbnails in the database if the book has been removed + # This case will happen if a book is removed in Calibre and the metadata.db file is updated in the filesystem + if self.app_db_session and self.calibre_db: + book_ids = self.get_book_ids() + self.delete_thumbnails_for_missing_books(book_ids) + self.progress = 0.66 + + # Delete extraneous cached thumbnail files + # This case will happen if a book was deleted and the thumbnail OR the metadata.db file was changed externally + if self.app_db_session: + db_thumbnail_files = self.get_thumbnail_filenames() + self.delete_extraneous_thumbnail_files(cached_thumbnail_files, db_thumbnail_files) + + self._handleSuccess() + self.app_db_session.remove() + + def expire_missing_thumbnails(self, filenames): + try: + self.app_db_session\ + .query(ub.Thumbnail)\ + .filter(ub.Thumbnail.filename.notin_(filenames))\ + .update({"expiration": datetime.utcnow()}, synchronize_session=False) + self.app_db_session.commit() + except Exception as ex: + self.log.info(u'Error expiring thumbnails for missing cache files: ' + str(ex)) + self._handleError(u'Error expiring thumbnails for missing cache files: ' + str(ex)) + self.app_db_session.rollback() + + def get_book_ids(self): + results = self.calibre_db.session\ + .query(db.Books.id)\ + .filter(db.Books.has_cover == 1)\ + .distinct() + + return [value for value, in results] + + def delete_thumbnails_for_missing_books(self, book_ids): + try: + self.app_db_session\ + .query(ub.Thumbnail)\ + .filter(ub.Thumbnail.book_id.notin_(book_ids))\ + .delete(synchronize_session=False) + self.app_db_session.commit() + except Exception as ex: + self.log.info(str(ex)) + self._handleError(u'Error deleting thumbnails for missing books: ' + str(ex)) + self.app_db_session.rollback() + + def get_thumbnail_filenames(self): + results = self.app_db_session\ + .query(ub.Thumbnail.filename)\ + .all() + + return [thumbnail for thumbnail, in results] + + def delete_extraneous_thumbnail_files(self, cached_thumbnail_files, db_thumbnail_files): + extraneous_files = list(set(cached_thumbnail_files).difference(db_thumbnail_files)) + for file in extraneous_files: + self.cache.delete_cache_file(file, fs.CACHE_TYPE_THUMBNAILS) @property def name(self): - return "Thumbnail" + return "CleanupCoverThumbnailCache" diff --git a/cps/thumbnails.py b/cps/thumbnails.py deleted file mode 100644 index ea7aac86..00000000 --- a/cps/thumbnails.py +++ /dev/null @@ -1,54 +0,0 @@ -# -*- coding: utf-8 -*- - -# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web) -# Copyright (C) 2020 mmonkey -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -from __future__ import division, print_function, unicode_literals -import os - -from . import logger, ub -from .constants import CACHE_DIR as _CACHE_DIR - -from datetime import datetime - -THUMBNAIL_RESOLUTION_1X = 1 -THUMBNAIL_RESOLUTION_2X = 2 - -log = logger.create() - - -def get_thumbnail_cache_dir(): - if not os.path.isdir(_CACHE_DIR): - os.makedirs(_CACHE_DIR) - if not os.path.isdir(os.path.join(_CACHE_DIR, 'thumbnails')): - os.makedirs(os.path.join(_CACHE_DIR, 'thumbnails')) - return os.path.join(_CACHE_DIR, 'thumbnails') - - -def get_thumbnail_cache_path(thumbnail): - if thumbnail: - return os.path.join(get_thumbnail_cache_dir(), thumbnail.filename) - return None - - -def cover_thumbnail_exists_for_book(book): - if book and book.has_cover: - thumbnail = ub.session.query(ub.Thumbnail).filter(ub.Thumbnail.book_id == book.id).first() - if thumbnail and thumbnail.expiration > datetime.utcnow(): - thumbnail_path = get_thumbnail_cache_path(thumbnail) - return thumbnail_path and os.path.isfile(thumbnail_path) - - return False diff --git a/cps/ub.py b/cps/ub.py index 0b5a65e7..30abd728 100644 --- a/cps/ub.py +++ b/cps/ub.py @@ -18,6 +18,7 @@ # along with this program. If not, see . from __future__ import division, print_function, unicode_literals +import atexit import os import sys import datetime @@ -42,12 +43,11 @@ from sqlalchemy import create_engine, exc, exists, event from sqlalchemy import Column, ForeignKey from sqlalchemy import String, Integer, SmallInteger, Boolean, DateTime, Float, JSON from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.ext.hybrid import hybrid_property from sqlalchemy.orm.attributes import flag_modified from sqlalchemy.orm import backref, relationship, sessionmaker, Session, scoped_session from werkzeug.security import generate_password_hash -from . import cli, constants +from . import cli, constants, logger session = None @@ -435,6 +435,14 @@ class RemoteAuthToken(Base): return '' % self.id +def filename(context): + file_format = context.get_current_parameters()['format'] + if file_format == 'jpeg': + return context.get_current_parameters()['uuid'] + '.jpg' + else: + return context.get_current_parameters()['uuid'] + '.' + file_format + + class Thumbnail(Base): __tablename__ = 'thumbnail' @@ -443,19 +451,10 @@ class Thumbnail(Base): uuid = Column(String, default=lambda: str(uuid.uuid4()), unique=True) format = Column(String, default='jpeg') resolution = Column(SmallInteger, default=1) + filename = Column(String, default=filename) + generated_at = Column(DateTime, default=lambda: datetime.datetime.utcnow()) expiration = Column(DateTime, default=lambda: datetime.datetime.utcnow() + datetime.timedelta(days=30)) - @hybrid_property - def extension(self): - if self.format == 'jpeg': - return 'jpg' - else: - return self.format - - @hybrid_property - def filename(self): - return self.uuid + '.' + self.extension - # Migrate database to current version, has to be updated after every database change. Currently migration from # everywhere to current should work. Migration is done by checking if relevant columns are existing, and than adding @@ -705,6 +704,9 @@ def get_new_session_instance(): new_engine = create_engine(u'sqlite:///{0}'.format(cli.settingspath), echo=False) new_session = scoped_session(sessionmaker()) new_session.configure(bind=new_engine) + + atexit.register(lambda: new_session.remove() if new_session else True) + return new_session