fixed thumbnail generate tasks, added thumbnail cleanup task, added reconnect db scheduled job

This commit is contained in:
mmonkey 2020-12-22 17:49:21 -06:00
parent e48bdf9d5a
commit 541fc7e14e
7 changed files with 245 additions and 117 deletions

61
cps/fs.py Normal file
View File

@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
# Copyright (C) 2020 mmonkey
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import division, print_function, unicode_literals
from .constants import CACHE_DIR
from os import listdir, makedirs, remove
from os.path import isdir, isfile, join
from shutil import rmtree
CACHE_TYPE_THUMBNAILS = 'thumbnails'
class FileSystem(object):
    """Singleton helper for the application's on-disk cache directory.

    All paths are rooted at ``CACHE_DIR``. An optional ``cache_type``
    (for example ``CACHE_TYPE_THUMBNAILS``) selects a sub-directory of it.

    The explicit ``object`` base makes this a new-style class, so that
    ``__new__`` (and therefore the singleton) is also honoured on Python 2.
    """

    _instance = None
    _cache_dir = CACHE_DIR

    def __new__(cls):
        # Lazily create the one shared instance and hand it out on every call.
        if cls._instance is None:
            cls._instance = super(FileSystem, cls).__new__(cls)
        return cls._instance

    @staticmethod
    def _ensure_dir(path):
        """Create ``path`` (and parents) unless it already is a directory."""
        if isdir(path):
            return
        try:
            makedirs(path)
        except OSError:
            # A concurrent caller may have created the directory between the
            # isdir() check and makedirs(); only propagate genuine failures.
            if not isdir(path):
                raise

    def get_cache_dir(self, cache_type=None):
        """Return the cache directory, creating it on demand.

        :param cache_type: optional sub-directory name inside the cache root.
        :return: path of the sub-directory when ``cache_type`` is given,
                 otherwise the cache root.
        :raises OSError: if a directory cannot be created.
        """
        self._ensure_dir(self._cache_dir)
        if not cache_type:
            return self._cache_dir
        path = join(self._cache_dir, cache_type)
        self._ensure_dir(path)
        return path

    def get_cache_file_path(self, filename, cache_type=None):
        """Return the path ``filename`` would have in the cache.

        The file itself is not checked for existence. Returns ``None`` when
        ``filename`` is empty or ``None``.
        """
        if not filename:
            return None
        return join(self.get_cache_dir(cache_type), filename)

    def list_cache_files(self, cache_type=None):
        """Return the names of the regular files directly inside the cache dir."""
        path = self.get_cache_dir(cache_type)
        return [name for name in listdir(path) if isfile(join(path, name))]

    def delete_cache_dir(self, cache_type=None):
        """Recursively delete a cache sub-directory, or the whole cache root
        when no ``cache_type`` is given. Does nothing if it does not exist."""
        path = join(self._cache_dir, cache_type) if cache_type else self._cache_dir
        if isdir(path):
            rmtree(path)

    def delete_cache_file(self, filename, cache_type=None):
        """Delete ``filename`` from the cache if it exists as a regular file."""
        path = join(self.get_cache_dir(cache_type), filename)
        if isfile(path):
            remove(path)

View File

@ -52,7 +52,7 @@ except ImportError:
from . import calibre_db
from .tasks.convert import TaskConvert
from . import logger, config, get_locale, db, thumbnails, ub
from . import logger, config, get_locale, db, fs, ub
from . import gdriveutils as gd
from .constants import STATIC_DIR as _STATIC_DIR
from .subproc_wrapper import process_wait
@ -555,8 +555,9 @@ def get_book_cover_internal(book, use_generic_cover_on_failure, resolution=1, di
if not disable_thumbnail:
thumbnail = get_book_cover_thumbnail(book, resolution)
if thumbnail:
if os.path.isfile(thumbnails.get_thumbnail_cache_path(thumbnail)):
return send_from_directory(thumbnails.get_thumbnail_cache_dir(), thumbnail.filename)
cache = fs.FileSystem()
if cache.get_cache_file_path(thumbnail.filename, fs.CACHE_TYPE_THUMBNAILS):
return send_from_directory(cache.get_cache_dir(fs.CACHE_TYPE_THUMBNAILS), thumbnail.filename)
# Send the book cover from Google Drive if configured
if config.config_use_google_drive:

View File

@ -18,9 +18,9 @@
from __future__ import division, print_function, unicode_literals
from . import logger
from . import config, db, logger, ub
from .services.background_scheduler import BackgroundScheduler
from .tasks.thumbnail import TaskThumbnail
from .tasks.thumbnail import TaskCleanupCoverThumbnailCache, TaskGenerateCoverThumbnails
log = logger.create()
@ -29,6 +29,16 @@ def register_jobs():
scheduler = BackgroundScheduler()
# Generate 100 book cover thumbnails every 5 minutes
scheduler.add_task(user=None, task=lambda: TaskThumbnail(limit=100), trigger='interval', minutes=5)
scheduler.add_task(user=None, task=lambda: TaskGenerateCoverThumbnails(limit=100), trigger='interval', minutes=5)
# TODO: validate thumbnail scheduled task
# Cleanup book cover cache every day at 4am
scheduler.add_task(user=None, task=lambda: TaskCleanupCoverThumbnailCache(), trigger='cron', hour=4)
# Reconnect metadata.db every 4 hours
scheduler.add(func=reconnect_db_job, trigger='interval', hours=4)
def reconnect_db_job():
    """Background job that reconnects the Calibre database.

    Logs the run, then calls ``reconnect_db`` on a ``db.CalibreDB`` instance
    with the application config and the app database path.
    """
    log.info('Running background task: reconnect to calibre database')
    db.CalibreDB().reconnect_db(config, ub.app_DB_path)

View File

@ -35,7 +35,6 @@ def _get_main_thread():
raise Exception("main thread not found?!")
class ImprovedQueue(queue.Queue):
def to_list(self):
"""
@ -45,6 +44,7 @@ class ImprovedQueue(queue.Queue):
with self.mutex:
return list(self.queue)
# Class for all worker tasks in the background
class WorkerThread(threading.Thread):
_instance = None
@ -127,6 +127,10 @@ class WorkerThread(threading.Thread):
# CalibreTask.start() should wrap all exceptions in its own error handling
item.task.start(self)
# remove self_cleanup tasks from list
if item.task.self_cleanup:
self.dequeued.remove(item)
self.queue.task_done()
@ -141,6 +145,7 @@ class CalibreTask:
self.end_time = None
self.message = message
self.id = uuid.uuid4()
self.self_cleanup = False
@abc.abstractmethod
def run(self, worker_thread):
@ -209,6 +214,14 @@ class CalibreTask:
# todo: throw error if outside of [0,1]
self._progress = x
@property
def self_cleanup(self):
return self._self_cleanup
@self_cleanup.setter
def self_cleanup(self, is_self_cleanup):
self._self_cleanup = is_self_cleanup
def _handleError(self, error_message):
self.stat = STAT_FAIL
self.progress = 1

View File

@ -19,13 +19,15 @@
from __future__ import division, print_function, unicode_literals
import os
from cps import config, db, gdriveutils, logger, ub
from cps.constants import CACHE_DIR as _CACHE_DIR
from cps import config, db, fs, gdriveutils, logger, ub
from cps.services.worker import CalibreTask
from cps.thumbnails import THUMBNAIL_RESOLUTION_1X, THUMBNAIL_RESOLUTION_2X
from datetime import datetime, timedelta
from sqlalchemy import func
try:
from urllib.request import urlopen
except ImportError as e:
from urllib2 import urlopen
try:
from wand.image import Image
@ -33,73 +35,92 @@ try:
except (ImportError, RuntimeError) as e:
use_IM = False
THUMBNAIL_RESOLUTION_1X = 1
THUMBNAIL_RESOLUTION_2X = 2
class TaskThumbnail(CalibreTask):
class TaskGenerateCoverThumbnails(CalibreTask):
def __init__(self, limit=100, task_message=u'Generating cover thumbnails'):
super(TaskThumbnail, self).__init__(task_message)
super(TaskGenerateCoverThumbnails, self).__init__(task_message)
self.self_cleanup = True
self.limit = limit
self.log = logger.create()
self.app_db_session = ub.get_new_session_instance()
self.worker_db = db.CalibreDB(expire_on_commit=False)
self.calibre_db = db.CalibreDB(expire_on_commit=False)
self.cache = fs.FileSystem()
self.resolutions = [
THUMBNAIL_RESOLUTION_1X,
THUMBNAIL_RESOLUTION_2X
]
def run(self, worker_thread):
if self.worker_db.session and use_IM:
thumbnails = self.get_thumbnail_book_ids()
thumbnail_book_ids = list(map(lambda t: t.book_id, thumbnails))
if self.calibre_db.session and use_IM:
expired_thumbnails = self.get_expired_thumbnails()
thumbnail_book_ids = self.get_thumbnail_book_ids()
books_without_thumbnails = self.get_books_without_thumbnails(thumbnail_book_ids)
count = len(books_without_thumbnails)
for i, book in enumerate(books_without_thumbnails):
thumbnails = self.get_thumbnails_for_book(thumbnails, book)
if thumbnails:
for thumbnail in thumbnails:
self.update_book_thumbnail(book, thumbnail)
for resolution in self.resolutions:
expired_thumbnail = self.get_expired_thumbnail_for_book_and_resolution(
book,
resolution,
expired_thumbnails
)
if expired_thumbnail:
self.update_book_thumbnail(book, expired_thumbnail)
else:
self.create_book_thumbnail(book, THUMBNAIL_RESOLUTION_1X)
self.create_book_thumbnail(book, THUMBNAIL_RESOLUTION_2X)
self.create_book_thumbnail(book, resolution)
self.progress = (1.0 / count) * i
self._handleSuccess()
self.app_db_session.close()
self.app_db_session.remove()
def get_expired_thumbnails(self):
return self.app_db_session\
.query(ub.Thumbnail)\
.filter(ub.Thumbnail.expiration < datetime.utcnow())\
.all()
def get_thumbnail_book_ids(self):
return self.app_db_session\
.query(ub.Thumbnail)\
.query(ub.Thumbnail.book_id)\
.group_by(ub.Thumbnail.book_id)\
.having(func.min(ub.Thumbnail.expiration) > datetime.utcnow())\
.all()
.distinct()
def get_books_without_thumbnails(self, thumbnail_book_ids):
return self.worker_db.session\
return self.calibre_db.session\
.query(db.Books)\
.filter(db.Books.has_cover == 1)\
.filter(db.Books.id.notin_(thumbnail_book_ids))\
.limit(self.limit)\
.all()
def get_thumbnails_for_book(self, thumbnails, book):
results = list()
for thumbnail in thumbnails:
if thumbnail.book_id == book.id:
results.append(thumbnail)
def get_expired_thumbnail_for_book_and_resolution(self, book, resolution, expired_thumbnails):
for thumbnail in expired_thumbnails:
if thumbnail.book_id == book.id and thumbnail.resolution == resolution:
return thumbnail
return results
return None
def update_book_thumbnail(self, book, thumbnail):
thumbnail.generated_at = datetime.utcnow()
thumbnail.expiration = datetime.utcnow() + timedelta(days=30)
try:
self.app_db_session.commit()
self.generate_book_thumbnail(book, thumbnail)
except Exception as ex:
self.log.info(u'Error updating book thumbnail: ' + str(ex))
self._handleError(u'Error updating book thumbnail: ' + str(ex))
self.app_db_session.rollback()
def create_book_thumbnail(self, book, resolution):
thumbnail = ub.Thumbnail()
thumbnail.book_id = book.id
thumbnail.format = 'jpeg'
thumbnail.resolution = resolution
self.app_db_session.add(thumbnail)
@ -107,6 +128,7 @@ class TaskThumbnail(CalibreTask):
self.app_db_session.commit()
self.generate_book_thumbnail(book, thumbnail)
except Exception as ex:
self.log.info(u'Error creating book thumbnail: ' + str(ex))
self._handleError(u'Error creating book thumbnail: ' + str(ex))
self.app_db_session.rollback()
@ -128,9 +150,12 @@ class TaskThumbnail(CalibreTask):
if img.height > height:
width = self.get_thumbnail_width(height, img)
img.resize(width=width, height=height, filter='lanczos')
img.save(filename=self.get_thumbnail_cache_path(thumbnail))
img.format = thumbnail.format
filename = self.cache.get_cache_file_path(thumbnail.filename, fs.CACHE_TYPE_THUMBNAILS)
img.save(filename=filename)
except Exception as ex:
# Bubble exception to calling function
self.log.info(u'Error generating thumbnail file: ' + str(ex))
raise ex
finally:
stream.close()
@ -144,7 +169,9 @@ class TaskThumbnail(CalibreTask):
if img.height > height:
width = self.get_thumbnail_width(height, img)
img.resize(width=width, height=height, filter='lanczos')
img.save(filename=self.get_thumbnail_cache_path(thumbnail))
img.format = thumbnail.format
filename = self.cache.get_cache_file_path(thumbnail.filename, fs.CACHE_TYPE_THUMBNAILS)
img.save(filename=filename)
def get_thumbnail_height(self, thumbnail):
return int(225 * thumbnail.resolution)
@ -153,20 +180,88 @@ class TaskThumbnail(CalibreTask):
percent = (height / float(img.height))
return int((float(img.width) * float(percent)))
def get_thumbnail_cache_dir(self):
if not os.path.isdir(_CACHE_DIR):
os.makedirs(_CACHE_DIR)
@property
def name(self):
return "GenerateCoverThumbnails"
if not os.path.isdir(os.path.join(_CACHE_DIR, 'thumbnails')):
os.makedirs(os.path.join(_CACHE_DIR, 'thumbnails'))
return os.path.join(_CACHE_DIR, 'thumbnails')
class TaskCleanupCoverThumbnailCache(CalibreTask):
def __init__(self, task_message=u'Validating cover thumbnail cache'):
super(TaskCleanupCoverThumbnailCache, self).__init__(task_message)
self.log = logger.create()
self.app_db_session = ub.get_new_session_instance()
self.calibre_db = db.CalibreDB(expire_on_commit=False)
self.cache = fs.FileSystem()
def get_thumbnail_cache_path(self, thumbnail):
if thumbnail:
return os.path.join(self.get_thumbnail_cache_dir(), thumbnail.filename)
return None
def run(self, worker_thread):
cached_thumbnail_files = self.cache.list_cache_files(fs.CACHE_TYPE_THUMBNAILS)
# Expire thumbnails in the database if the cached file is missing
# This case will happen if a user deletes the cache dir or cached files
if self.app_db_session:
self.expire_missing_thumbnails(cached_thumbnail_files)
self.progress = 0.33
# Delete thumbnails in the database if the book has been removed
# This case will happen if a book is removed in Calibre and the metadata.db file is updated in the filesystem
if self.app_db_session and self.calibre_db:
book_ids = self.get_book_ids()
self.delete_thumbnails_for_missing_books(book_ids)
self.progress = 0.66
# Delete extraneous cached thumbnail files
# This case will happen if a book was deleted and the thumbnail OR the metadata.db file was changed externally
if self.app_db_session:
db_thumbnail_files = self.get_thumbnail_filenames()
self.delete_extraneous_thumbnail_files(cached_thumbnail_files, db_thumbnail_files)
self._handleSuccess()
self.app_db_session.remove()
def expire_missing_thumbnails(self, filenames):
try:
self.app_db_session\
.query(ub.Thumbnail)\
.filter(ub.Thumbnail.filename.notin_(filenames))\
.update({"expiration": datetime.utcnow()}, synchronize_session=False)
self.app_db_session.commit()
except Exception as ex:
self.log.info(u'Error expiring thumbnails for missing cache files: ' + str(ex))
self._handleError(u'Error expiring thumbnails for missing cache files: ' + str(ex))
self.app_db_session.rollback()
def get_book_ids(self):
results = self.calibre_db.session\
.query(db.Books.id)\
.filter(db.Books.has_cover == 1)\
.distinct()
return [value for value, in results]
def delete_thumbnails_for_missing_books(self, book_ids):
try:
self.app_db_session\
.query(ub.Thumbnail)\
.filter(ub.Thumbnail.book_id.notin_(book_ids))\
.delete(synchronize_session=False)
self.app_db_session.commit()
except Exception as ex:
self.log.info(str(ex))
self._handleError(u'Error deleting thumbnails for missing books: ' + str(ex))
self.app_db_session.rollback()
def get_thumbnail_filenames(self):
results = self.app_db_session\
.query(ub.Thumbnail.filename)\
.all()
return [thumbnail for thumbnail, in results]
def delete_extraneous_thumbnail_files(self, cached_thumbnail_files, db_thumbnail_files):
extraneous_files = list(set(cached_thumbnail_files).difference(db_thumbnail_files))
for file in extraneous_files:
self.cache.delete_cache_file(file, fs.CACHE_TYPE_THUMBNAILS)
@property
def name(self):
return "Thumbnail"
return "CleanupCoverThumbnailCache"

View File

@ -1,54 +0,0 @@
# -*- coding: utf-8 -*-
# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
# Copyright (C) 2020 mmonkey
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import division, print_function, unicode_literals
import os
from . import logger, ub
from .constants import CACHE_DIR as _CACHE_DIR
from datetime import datetime
THUMBNAIL_RESOLUTION_1X = 1
THUMBNAIL_RESOLUTION_2X = 2
log = logger.create()
def get_thumbnail_cache_dir():
if not os.path.isdir(_CACHE_DIR):
os.makedirs(_CACHE_DIR)
if not os.path.isdir(os.path.join(_CACHE_DIR, 'thumbnails')):
os.makedirs(os.path.join(_CACHE_DIR, 'thumbnails'))
return os.path.join(_CACHE_DIR, 'thumbnails')
def get_thumbnail_cache_path(thumbnail):
if thumbnail:
return os.path.join(get_thumbnail_cache_dir(), thumbnail.filename)
return None
def cover_thumbnail_exists_for_book(book):
if book and book.has_cover:
thumbnail = ub.session.query(ub.Thumbnail).filter(ub.Thumbnail.book_id == book.id).first()
if thumbnail and thumbnail.expiration > datetime.utcnow():
thumbnail_path = get_thumbnail_cache_path(thumbnail)
return thumbnail_path and os.path.isfile(thumbnail_path)
return False

View File

@ -18,6 +18,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import division, print_function, unicode_literals
import atexit
import os
import sys
import datetime
@ -42,12 +43,11 @@ from sqlalchemy import create_engine, exc, exists, event
from sqlalchemy import Column, ForeignKey
from sqlalchemy import String, Integer, SmallInteger, Boolean, DateTime, Float, JSON
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.orm.attributes import flag_modified
from sqlalchemy.orm import backref, relationship, sessionmaker, Session, scoped_session
from werkzeug.security import generate_password_hash
from . import cli, constants
from . import cli, constants, logger
session = None
@ -435,6 +435,14 @@ class RemoteAuthToken(Base):
return '<Token %r>' % self.id
def filename(context):
    """Build a thumbnail file name from the current statement parameters.

    Reads ``uuid`` and ``format`` from ``context.get_current_parameters()``
    and returns ``<uuid>.<extension>``. The extension is the format itself,
    except that ``'jpeg'`` is shortened to ``'jpg'``.
    """
    params = context.get_current_parameters()
    file_format = params['format']
    extension = 'jpg' if file_format == 'jpeg' else file_format
    return params['uuid'] + '.' + extension
class Thumbnail(Base):
__tablename__ = 'thumbnail'
@ -443,19 +451,10 @@ class Thumbnail(Base):
uuid = Column(String, default=lambda: str(uuid.uuid4()), unique=True)
format = Column(String, default='jpeg')
resolution = Column(SmallInteger, default=1)
filename = Column(String, default=filename)
generated_at = Column(DateTime, default=lambda: datetime.datetime.utcnow())
expiration = Column(DateTime, default=lambda: datetime.datetime.utcnow() + datetime.timedelta(days=30))
@hybrid_property
def extension(self):
if self.format == 'jpeg':
return 'jpg'
else:
return self.format
@hybrid_property
def filename(self):
return self.uuid + '.' + self.extension
# Migrate database to current version, has to be updated after every database change. Currently migration from
# everywhere to current should work. Migration is done by checking whether the relevant columns exist, and then adding
@ -705,6 +704,9 @@ def get_new_session_instance():
new_engine = create_engine(u'sqlite:///{0}'.format(cli.settingspath), echo=False)
new_session = scoped_session(sessionmaker())
new_session.configure(bind=new_engine)
atexit.register(lambda: new_session.remove() if new_session else True)
return new_session