fixed thumbnail generate tasks, added thumbnail cleanup task, added reconnect db scheduled job

This commit is contained in:
mmonkey 2020-12-22 17:49:21 -06:00
parent e48bdf9d5a
commit 541fc7e14e
7 changed files with 245 additions and 117 deletions

61
cps/fs.py Normal file
View File

@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
# Copyright (C) 2020 mmonkey
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import division, print_function, unicode_literals
from .constants import CACHE_DIR
from os import listdir, makedirs, remove
from os.path import isdir, isfile, join
from shutil import rmtree
CACHE_TYPE_THUMBNAILS = 'thumbnails'
class FileSystem(object):
    """Singleton helper for the on-disk cache rooted at ``CACHE_DIR``.

    Every ``FileSystem()`` call returns the same instance.  A ``cache_type``
    argument names a sub-directory of the cache root (for example
    ``CACHE_TYPE_THUMBNAILS``); ``None`` addresses the cache root itself.
    """

    _instance = None  # the single shared instance, created on first use
    _cache_dir = CACHE_DIR  # root directory below which every cache type lives

    def __new__(cls):
        # The class derives from ``object`` on purpose: on Python 2 a classic
        # class never invokes ``__new__``, so the singleton would silently
        # hand out a fresh instance on every call.
        if cls._instance is None:
            cls._instance = super(FileSystem, cls).__new__(cls)
        return cls._instance

    def _resolve_dir(self, cache_type=None):
        """Return the path for *cache_type* without touching the disk."""
        return join(self._cache_dir, cache_type) if cache_type else self._cache_dir

    def get_cache_dir(self, cache_type=None):
        """Return the directory for *cache_type*, creating it if it is missing.

        :param cache_type: sub-directory name, or ``None`` for the cache root
        :return: path of the (now existing) directory
        :raises OSError: if the directory cannot be created
        """
        path = self._resolve_dir(cache_type)
        if not isdir(path):
            try:
                # makedirs also creates the cache root when it does not exist yet
                makedirs(path)
            except OSError:
                # Another caller may have created the directory between the
                # isdir() check and makedirs(); only a real failure is raised.
                if not isdir(path):
                    raise
        return path

    def get_cache_file_path(self, filename, cache_type=None):
        """Return the full path of *filename* inside the *cache_type* directory.

        Returns ``None`` when *filename* is empty or ``None``.  The directory
        is created as a side effect; the file itself is not checked.
        """
        return join(self.get_cache_dir(cache_type), filename) if filename else None

    def list_cache_files(self, cache_type=None):
        """Return the names of the regular files directly inside the cache directory."""
        path = self.get_cache_dir(cache_type)
        return [name for name in listdir(path) if isfile(join(path, name))]

    def delete_cache_dir(self, cache_type=None):
        """Recursively delete the *cache_type* directory (or the whole cache root)."""
        path = self._resolve_dir(cache_type)
        if isdir(path):
            rmtree(path)

    def delete_cache_file(self, filename, cache_type=None):
        """Delete *filename* from the *cache_type* directory if it exists.

        An empty or ``None`` filename is ignored, matching
        ``get_cache_file_path``, instead of failing inside ``join``.
        """
        # Resolve the path once so the directory is not looked up twice
        path = self.get_cache_file_path(filename, cache_type)
        if path and isfile(path):
            remove(path)

View File

@ -52,7 +52,7 @@ except ImportError:
from . import calibre_db from . import calibre_db
from .tasks.convert import TaskConvert from .tasks.convert import TaskConvert
from . import logger, config, get_locale, db, thumbnails, ub from . import logger, config, get_locale, db, fs, ub
from . import gdriveutils as gd from . import gdriveutils as gd
from .constants import STATIC_DIR as _STATIC_DIR from .constants import STATIC_DIR as _STATIC_DIR
from .subproc_wrapper import process_wait from .subproc_wrapper import process_wait
@ -555,8 +555,9 @@ def get_book_cover_internal(book, use_generic_cover_on_failure, resolution=1, di
if not disable_thumbnail: if not disable_thumbnail:
thumbnail = get_book_cover_thumbnail(book, resolution) thumbnail = get_book_cover_thumbnail(book, resolution)
if thumbnail: if thumbnail:
if os.path.isfile(thumbnails.get_thumbnail_cache_path(thumbnail)): cache = fs.FileSystem()
return send_from_directory(thumbnails.get_thumbnail_cache_dir(), thumbnail.filename) if cache.get_cache_file_path(thumbnail.filename, fs.CACHE_TYPE_THUMBNAILS):
return send_from_directory(cache.get_cache_dir(fs.CACHE_TYPE_THUMBNAILS), thumbnail.filename)
# Send the book cover from Google Drive if configured # Send the book cover from Google Drive if configured
if config.config_use_google_drive: if config.config_use_google_drive:

View File

@ -18,9 +18,9 @@
from __future__ import division, print_function, unicode_literals from __future__ import division, print_function, unicode_literals
from . import logger from . import config, db, logger, ub
from .services.background_scheduler import BackgroundScheduler from .services.background_scheduler import BackgroundScheduler
from .tasks.thumbnail import TaskThumbnail from .tasks.thumbnail import TaskCleanupCoverThumbnailCache, TaskGenerateCoverThumbnails
log = logger.create() log = logger.create()
@ -29,6 +29,16 @@ def register_jobs():
scheduler = BackgroundScheduler() scheduler = BackgroundScheduler()
# Generate 100 book cover thumbnails every 5 minutes # Generate 100 book cover thumbnails every 5 minutes
scheduler.add_task(user=None, task=lambda: TaskThumbnail(limit=100), trigger='interval', minutes=5) scheduler.add_task(user=None, task=lambda: TaskGenerateCoverThumbnails(limit=100), trigger='interval', minutes=5)
# TODO: validate thumbnail scheduled task # Cleanup book cover cache every day at 4am
scheduler.add_task(user=None, task=lambda: TaskCleanupCoverThumbnailCache(), trigger='cron', hour=4)
# Reconnect metadata.db every 4 hours
scheduler.add(func=reconnect_db_job, trigger='interval', hours=4)
def reconnect_db_job():
    """Scheduled job: log the run, then call ``reconnect_db`` on a new ``db.CalibreDB``.

    The reconnect is given the application ``config`` and ``ub.app_DB_path``.
    """
    log.info('Running background task: reconnect to calibre database')
    db.CalibreDB().reconnect_db(config, ub.app_DB_path)

View File

@ -35,7 +35,6 @@ def _get_main_thread():
raise Exception("main thread not found?!") raise Exception("main thread not found?!")
class ImprovedQueue(queue.Queue): class ImprovedQueue(queue.Queue):
def to_list(self): def to_list(self):
""" """
@ -45,7 +44,8 @@ class ImprovedQueue(queue.Queue):
with self.mutex: with self.mutex:
return list(self.queue) return list(self.queue)
#Class for all worker tasks in the background
# Class for all worker tasks in the background
class WorkerThread(threading.Thread): class WorkerThread(threading.Thread):
_instance = None _instance = None
@ -127,6 +127,10 @@ class WorkerThread(threading.Thread):
# CalibreTask.start() should wrap all exceptions in its own error handling # CalibreTask.start() should wrap all exceptions in its own error handling
item.task.start(self) item.task.start(self)
# remove self_cleanup tasks from list
if item.task.self_cleanup:
self.dequeued.remove(item)
self.queue.task_done() self.queue.task_done()
@ -141,6 +145,7 @@ class CalibreTask:
self.end_time = None self.end_time = None
self.message = message self.message = message
self.id = uuid.uuid4() self.id = uuid.uuid4()
self.self_cleanup = False
@abc.abstractmethod @abc.abstractmethod
def run(self, worker_thread): def run(self, worker_thread):
@ -209,6 +214,14 @@ class CalibreTask:
# todo: throw error if outside of [0,1] # todo: throw error if outside of [0,1]
self._progress = x self._progress = x
@property
def self_cleanup(self):
    """Whether the worker removes this task from its dequeued list after running it."""
    return self._self_cleanup

@self_cleanup.setter
def self_cleanup(self, value):
    """Store the self-cleanup flag for this task."""
    self._self_cleanup = value
def _handleError(self, error_message): def _handleError(self, error_message):
self.stat = STAT_FAIL self.stat = STAT_FAIL
self.progress = 1 self.progress = 1

View File

@ -19,13 +19,15 @@
from __future__ import division, print_function, unicode_literals from __future__ import division, print_function, unicode_literals
import os import os
from cps import config, db, gdriveutils, logger, ub from cps import config, db, fs, gdriveutils, logger, ub
from cps.constants import CACHE_DIR as _CACHE_DIR
from cps.services.worker import CalibreTask from cps.services.worker import CalibreTask
from cps.thumbnails import THUMBNAIL_RESOLUTION_1X, THUMBNAIL_RESOLUTION_2X
from datetime import datetime, timedelta from datetime import datetime, timedelta
from sqlalchemy import func from sqlalchemy import func
from urllib.request import urlopen
try:
from urllib.request import urlopen
except ImportError as e:
from urllib2 import urlopen
try: try:
from wand.image import Image from wand.image import Image
@ -33,73 +35,92 @@ try:
except (ImportError, RuntimeError) as e: except (ImportError, RuntimeError) as e:
use_IM = False use_IM = False
THUMBNAIL_RESOLUTION_1X = 1
THUMBNAIL_RESOLUTION_2X = 2
class TaskThumbnail(CalibreTask):
class TaskGenerateCoverThumbnails(CalibreTask):
def __init__(self, limit=100, task_message=u'Generating cover thumbnails'): def __init__(self, limit=100, task_message=u'Generating cover thumbnails'):
super(TaskThumbnail, self).__init__(task_message) super(TaskGenerateCoverThumbnails, self).__init__(task_message)
self.self_cleanup = True
self.limit = limit self.limit = limit
self.log = logger.create() self.log = logger.create()
self.app_db_session = ub.get_new_session_instance() self.app_db_session = ub.get_new_session_instance()
self.worker_db = db.CalibreDB(expire_on_commit=False) self.calibre_db = db.CalibreDB(expire_on_commit=False)
self.cache = fs.FileSystem()
self.resolutions = [
THUMBNAIL_RESOLUTION_1X,
THUMBNAIL_RESOLUTION_2X
]
def run(self, worker_thread): def run(self, worker_thread):
if self.worker_db.session and use_IM: if self.calibre_db.session and use_IM:
thumbnails = self.get_thumbnail_book_ids() expired_thumbnails = self.get_expired_thumbnails()
thumbnail_book_ids = list(map(lambda t: t.book_id, thumbnails)) thumbnail_book_ids = self.get_thumbnail_book_ids()
books_without_thumbnails = self.get_books_without_thumbnails(thumbnail_book_ids) books_without_thumbnails = self.get_books_without_thumbnails(thumbnail_book_ids)
count = len(books_without_thumbnails) count = len(books_without_thumbnails)
for i, book in enumerate(books_without_thumbnails): for i, book in enumerate(books_without_thumbnails):
thumbnails = self.get_thumbnails_for_book(thumbnails, book) for resolution in self.resolutions:
if thumbnails: expired_thumbnail = self.get_expired_thumbnail_for_book_and_resolution(
for thumbnail in thumbnails: book,
self.update_book_thumbnail(book, thumbnail) resolution,
expired_thumbnails
else: )
self.create_book_thumbnail(book, THUMBNAIL_RESOLUTION_1X) if expired_thumbnail:
self.create_book_thumbnail(book, THUMBNAIL_RESOLUTION_2X) self.update_book_thumbnail(book, expired_thumbnail)
else:
self.create_book_thumbnail(book, resolution)
self.progress = (1.0 / count) * i self.progress = (1.0 / count) * i
self._handleSuccess() self._handleSuccess()
self.app_db_session.close() self.app_db_session.remove()
def get_expired_thumbnails(self):
    """Return all ``ub.Thumbnail`` rows whose expiration lies before the current UTC time."""
    now = datetime.utcnow()
    expired = self.app_db_session.query(ub.Thumbnail).filter(ub.Thumbnail.expiration < now)
    return expired.all()
def get_thumbnail_book_ids(self): def get_thumbnail_book_ids(self):
return self.app_db_session\ return self.app_db_session\
.query(ub.Thumbnail)\ .query(ub.Thumbnail.book_id)\
.group_by(ub.Thumbnail.book_id)\ .group_by(ub.Thumbnail.book_id)\
.having(func.min(ub.Thumbnail.expiration) > datetime.utcnow())\ .having(func.min(ub.Thumbnail.expiration) > datetime.utcnow())\
.all() .distinct()
def get_books_without_thumbnails(self, thumbnail_book_ids): def get_books_without_thumbnails(self, thumbnail_book_ids):
return self.worker_db.session\ return self.calibre_db.session\
.query(db.Books)\ .query(db.Books)\
.filter(db.Books.has_cover == 1)\ .filter(db.Books.has_cover == 1)\
.filter(db.Books.id.notin_(thumbnail_book_ids))\ .filter(db.Books.id.notin_(thumbnail_book_ids))\
.limit(self.limit)\ .limit(self.limit)\
.all() .all()
def get_thumbnails_for_book(self, thumbnails, book): def get_expired_thumbnail_for_book_and_resolution(self, book, resolution, expired_thumbnails):
results = list() for thumbnail in expired_thumbnails:
for thumbnail in thumbnails: if thumbnail.book_id == book.id and thumbnail.resolution == resolution:
if thumbnail.book_id == book.id: return thumbnail
results.append(thumbnail)
return results return None
def update_book_thumbnail(self, book, thumbnail): def update_book_thumbnail(self, book, thumbnail):
thumbnail.generated_at = datetime.utcnow()
thumbnail.expiration = datetime.utcnow() + timedelta(days=30) thumbnail.expiration = datetime.utcnow() + timedelta(days=30)
try: try:
self.app_db_session.commit() self.app_db_session.commit()
self.generate_book_thumbnail(book, thumbnail) self.generate_book_thumbnail(book, thumbnail)
except Exception as ex: except Exception as ex:
self.log.info(u'Error updating book thumbnail: ' + str(ex))
self._handleError(u'Error updating book thumbnail: ' + str(ex)) self._handleError(u'Error updating book thumbnail: ' + str(ex))
self.app_db_session.rollback() self.app_db_session.rollback()
def create_book_thumbnail(self, book, resolution): def create_book_thumbnail(self, book, resolution):
thumbnail = ub.Thumbnail() thumbnail = ub.Thumbnail()
thumbnail.book_id = book.id thumbnail.book_id = book.id
thumbnail.format = 'jpeg'
thumbnail.resolution = resolution thumbnail.resolution = resolution
self.app_db_session.add(thumbnail) self.app_db_session.add(thumbnail)
@ -107,6 +128,7 @@ class TaskThumbnail(CalibreTask):
self.app_db_session.commit() self.app_db_session.commit()
self.generate_book_thumbnail(book, thumbnail) self.generate_book_thumbnail(book, thumbnail)
except Exception as ex: except Exception as ex:
self.log.info(u'Error creating book thumbnail: ' + str(ex))
self._handleError(u'Error creating book thumbnail: ' + str(ex)) self._handleError(u'Error creating book thumbnail: ' + str(ex))
self.app_db_session.rollback() self.app_db_session.rollback()
@ -128,9 +150,12 @@ class TaskThumbnail(CalibreTask):
if img.height > height: if img.height > height:
width = self.get_thumbnail_width(height, img) width = self.get_thumbnail_width(height, img)
img.resize(width=width, height=height, filter='lanczos') img.resize(width=width, height=height, filter='lanczos')
img.save(filename=self.get_thumbnail_cache_path(thumbnail)) img.format = thumbnail.format
filename = self.cache.get_cache_file_path(thumbnail.filename, fs.CACHE_TYPE_THUMBNAILS)
img.save(filename=filename)
except Exception as ex: except Exception as ex:
# Bubble exception to calling function # Bubble exception to calling function
self.log.info(u'Error generating thumbnail file: ' + str(ex))
raise ex raise ex
finally: finally:
stream.close() stream.close()
@ -144,7 +169,9 @@ class TaskThumbnail(CalibreTask):
if img.height > height: if img.height > height:
width = self.get_thumbnail_width(height, img) width = self.get_thumbnail_width(height, img)
img.resize(width=width, height=height, filter='lanczos') img.resize(width=width, height=height, filter='lanczos')
img.save(filename=self.get_thumbnail_cache_path(thumbnail)) img.format = thumbnail.format
filename = self.cache.get_cache_file_path(thumbnail.filename, fs.CACHE_TYPE_THUMBNAILS)
img.save(filename=filename)
def get_thumbnail_height(self, thumbnail): def get_thumbnail_height(self, thumbnail):
return int(225 * thumbnail.resolution) return int(225 * thumbnail.resolution)
@ -153,20 +180,88 @@ class TaskThumbnail(CalibreTask):
percent = (height / float(img.height)) percent = (height / float(img.height))
return int((float(img.width) * float(percent))) return int((float(img.width) * float(percent)))
def get_thumbnail_cache_dir(self): @property
if not os.path.isdir(_CACHE_DIR): def name(self):
os.makedirs(_CACHE_DIR) return "GenerateCoverThumbnails"
if not os.path.isdir(os.path.join(_CACHE_DIR, 'thumbnails')):
os.makedirs(os.path.join(_CACHE_DIR, 'thumbnails'))
return os.path.join(_CACHE_DIR, 'thumbnails') class TaskCleanupCoverThumbnailCache(CalibreTask):
def __init__(self, task_message=u'Validating cover thumbnail cache'):
# Sets up the cleanup task: a logger, an app-database session, a Calibre
# database handle and the cache file-system helper.
super(TaskCleanupCoverThumbnailCache, self).__init__(task_message)
self.log = logger.create()
# New session from ub.get_new_session_instance(); run() calls remove() on it when finished
self.app_db_session = ub.get_new_session_instance()
self.calibre_db = db.CalibreDB(expire_on_commit=False)
# Helper used to list and delete cached thumbnail files
self.cache = fs.FileSystem()
def get_thumbnail_cache_path(self, thumbnail): def run(self, worker_thread):
if thumbnail: cached_thumbnail_files = self.cache.list_cache_files(fs.CACHE_TYPE_THUMBNAILS)
return os.path.join(self.get_thumbnail_cache_dir(), thumbnail.filename)
return None # Expire thumbnails in the database if the cached file is missing
# This case will happen if a user deletes the cache dir or cached files
if self.app_db_session:
self.expire_missing_thumbnails(cached_thumbnail_files)
self.progress = 0.33
# Delete thumbnails in the database if the book has been removed
# This case will happen if a book is removed in Calibre and the metadata.db file is updated in the filesystem
if self.app_db_session and self.calibre_db:
book_ids = self.get_book_ids()
self.delete_thumbnails_for_missing_books(book_ids)
self.progress = 0.66
# Delete extraneous cached thumbnail files
# This case will happen if a book was deleted and the thumbnail OR the metadata.db file was changed externally
if self.app_db_session:
db_thumbnail_files = self.get_thumbnail_filenames()
self.delete_extraneous_thumbnail_files(cached_thumbnail_files, db_thumbnail_files)
self._handleSuccess()
self.app_db_session.remove()
def expire_missing_thumbnails(self, filenames):
    """Set ``expiration`` to the current UTC time on thumbnails not listed in *filenames*.

    :param filenames: names of the thumbnail files currently present in the cache

    On failure the error is logged, reported through ``_handleError`` and the
    session is rolled back; the exception is not re-raised.
    """
    try:
        missing = self.app_db_session.query(ub.Thumbnail).filter(
            ub.Thumbnail.filename.notin_(filenames)
        )
        # Bulk update; the in-memory session state is deliberately not synchronised
        missing.update({"expiration": datetime.utcnow()}, synchronize_session=False)
        self.app_db_session.commit()
    except Exception as ex:
        message = u'Error expiring thumbnails for missing cache files: ' + str(ex)
        self.log.info(message)
        self._handleError(message)
        self.app_db_session.rollback()
def get_book_ids(self):
    """Return a flat list with the distinct ids of all books whose ``has_cover`` is 1."""
    rows = self.calibre_db.session.query(db.Books.id).filter(db.Books.has_cover == 1).distinct()
    # Each result row is a one-element tuple holding the id
    return [row[0] for row in rows]
def delete_thumbnails_for_missing_books(self, book_ids):
    """Delete every thumbnail row whose ``book_id`` is not in *book_ids*.

    :param book_ids: ids of the books that should keep their thumbnails

    On failure the error is logged, reported through ``_handleError`` and the
    session is rolled back; the exception is not re-raised.
    """
    try:
        self.app_db_session\
            .query(ub.Thumbnail)\
            .filter(ub.Thumbnail.book_id.notin_(book_ids))\
            .delete(synchronize_session=False)
        self.app_db_session.commit()
    except Exception as ex:
        # Log the same contextual message that is reported on the task, as the
        # other handlers in this module do, so the log entry is self-explanatory.
        message = u'Error deleting thumbnails for missing books: ' + str(ex)
        self.log.info(message)
        self._handleError(message)
        self.app_db_session.rollback()
def get_thumbnail_filenames(self):
    """Return the ``filename`` value of every thumbnail row as a flat list."""
    rows = self.app_db_session.query(ub.Thumbnail.filename).all()
    # Each result row is a one-element tuple holding the filename
    return [row[0] for row in rows]
def delete_extraneous_thumbnail_files(self, cached_thumbnail_files, db_thumbnail_files):
    """Delete cached thumbnail files that have no matching filename in the database.

    :param cached_thumbnail_files: file names found in the thumbnail cache directory
    :param db_thumbnail_files: file names recorded in the thumbnail table
    """
    stale_names = set(cached_thumbnail_files).difference(db_thumbnail_files)
    for stale_name in stale_names:
        self.cache.delete_cache_file(stale_name, fs.CACHE_TYPE_THUMBNAILS)
@property @property
def name(self): def name(self):
return "Thumbnail" return "CleanupCoverThumbnailCache"

View File

@ -1,54 +0,0 @@
# -*- coding: utf-8 -*-
# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web)
# Copyright (C) 2020 mmonkey
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import division, print_function, unicode_literals
import os
from . import logger, ub
from .constants import CACHE_DIR as _CACHE_DIR
from datetime import datetime
THUMBNAIL_RESOLUTION_1X = 1
THUMBNAIL_RESOLUTION_2X = 2
log = logger.create()
def get_thumbnail_cache_dir():
if not os.path.isdir(_CACHE_DIR):
os.makedirs(_CACHE_DIR)
if not os.path.isdir(os.path.join(_CACHE_DIR, 'thumbnails')):
os.makedirs(os.path.join(_CACHE_DIR, 'thumbnails'))
return os.path.join(_CACHE_DIR, 'thumbnails')
def get_thumbnail_cache_path(thumbnail):
if thumbnail:
return os.path.join(get_thumbnail_cache_dir(), thumbnail.filename)
return None
def cover_thumbnail_exists_for_book(book):
if book and book.has_cover:
thumbnail = ub.session.query(ub.Thumbnail).filter(ub.Thumbnail.book_id == book.id).first()
if thumbnail and thumbnail.expiration > datetime.utcnow():
thumbnail_path = get_thumbnail_cache_path(thumbnail)
return thumbnail_path and os.path.isfile(thumbnail_path)
return False

View File

@ -18,6 +18,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import division, print_function, unicode_literals from __future__ import division, print_function, unicode_literals
import atexit
import os import os
import sys import sys
import datetime import datetime
@ -42,12 +43,11 @@ from sqlalchemy import create_engine, exc, exists, event
from sqlalchemy import Column, ForeignKey from sqlalchemy import Column, ForeignKey
from sqlalchemy import String, Integer, SmallInteger, Boolean, DateTime, Float, JSON from sqlalchemy import String, Integer, SmallInteger, Boolean, DateTime, Float, JSON
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.orm.attributes import flag_modified from sqlalchemy.orm.attributes import flag_modified
from sqlalchemy.orm import backref, relationship, sessionmaker, Session, scoped_session from sqlalchemy.orm import backref, relationship, sessionmaker, Session, scoped_session
from werkzeug.security import generate_password_hash from werkzeug.security import generate_password_hash
from . import cli, constants from . import cli, constants, logger
session = None session = None
@ -435,6 +435,14 @@ class RemoteAuthToken(Base):
return '<Token %r>' % self.id return '<Token %r>' % self.id
def filename(context):
    """Build the default ``filename`` column value from the row's ``uuid`` and ``format``.

    :param context: object whose ``get_current_parameters()`` returns the
        parameters of the row being written, including ``'uuid'`` and ``'format'``
    :return: ``<uuid>.jpg`` for the ``'jpeg'`` format, otherwise ``<uuid>.<format>``
    """
    params = context.get_current_parameters()
    file_format = params['format']
    # JPEG files use the conventional three-letter extension
    extension = 'jpg' if file_format == 'jpeg' else file_format
    return params['uuid'] + '.' + extension
class Thumbnail(Base): class Thumbnail(Base):
__tablename__ = 'thumbnail' __tablename__ = 'thumbnail'
@ -443,19 +451,10 @@ class Thumbnail(Base):
uuid = Column(String, default=lambda: str(uuid.uuid4()), unique=True) uuid = Column(String, default=lambda: str(uuid.uuid4()), unique=True)
format = Column(String, default='jpeg') format = Column(String, default='jpeg')
resolution = Column(SmallInteger, default=1) resolution = Column(SmallInteger, default=1)
filename = Column(String, default=filename)
generated_at = Column(DateTime, default=lambda: datetime.datetime.utcnow())
expiration = Column(DateTime, default=lambda: datetime.datetime.utcnow() + datetime.timedelta(days=30)) expiration = Column(DateTime, default=lambda: datetime.datetime.utcnow() + datetime.timedelta(days=30))
@hybrid_property
def extension(self):
if self.format == 'jpeg':
return 'jpg'
else:
return self.format
@hybrid_property
def filename(self):
return self.uuid + '.' + self.extension
# Migrate database to current version, has to be updated after every database change. Currently migration from # Migrate database to current version, has to be updated after every database change. Currently migration from
# everywhere to current should work. Migration is done by checking if relevant columns are existing, and then adding # everywhere to current should work. Migration is done by checking if relevant columns are existing, and then adding
@ -705,6 +704,9 @@ def get_new_session_instance():
new_engine = create_engine(u'sqlite:///{0}'.format(cli.settingspath), echo=False) new_engine = create_engine(u'sqlite:///{0}'.format(cli.settingspath), echo=False)
new_session = scoped_session(sessionmaker()) new_session = scoped_session(sessionmaker())
new_session.configure(bind=new_engine) new_session.configure(bind=new_engine)
atexit.register(lambda: new_session.remove() if new_session else True)
return new_session return new_session