Wrote a class to handle EPUB parsing; implemented the byte size of all previous files and the total byte size. Still needs an implementation of the byte size of the current file up to the CFI point.

2023-05-15 14:09:18 +02:00 · 2023-05-15 14:09:18 +02:00 · 10c66b6e63
commit 10c66b6e63
parent c29b1696f7
2 changed files with 81 additions and 0 deletions
--- a/cps/static/js/reading/epub-progress.js
+++ b/cps/static/js/reading/epub-progress.js
@ -0,0 +1,80 @@
+class EpubParser {
+    constructor(filesList) {
+        this.files = filesList;
+        this.parser = new DOMParser();
+        this.opfXml = this.getOPFXml();
+    }
+
+
+    getTextByteLength() {
+        let size = 0;
+        for (let key in y = Object.keys(this.files)) {
+            let file = this.files[y[key]];
+            if (file.name.endsWith("html")) {
+                size += file._data.uncompressedSize;
+            }
+        }
+        return size;
+    }
+
+    /**
+     * @param {string} filename name of the file in filelist
+     * @return {string} string representation of decompressed bytes
+     */
+    decompress(filename) {
+        return pako.inflate(this.files[filename]._data.compressedContent, {raw: true, to: "string"});
+    }
+
+    getOPFXml() {
+        let content = this.decompress("META-INF/container.xml");
+        let xml = this.parser.parseFromString(content, "text/xml");
+        let path = xml.getElementsByTagName("rootfile")[0].getAttribute("full-path");
+        this.opfDir = path.split("/").slice(0, -1).join("/");
+        return this.parser.parseFromString(this.decompress(path), "text/xml");
+    }
+
+
+    getSpine() {
+        return Array.from(this.opfXml.getElementsByTagName("spine")[0].children).map(node => node.getAttribute("idref"));
+    }
+
+    /**
+     resolves an idref in content.opf to its file
+     */
+    resolveIDref(idref) {
+        return this.opfXml.getElementById(idref).getAttribute("href");
+    }
+
+    /**
+     * returns absolute path from path relative to content.opf
+     * @param path
+     */
+    absPath(path) {
+        if (this.opfDir) {
+            return [this.opfDir, path].join("/");
+        } else {
+            return path;
+        }
+    }
+
+    /**
+     returns the sum of the bytesize of all html files that are located before it in the spine
+     @param {string} currentFile idref of the current file, also part of the CFI, e.g. here: #epubcfi(/6/2[titlepage]!/4/1:0) it would be "titlepage"
+     */
+    getPreviousFilesSize(currentFile) {
+        let bytesize = 0;
+        for (let file of this.getSpine()) {
+            if (file !== currentFile) {
+                let filepath = this.absPath(this.resolveIDref(currentFile));
+                //ignore non text files
+                if (filepath.endsWith("html")) {
+                    bytesize += this.files[filepath]._data.uncompressedSize;
+                }
+            } else {
+                break
+            }
+        }
+        return bytesize;
+    }
+
+}
--- a/cps/templates/read.html
+++ b/cps/templates/read.html
@ -153,6 +153,7 @@
 <script src="{{ url_for('static', filename='js/libs/screenfull.min.js') }}"></script>
 <script src="{{ url_for('static', filename='js/libs/reader.min.js') }}"></script>
 <script src="{{ url_for('static', filename='js/reading/epub.js') }}"></script>
+<script src="{{ url_for('static', filename='js/libs/pako.min.js') }}"></script>
 <script>
    function getTextByteLength() {
        let size = 0;