// JavaScript source — 175 lines, 6.4 KiB
/**
 * Reads an EPUB archive (a map of zip entries) and estimates the reading
 * progress for a position given as an EPUB CFI.
 *
 * Each zip entry is expected to expose `name`, `_data.uncompressedSize` and
 * `_data.compressedContent`, because those are the only fields read here.
 * Relies on the globals `DOMParser`, `TextEncoder` and `pako`.
 */
class EpubParser {
    /**
     * @param {Object} filesList map of archive path to zip entry
     */
    constructor(filesList) {
        this.files = filesList;
        this.parser = new DOMParser();
        // getOPFXml() needs this.files and this.parser and also sets this.opfDir.
        this.opfXml = this.getOPFXml();
        this.encoder = new TextEncoder();
    }

    /**
     * Sums the uncompressed size of every archive entry whose name ends in "html".
     * NOTE(review): this covers all such entries in the archive, while
     * getPreviousFilesSize() only walks the spine — confirm both are meant to
     * describe the same set of files.
     * @return {number} total size in bytes
     */
    getTotalByteLength() {
        let size = 0;
        for (const key of Object.keys(this.files)) {
            const file = this.files[key];
            if (file.name.endsWith("html")) {
                size += file._data.uncompressedSize;
            }
        }
        return size;
    }

    /**
     * gets file from files and returns decompressed content as string
     * @param {string} filename name of the file in filelist
     * @return {string} string representation of decompressed bytes
     * @throws {Error} when the archive has no entry with that name
     */
    decompress(filename) {
        const entry = this.files[filename];
        if (!entry) {
            throw new Error(`EPUB archive has no entry named "${filename}"`);
        }
        return pako.inflate(entry._data.compressedContent, {raw: true, to: "string"});
    }

    /**
     * Locates the package document through META-INF/container.xml, remembers
     * its directory in this.opfDir and returns it parsed.
     * @return {Document} parsed package (OPF) document
     * @throws {Error} when container.xml declares no rootfile
     */
    getOPFXml() {
        const container = this.parser.parseFromString(this.decompress("META-INF/container.xml"), "text/xml");
        const rootfile = container.getElementsByTagName("rootfile")[0];
        if (!rootfile) {
            throw new Error("META-INF/container.xml declares no <rootfile>");
        }
        const path = rootfile.getAttribute("full-path");
        // Everything before the last "/" ("" when the OPF sits in the archive root).
        this.opfDir = path.split("/").slice(0, -1).join("/");
        return this.parser.parseFromString(this.decompress(path), "text/xml");
    }

    /**
     * @return {string[]} the idref of every child of the first <spine>, in document order
     */
    getSpine() {
        const spine = this.opfXml.getElementsByTagName("spine")[0];
        return Array.from(spine.children).map(node => node.getAttribute("idref"));
    }

    /**
     * resolves an idref in content.opf to its file
     * @param {string} idref id of an element in the package document
     * @return {string} archive path of the referenced file
     * @throws {Error} when no element with that id exists
     */
    resolveIDref(idref) {
        const item = this.opfXml.getElementById(idref);
        if (!item) {
            throw new Error(`no element with id "${idref}" in the package document`);
        }
        return this.absPath(item.getAttribute("href"));
    }

    /**
     * returns absolute path from path relative to content.opf
     * @param {string} path path relative to the package document
     * @return {string} path prefixed with the OPF directory, if there is one
     */
    absPath(path) {
        return this.opfDir ? [this.opfDir, path].join("/") : path;
    }

    /**
     * returns the sum of the bytesize of all html files that are located before it in the spine
     * @param {string} currentFile idref of the current file, also part of the CFI, e.g. here: #epubcfi(/6/2[titlepage]!/4/1:0) it would be "titlepage"
     * @return {number} size in bytes; the whole spine is summed when currentFile is not in it
     */
    getPreviousFilesSize(currentFile) {
        let bytesize = 0;
        for (const idref of this.getSpine()) {
            if (idref === currentFile) {
                break;
            }
            const filepath = this.resolveIDref(idref);
            // ignore non text files
            if (filepath.endsWith("html")) {
                bytesize += this.files[filepath]._data.uncompressedSize;
            }
        }
        return bytesize;
    }

    /**
     * resolves the given cfi to the xml node it points to
     * @param {string} cfi epub-cfi string in the form: epubcfi(/6/16[id13]!/4[id2]/4/2[doc12]/1:0)
     * @return {Node|undefined} the node addressed by the final "step:offset"
     *         component; undefined when the CFI has no such component or the
     *         step is out of range
     * @throws {Error} when an intermediate step does not lead to an element
     */
    cfiToXmlNode(cfi) {
        const cfiPath = cfi.split("(")[1].split(")")[0];
        const [packagePath, contentPath] = cfiPath.split("!");
        // The first [id] assertion of the package part names the spine item.
        const fileId = packagePath.split("[")[1].split("]")[0];
        const xml = this.parser.parseFromString(this.decompress(this.resolveIDref(fileId)), "text/xml");
        const components = contentPath.split("/").slice(1);
        let currentNode = xml.getElementsByTagName("html")[0];
        for (const component of components) {
            if (!currentNode || currentNode.nodeType !== 1) {
                throw new Error(`CFI step "${component}" cannot be resolved: its parent is not an element`);
            }
            const slots = this.validateChildNodes(currentNode);
            // Classify the step by what precedes any "[...]" so that an id
            // containing ":" is not mistaken for a character offset.
            const step = component.split("[")[0];
            const index = parseInt(step, 10) - 1; // CFI steps are 1-based
            if (step.includes(":")) {
                return slots[index]; //exit point
            }
            currentNode = slots[index];
            if (component.includes("[")) {
                const expectedId = component.split("[")[1].split("]")[0];
                const actualId = currentNode && currentNode.nodeType === 1 ? currentNode.getAttribute("id") : null;
                console.assert(actualId === expectedId, "failed to resolve node");
            }
        }
        return undefined;
    }

    /**
     * Makes the children of parentNode addressable by CFI steps: text and
     * elements must alternate, starting and ending with text.
     *
     * Empty text nodes are inserted where a text slot is missing (before a
     * leading element, between two adjacent elements, after a trailing
     * element). Nothing is removed: comments and processing instructions are
     * skipped, and a run of adjacent text/CDATA nodes shares one slot that is
     * represented by the first node of the run.
     *
     * @param {Node} parentNode node that can have children (element)
     * @return {Node[]} the alternating slots; index 0 is text, index 1 the
     *         first element, and so on, so CFI step n is slots[n - 1]
     */
    validateChildNodes(parentNode) {
        const ELEMENT_NODE = 1;
        const TEXT_NODE = 3;
        const CDATA_SECTION_NODE = 4;
        const doc = parentNode.ownerDocument;
        const slots = [];
        // Snapshot first: inserting fillers must not disturb the iteration.
        for (const child of Array.from(parentNode.childNodes)) {
            const expectsText = slots.length % 2 === 0;
            if (child.nodeType === ELEMENT_NODE) {
                if (expectsText) {
                    const filler = doc.createTextNode("");
                    parentNode.insertBefore(filler, child);
                    slots.push(filler);
                }
                slots.push(child);
            } else if (child.nodeType === TEXT_NODE || child.nodeType === CDATA_SECTION_NODE) {
                // When a text slot is already open this node just continues it.
                if (expectsText) {
                    slots.push(child);
                }
            }
        }
        if (slots.length % 2 === 0) {
            const filler = doc.createTextNode("");
            parentNode.appendChild(filler);
            slots.push(filler);
        }
        return slots;
    }

    /**
     * takes the node that the cfi points at and counts the bytes of all nodes before that
     *
     * The result is the offset after the CFI's ":" plus the UTF-8 size of the
     * serialization of every preceding sibling element of the node's parent
     * and of each of its ancestors. The namespace URI length is subtracted
     * per element.
     * NOTE(review): presumably that subtraction compensates for the xmlns
     * attribute the serializer adds to each outerHTML — confirm.
     *
     * @param {string} CFI epub-cfi string ending in a "step:offset" component
     * @return {number} approximate number of bytes before the position
     * @throws {Error} when the CFI does not resolve to a node
     */
    getCurrentFileProgress(CFI) {
        let size = parseInt(CFI.split(":")[1], 10); //text offset in node
        const startNode = this.cfiToXmlNode(CFI); //returns text node
        if (!startNode) {
            throw new Error(`CFI "${CFI}" does not resolve to a node`);
        }
        const container = startNode.parentNode;
        const xmlnsLength = container.namespaceURI ? container.namespaceURI.length : 0;
        // Walk from the containing element up to the root, adding everything
        // that is serialized before each level.
        for (let level = container; level !== null; level = level.parentElement) {
            for (let prev = level.previousElementSibling; prev !== null; prev = prev.previousElementSibling) {
                size += this.encoder.encode(prev.outerHTML).length - xmlnsLength;
            }
        }
        return size;
    }

    /**
     * Reading progress as the ratio of bytes before the position to the
     * total size of the book's html files.
     * @param {string} currentFile idref of the spine item being read
     * @param {string} CFI epub-cfi string of the current position
     * @return {number} bytes read / total bytes; 0 when the book has no html content
     */
    getProgress(currentFile, CFI) {
        const total = this.getTotalByteLength();
        if (total === 0) {
            return 0;
        }
        const read = this.getPreviousFilesSize(currentFile) + this.getCurrentFileProgress(CFI);
        return read / total;
    }
}
|
|
|
|
// Builds an EpubParser over the zip entries found at reader.book.archive.zip.files.
// NOTE(review): `e` is assigned without const/let/var, so it becomes an implicit
// global in sloppy mode and throws a ReferenceError in strict mode or an ES
// module — confirm this is intentional (e.g. for use from the console).
e = new EpubParser(reader.book.archive.zip.files)
|