/** ********************************************************************** * Tranquility Reader - A Firefox Webextension that cleans up * cluttered web pages ********************************************************************** Copyright (c) 2012-2022 Arun Kunchithapatham This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . Contributors: Arun Kunchithapatham - Initial Contribution *********************************************************************** * */ /* * Process the messages appropriately * */ 'use strict'; var browser = browser || chrome; var currentURL = null; var dfsIndex = 1; var osVersion = browser.runtime.PlatformOs; var zoomValue = 1.0; var currentFontSize = "20px"; const DEBUG = true; var print = DEBUG ? alert : console.log function tranquilize(request, sender, sendResponse) { if (request.tranquility_action === 'Run') { print("Called to run Tranquility at: " + new Date()); RunOnLoad(); return Promise.resolve({response: "Completed Running Tranquility"}); } else if (request.tranquility_action === 'RunAndSave') { print("Called to run and Save Tranquility at: " + new Date()); RunAndSaveOnLoad(); return Promise.resolve({response: "Completed Saving Content Offline"}); } else if (request.tranquility_action == 'RunOnSelection') { print("Called to run Tranquility at: " + new Date()); RunOnSelection(); return Promise.resolve({response: "Completed Running Tranquility on Selection"}); } else if (request.tranquility_action === 'PopulateOfflineList') { print("Receive message to display offline files list"); displayOfflinePages(request.offline_data); } else if (request.tranquility_action === 'DisplayOfflineDocument') { print("Received offline document from database"); displayDocFromDB(request.cached_doc, request.url); } else if (request.tranquility_action === 'DeleteOfflineDocumentLink') { delDocFromDB(request.url); } else if (request.tranquility_action === 'CreateExportLink') { print("Received message to export offline links"); displayExportLink(request.offline_data); } else if (request.tranquility_action === 'ImportOfflinePages') { displayImportPage(); return Promise.resolve({response: "Created Page for Import Prompt"}); } else if (request.tranquility_action === 'AddAnnotation') { addAnnotation(); } else if (request.tranquility_action === 'UpdateTranquilityPreferences') { if (document.getElementsByClassName("tranquility_container").length > 0) { applyAllTranquilityPreferences(); return Promise.resolve({response: "Updated Tranquility Preferences"}); } else { return Promise.resolve({response: "Tab does not contain Tranquility Reader elements"}); } } else if (request.tranquility_action == 'Status') { return Promise.resolve({response: "Tranquility Has Already Run"}); } else if (request.tranquility_action == 'UpdateZoomValue') { updateZoomValue(request.zoomValue); return Promise.resolve({response: "Updated Zoom Value"}); } else if (request.tranquility_action == 'ExecutePostPDFPrintActions') { // Reset page to default column-num value post printing // Then reapply the image display preferences applyNumColumnsPreferences('defaultMode'); applyImageDisplayPreferences(); return Promise.resolve({response: "Executed Post PDF Print Actions"}); } else if (request.tranquility_action == 'None') { return Promise.resolve({response: "Receive Do Nothing Message"}); } else { print("Message not implemented: " + request.tranquility_action); } } function RunOnLoad() { requestZoomValue(); currentURL = location.toString(); // If we have already run tranquility, then just toggle back to the original webpage (un-tranquilize the page) if (document.body.getElementsByClassName("tranquility").length > 0) { // If this is an offline link, we need to get the data-active-link of the tranquility_offline_links_btn print("Document already in tranquility mode. Reverting to original page..."); let btn = document.getElementById('tranquility_offline_links_btn'); let url = null; if(btn.getAttribute('data-active-link')) { print("Found data active link..."); url = btn.getAttribute('data-active-link'); } else { url = currentURL; } print("url: " + url); // Handle corner case when the url has a "#" tag // this can prevent the window.location.assign from working! // window.location.assign(url.split("#")[0]); } // If tranquility has not been run, then "tranquilize" the document else { // Stop loading the document if it has not completed loading if(document.readyState == "loading") { window.stop(); // Show a progress-bar to indicate activity and then process the request // bar will automatically disappear since the document will be replaced let pbar = getProgressBar(document); pbar.style.visibility = 'visible'; processXMLHTTPRequest(currentURL, false); } else { // Show a progress-bar to indicate activity and then process the request // bar will automatically disappear since the document will be replaced let pbar = getProgressBar(document); pbar.style.visibility = 'visible'; processContentDoc(document, currentURL, false); } } } function RunOnSelection() { currentURL = location.toString(); // Typically used when the page has at least partially loaded and user has selected some text // However this should work even if we are running on an already processed page; maybe the user wants to // prune the tranquilized content further and read just a portion of the article // Stop loading the document if it has not completed loading if(document.readyState == "loading") { window.stop(); } // Obtain a DocumentFragment of the selected portion of the webpage let selection = document.getSelection(); let range = selection.getRangeAt(0); let frag = range.cloneContents(); // Show a progress-bar to indicate activity and then process the request // bar will automatically disappear since the document will be replaced let pbar = getProgressBar(document); pbar.style.visibility = 'visible'; // Clone the current page and replace entire body with the DocumentFragment let contentDoc = document.cloneNode(true); let docBody = contentDoc.body; while (docBody.firstChild) { docBody.removeChild(docBody.firstChild); } docBody.appendChild(frag); // Now run tranquility to process the DocumentFragment processContentDoc(contentDoc, currentURL, false); } function RunAndSaveOnLoad() { currentURL = location.toString(); // If we have already run tranquility, then just save content offline and exit if (document.readyState == "complete" && document.body.getElementsByClassName("tranquility").length > 0) { saveContentOffline(currentURL, document.cloneNode(true)); return; } // If tranquility has not been run, then "tranquilize" the document and then save the content offline if(document.readyState == "loading") { window.stop(); } // Show a progress-bar to indicate activity and then process the request // bar will automatically disappear since the document will be replaced let pbar = getProgressBar(document); pbar.style.visibility = 'visible'; processXMLHTTPRequest(currentURL, true); } function processXMLHTTPRequest(url, saveOffline) { // Handle corner case to avoid mixed content security warnings/errors let getURL = url; if (getURL.substr(0,5) == 'https') { print(getURL); getURL = getURL.replace(/^http\:/, 'https:'); print(getURL); } let oXHR = new XMLHttpRequest(); oXHR.onreadystatechange = function() { print(oXHR.readyState + ", " + oXHR.status); if(oXHR.readyState === 4) { if(oXHR.status === 200) { let oXHRDoc = oXHR.responseText; processResponse(oXHRDoc, url, saveOffline); } else { // Print error message to console and remove progress bar if any // print("Response status: " + oXHR.status); print("Unable to process document"); let pbar = document.getElementById("tranquility_progress_bar"); if (pbar) { pbar.style.backgroundColor = '#FF0000'; setTimeout(function() { pbar.parentNode.removeChild(pbar); }, 3000); } } } }; print(getURL); oXHR.open("GET", getURL, true); // Fix to handle pages that use iso-8859-1/windows-1252 encoding // if (document.characterSet.toLowerCase() == "windows-1252") { oXHR.overrideMimeType('text/html; charset=iso-8859-1'); } oXHR.send(null); } function processResponse (oXHRDoc, thisURL, saveOffline) { print("Processing Response..."); let parser = new DOMParser(); let contentDoc = parser.parseFromString(oXHRDoc, "text/html"); processContentDoc(contentDoc, thisURL, saveOffline); } function processContentDoc(contentDoc, thisURL, saveOffline) { // First move to the top of the document; for some reason // window.scroll(0, 0) at the end of processing does not seem to // always work // document.documentElement.scrollTop = 0; // Remove all event handlers by "deep" cloning the document // instead of cloning each element (saves some time and // the code is cleaner); now cloning the entire document // instead of just cloning only the body // let clonedDoc = contentDoc.cloneNode(true); document.replaceChild(clonedDoc.documentElement, document.documentElement); contentDoc = document; // Remove all script tags // let scriptTags = ["SCRIPT", "NOSCRIPT"]; for (let i = 0; i < scriptTags.length; i++) { removeTag(contentDoc, scriptTags[i]); } // Now replace document.documentElement; It looks like we need this step for // the window.getComputedStyle() function to work correctly // we can then copy over the document to the contentDoc variable and continue // as before // document.replaceChild(contentDoc.documentElement, document.documentElement); contentDoc = document; // First get a dfs search to index every single element in the // document let indexMap = {}; indexElements(indexMap, contentDoc.body); // Backup any title/heading related tags to restore in case they are removed // by the deletion logic // let hElemsMap = {}; cloneHElems(hElemsMap, contentDoc); // Collect any supporting links before processing the webpage let supporting_links = getSupportingLinks(contentDoc); print("Got supporting links..."); // Remove some elements that are typically like hidden elements // but can add to the text size of a document; remove them so that // their effect on later logic (textContent.length value) is minimized // let likeHidden = ["HEADER", "FOOTER", "NAV", "SVG", "PATH", "LINK", "STYLE"]; for (let i = 0; i < likeHidden.length; i++) { removeTag(contentDoc, likeHidden[i]); } // Remove unnecessary whitespaces and comments removeWhiteSpaceComments(contentDoc); //print("Removed white spaces and comments"); // Cleanup the head and unnecessary tags // Delete All Hidden Elements before doing anything further // These could be hidden images, div, spans, spacers, etc... // Delete any content that has display = 'none' or visibility == 'hidden' // This was originally done only for spacer images, but seems like a meaningful thing // to do for all elements, given that all scripts are also deleted in the Tranquility view // // First get the size of the document before removing hidden content and make a clone // in case we need to revert // let sizeBeforeDelHidden = computeSize(contentDoc.documentElement); let bkpContentDoc = contentDoc.cloneNode(true); deleteHiddenElements(contentDoc, "*"); print("Removed Hidden elements"); let sizeAfterDelHidden = computeSize(contentDoc.documentElement); print(sizeBeforeDelHidden, sizeAfterDelHidden); // If the content after deletion of hidden elements is less than 10% of the // content before deletion of hidden elements and the size after deletion // is less than 200 characters, then it is possible that the // website is hiding content within hidden elements // // Revert to the document state before this step and continue... // if (sizeAfterDelHidden < 200 && sizeAfterDelHidden / sizeBeforeDelHidden < 0.1) { print("Problem removing hidden elements..."); print("Website may be hiding content within hidden elements..."); print("Reverting to backedup document and continuing..."); print("Size Before: ", sizeBeforeDelHidden, "Size After: ", sizeAfterDelHidden); document.replaceChild(bkpContentDoc.documentElement, document.documentElement); contentDoc = document; } print("Size: ", computeSize(contentDoc.documentElement)); // Remove zero sized images; this is just another way of hiding elements // otherwise, these can get cloned and reappear // resized to the reading width, which is very annoying // This has a side effect of removing images that have not yet loaded // The problem will be addressed in a later release // deleteZeroSizeImages(contentDoc); print("Removed Zero Sized Images"); print("Size: ", computeSize(contentDoc.documentElement)); // Ensure that we set a base element before we replace the // web page with the new content; otherwise, relative URL // links will be based on the incorrect URL present in the // window.location // Then call convertLinksAbsolute to convert all relative // links to absolute links so that these links will also // work if we save this document for reading later // let baseElem = createNode(contentDoc, {type: 'BASE', attr: { href: thisURL } }); let heads = contentDoc.getElementsByTagName('HEAD'); for(let i = 0; i < heads.length; i++) { heads[i].appendChild(baseElem.cloneNode(true)); } convertLinksAbsolute(contentDoc, thisURL); print("Processing document..."); // Remove any links that have an onclick event (these are usually for sharing to social media) // removing such links is consistent with our preference to delete all javascript // print("Removing links with associated javascript events..."); let all_links = contentDoc.getElementsByTagName("A"); for (let i = all_links.length - 1; i >= 0; i--) { let onclickVal = all_links[i].getAttribute('onclick'); if (onclickVal != null) { all_links[i].setAttribute('onclick', "void(0);"); } } // If there is a single "MAIN" tag, then replace the entire document content with just the // contents of the main tag. Trust that the content creator has done the correct thing. // If and article tag exists, then... // If there is a single "ARTICLE" tag, then replace the entire document content with just the // contents of the article. Trust that the content creator has done the correct thing // (this is because articles are supposed to be within the main tag) // let mainsOrArticle = false; let mains = contentDoc.getElementsByTagName("main"); let articles = contentDoc.getElementsByTagName("article"); if (mains.length == 1) { let docBody = contentDoc.body; let mainContent = mains[0].cloneNode(true); if (computeSize(mainContent) > 200) { while (docBody.firstChild) { docBody.removeChild(docBody.firstChild); } docBody.appendChild(mainContent); print("Replaced body content with main contents..."); mainsOrArticle = true; } } if (articles.length == 1) { let docBody = contentDoc.body; let mainArticle = articles[0].cloneNode(true); if (computeSize(mainArticle) > 200) { while (docBody.firstChild) { docBody.removeChild(docBody.firstChild); } docBody.appendChild(mainArticle); print("Replaced body content with article contents..."); mainsOrArticle = true; } } print("Processed article/main content..."); print("Size: ", computeSize(contentDoc.documentElement)); // Remove unnecessary whitespaces and comments //removeWhiteSpaceComments(contentDoc); //print("Removed white spaces and comments"); // Cleanup the head and unnecessary tags let delTags = ["STYLE", "LINK", "META", "SCRIPT", "NOSCRIPT", "IFRAME", "SELECT", "DD", "INPUT", "TEXTAREA", "HEADER", "FOOTER", "NAV", "FORM", "BUTTON", "PICTURE", "FIGURE", "SVG"]; for(let i=0; i