rosenrot/plugins/readability/tranquilize.js

1259 lines
45 KiB
JavaScript
Raw Normal View History

/**
**********************************************************************
* Tranquility Reader - A Firefox Webextension that cleans up
* cluttered web pages
**********************************************************************
Copyright (c) 2012-2022 Arun Kunchithapatham
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Contributors:
Arun Kunchithapatham - Initial Contribution
***********************************************************************
*
*/
/*
* Process the messages appropriately
*
*/
// Script-wide state shared by the functions in this file.
'use strict';
// Use the `browser` namespace when it is defined, otherwise fall back to `chrome`.
var browser = browser || chrome;
// URL of the page being processed; assigned by RunOnLoad / RunOnSelection / RunAndSaveOnLoad.
var currentURL = null;
// NOTE(review): presumably the running counter used when indexing elements — confirm against indexElements.
var dfsIndex = 1;
// NOTE(review): `PlatformOs` is read straight off browser.runtime; verify this property exists on the target runtime.
var osVersion = browser.runtime.PlatformOs;
var zoomValue = 1.0;
var currentFontSize = "20px";
// print() is alert() while DEBUG is true and console.log() otherwise.
// NOTE(review): alert() displays only its first argument, so multi-argument print() calls lose
// the remaining values while DEBUG is true.
const DEBUG = true;
var print = DEBUG ? alert : console.log
/*
 * Message handler: looks at request.tranquility_action and runs the matching
 * Tranquility operation.
 *
 * Returns a resolved Promise carrying {response: <status text>} for the actions
 * that report a status; every other action (and any unknown action, which is
 * only logged) returns undefined.
 *
 * `sender` and `sendResponse` are accepted but not used.
 */
function tranquilize(request, sender, sendResponse) {
    const action = request.tranquility_action;
    const respondWith = (text) => Promise.resolve({response: text});

    if (action === 'Run') {
        print("Called to run Tranquility at: " + new Date());
        RunOnLoad();
        return respondWith("Completed Running Tranquility");
    }
    if (action === 'RunAndSave') {
        print("Called to run and Save Tranquility at: " + new Date());
        RunAndSaveOnLoad();
        return respondWith("Completed Saving Content Offline");
    }
    if (action == 'RunOnSelection') {
        print("Called to run Tranquility at: " + new Date());
        RunOnSelection();
        return respondWith("Completed Running Tranquility on Selection");
    }
    if (action === 'PopulateOfflineList') {
        print("Receive message to display offline files list");
        displayOfflinePages(request.offline_data);
        return;
    }
    if (action === 'DisplayOfflineDocument') {
        print("Received offline document from database");
        displayDocFromDB(request.cached_doc, request.url);
        return;
    }
    if (action === 'DeleteOfflineDocumentLink') {
        delDocFromDB(request.url);
        return;
    }
    if (action === 'CreateExportLink') {
        print("Received message to export offline links");
        displayExportLink(request.offline_data);
        return;
    }
    if (action === 'ImportOfflinePages') {
        displayImportPage();
        return respondWith("Created Page for Import Prompt");
    }
    if (action === 'AddAnnotation') {
        addAnnotation();
        return;
    }
    if (action === 'UpdateTranquilityPreferences') {
        // Preferences only make sense on a tab that already shows the reader view
        const hasReaderView =
            document.getElementsByClassName("tranquility_container").length > 0;
        if (!hasReaderView) {
            return respondWith("Tab does not contain Tranquility Reader elements");
        }
        applyAllTranquilityPreferences();
        return respondWith("Updated Tranquility Preferences");
    }
    if (action == 'Status') {
        return respondWith("Tranquility Has Already Run");
    }
    if (action == 'UpdateZoomValue') {
        updateZoomValue(request.zoomValue);
        return respondWith("Updated Zoom Value");
    }
    if (action == 'ExecutePostPDFPrintActions') {
        // Reset page to default column-num value post printing
        // Then reapply the image display preferences
        applyNumColumnsPreferences('defaultMode');
        applyImageDisplayPreferences();
        return respondWith("Executed Post PDF Print Actions");
    }
    if (action == 'None') {
        return respondWith("Receive Do Nothing Message");
    }

    print("Message not implemented: " + action);
}
/*
 * Toggle Tranquility on the current page.
 *
 * If the page already contains elements with class "tranquility", navigate
 * back to the original page (or to the offline link recorded on the
 * tranquility_offline_links_btn element). Otherwise show the progress bar and
 * process the page: through a fresh XHR if the document is still loading, or
 * directly from the live document if it has finished loading.
 *
 * Side effects: updates the global currentURL.
 */
function RunOnLoad() {
    requestZoomValue();
    currentURL = location.toString();

    // If we have already run tranquility, then just toggle back to the original webpage (un-tranquilize the page)
    const alreadyTranquilized =
        document.body.getElementsByClassName("tranquility").length > 0;
    if (alreadyTranquilized) {
        print("Document already in tranquility mode. Reverting to original page...");

        // If this is an offline link, the target is stored in the data-active-link
        // attribute of the tranquility_offline_links_btn. The button may be absent,
        // in which case we fall back to the current URL instead of throwing.
        const btn = document.getElementById('tranquility_offline_links_btn');
        const activeLink = btn ? btn.getAttribute('data-active-link') : null;
        let url = currentURL;
        if (activeLink) {
            print("Found data active link...");
            url = activeLink;
        }
        print("url: " + url);

        // Handle corner case when the url has a "#" tag
        // this can prevent the window.location.assign from working!
        window.location.assign(url.split("#")[0]);
        return;
    }

    // Tranquility has not been run: "tranquilize" the document.
    // Stop loading the document if it has not completed loading
    const stillLoading = document.readyState == "loading";
    if (stillLoading) {
        window.stop();
    }

    // Show a progress-bar to indicate activity and then process the request
    // bar will automatically disappear since the document will be replaced
    const pbar = getProgressBar(document);
    pbar.style.visibility = 'visible';

    if (stillLoading) {
        processXMLHTTPRequest(currentURL, false);
    }
    else {
        processContentDoc(document, currentURL, false);
    }
}
/*
 * Run Tranquility on just the portion of the page the user has selected.
 *
 * Typically used when the page has at least partially loaded and the user has
 * selected some text. It also works on an already processed page, e.g. to
 * prune the tranquilized content further.
 *
 * If there is no selection, the function logs a message and returns without
 * touching the page.
 *
 * Side effects: updates the global currentURL.
 */
function RunOnSelection() {
    currentURL = location.toString();

    // Stop loading the document if it has not completed loading
    if (document.readyState == "loading") {
        window.stop();
    }

    // getRangeAt(0) throws when the selection holds no range, and getSelection()
    // itself may return null; bail out instead of raising from the handler.
    const selection = document.getSelection();
    if (!selection || selection.rangeCount === 0) {
        print("No selection available; nothing to process");
        return;
    }

    // Obtain a DocumentFragment of the selected portion of the webpage
    const frag = selection.getRangeAt(0).cloneContents();

    // Show a progress-bar to indicate activity and then process the request
    // bar will automatically disappear since the document will be replaced
    const pbar = getProgressBar(document);
    pbar.style.visibility = 'visible';

    // Clone the current page and replace entire body with the DocumentFragment
    const contentDoc = document.cloneNode(true);
    const docBody = contentDoc.body;
    while (docBody.firstChild) {
        docBody.removeChild(docBody.firstChild);
    }
    docBody.appendChild(frag);

    // Now run tranquility to process the DocumentFragment
    processContentDoc(contentDoc, currentURL, false);
}
/*
 * Tranquilize the current page and store the result for offline reading.
 *
 * When the document has finished loading and already contains "tranquility"
 * elements, the current document is saved as-is. Otherwise the page is
 * re-fetched and processed with the save-offline flag set.
 *
 * Side effects: updates the global currentURL.
 */
function RunAndSaveOnLoad() {
    currentURL = location.toString();

    // Already tranquilized: just save a snapshot of the document and exit
    const alreadyProcessed =
        document.readyState == "complete" &&
        document.body.getElementsByClassName("tranquility").length > 0;
    if (alreadyProcessed) {
        const snapshot = document.cloneNode(true);
        saveContentOffline(currentURL, snapshot);
        return;
    }

    // Not tranquilized yet: halt an in-progress load before processing
    if (document.readyState == "loading") {
        window.stop();
    }

    // The progress bar vanishes on its own once the document gets replaced
    const progressBar = getProgressBar(document);
    progressBar.style.visibility = 'visible';

    processXMLHTTPRequest(currentURL, true);
}
/*
 * Fetch `url` with an asynchronous XMLHttpRequest and hand the response text
 * to processResponse().
 *
 * url         - address of the page to fetch; also passed on unchanged to
 *               processResponse as the document URL
 * saveOffline - forwarded to processResponse
 *
 * On a non-200 final status the progress bar (if present) turns red and is
 * removed after three seconds.
 */
function processXMLHTTPRequest(url, saveOffline) {
    // Handle corner case to avoid mixed content security warnings/errors:
    // when the current page was served over https, request the https variant
    // of an http URL.
    let getURL = url;
    if (location.protocol === 'https:' && /^http:/i.test(getURL)) {
        getURL = getURL.replace(/^http:/i, 'https:');
    }

    const oXHR = new XMLHttpRequest();
    oXHR.onreadystatechange = function() {
        print(oXHR.readyState + ", " + oXHR.status);
        // 4 == DONE; ignore the intermediate states
        if (oXHR.readyState !== 4) {
            return;
        }
        if (oXHR.status === 200) {
            processResponse(oXHR.responseText, url, saveOffline);
            return;
        }

        // Print error message and remove progress bar if any
        print("Response status: " + oXHR.status);
        print("Unable to process document");
        const pbar = document.getElementById("tranquility_progress_bar");
        if (pbar) {
            pbar.style.backgroundColor = '#FF0000';
            setTimeout(function() {
                // The bar may already have been detached by the time this fires
                if (pbar.parentNode) {
                    pbar.parentNode.removeChild(pbar);
                }
            }, 3000);
        }
    };

    print(getURL);
    oXHR.open("GET", getURL, true);

    // Fix to handle pages that use iso-8859-1/windows-1252 encoding
    if (document.characterSet.toLowerCase() == "windows-1252") {
        oXHR.overrideMimeType('text/html; charset=iso-8859-1');
    }
    oXHR.send(null);
}
/*
 * Parse the fetched HTML text into a document and pass it on for processing.
 *
 * oXHRDoc     - response text of the page
 * thisURL     - URL the text was fetched for
 * saveOffline - forwarded to processContentDoc
 */
function processResponse (oXHRDoc, thisURL, saveOffline) {
    print("Processing Response...");
    const parsedDoc = new DOMParser().parseFromString(oXHRDoc, "text/html");
    processContentDoc(parsedDoc, thisURL, saveOffline);
}
/*
 * Core Tranquility pipeline: replaces the live page with a cleaned-up,
 * reader-friendly version of contentDoc.
 *
 * contentDoc  - document to process (the live document, a parsed XHR response
 *               or a clone restricted to the user's selection)
 * thisURL     - URL of the page; used for the BASE element, for making links
 *               absolute and when saving offline
 * saveOffline - when truthy, a clone of the final document is passed to
 *               saveContentOffline()
 *
 * The steps are order dependent: the root element of `document` is swapped for
 * a deep clone of contentDoc first, and every later step works on `document`.
 */
function processContentDoc(contentDoc, thisURL, saveOffline) {
// First move to the top of the document; for some reason
// window.scroll(0, 0) at the end of processing does not seem to
// always work
//
document.documentElement.scrollTop = 0;
// Remove all event handlers by "deep" cloning the document
// instead of cloning each element (saves some time and
// the code is cleaner); now cloning the entire document
// instead of just cloning only the body
//
let clonedDoc = contentDoc.cloneNode(true);
document.replaceChild(clonedDoc.documentElement, document.documentElement);
contentDoc = document;
// Remove all script tags
//
let scriptTags = ["SCRIPT", "NOSCRIPT"];
for (let i = 0; i < scriptTags.length; i++) {
removeTag(contentDoc, scriptTags[i]);
}
// Now replace document.documentElement; It looks like we need this step for
// the window.getComputedStyle() function to work correctly
// we can then copy over the document to the contentDoc variable and continue
// as before
// NOTE(review): contentDoc already refers to `document` at this point, so this
// call replaces the root element with itself — confirm whether it is still needed
//
document.replaceChild(contentDoc.documentElement, document.documentElement);
contentDoc = document;
// First get a dfs search to index every single element in the
// document
let indexMap = {};
indexElements(indexMap, contentDoc.body);
// Backup any title/heading related tags to restore in case they are removed
// by the deletion logic
//
let hElemsMap = {};
cloneHElems(hElemsMap, contentDoc);
// Collect any supporting links before processing the webpage
let supporting_links = getSupportingLinks(contentDoc);
print("Got supporting links...");
// Remove some elements that are typically like hidden elements
// but can add to the text size of a document; remove them so that
// their effect on later logic (textContent.length value) is minimized
//
let likeHidden = ["HEADER", "FOOTER", "NAV", "SVG", "PATH", "LINK", "STYLE"];
for (let i = 0; i < likeHidden.length; i++) {
removeTag(contentDoc, likeHidden[i]);
}
// Remove unnecessary whitespaces and comments
removeWhiteSpaceComments(contentDoc);
//print("Removed white spaces and comments");
// Cleanup the head and unnecessary tags
// Delete All Hidden Elements before doing anything further
// These could be hidden images, div, spans, spacers, etc...
// Delete any content that has display = 'none' or visibility == 'hidden'
// This was originally done only for spacer images, but seems like a meaningful thing
// to do for all elements, given that all scripts are also deleted in the Tranquility view
//
// First get the size of the document before removing hidden content and make a clone
// in case we need to revert
//
let sizeBeforeDelHidden = computeSize(contentDoc.documentElement);
let bkpContentDoc = contentDoc.cloneNode(true);
deleteHiddenElements(contentDoc, "*");
print("Removed Hidden elements");
let sizeAfterDelHidden = computeSize(contentDoc.documentElement);
print(sizeBeforeDelHidden, sizeAfterDelHidden);
// If the content after deletion of hidden elements is less than 10% of the
// content before deletion of hidden elements and the size after deletion
// is less than 200 characters, then it is possible that the
// website is hiding content within hidden elements
//
// Revert to the document state before this step and continue...
//
if (sizeAfterDelHidden < 200 && sizeAfterDelHidden / sizeBeforeDelHidden < 0.1) {
print("Problem removing hidden elements...");
print("Website may be hiding content within hidden elements...");
print("Reverting to backedup document and continuing...");
print("Size Before: ", sizeBeforeDelHidden, "Size After: ", sizeAfterDelHidden);
document.replaceChild(bkpContentDoc.documentElement, document.documentElement);
contentDoc = document;
}
print("Size: ", computeSize(contentDoc.documentElement));
// Remove zero sized images; this is just another way of hiding elements
// otherwise, these can get cloned and reappear
// resized to the reading width, which is very annoying
// This has a side effect of removing images that have not yet loaded
// The problem will be addressed in a later release
//
deleteZeroSizeImages(contentDoc);
print("Removed Zero Sized Images");
print("Size: ", computeSize(contentDoc.documentElement));
// Ensure that we set a base element before we replace the
// web page with the new content; otherwise, relative URL
// links will be based on the incorrect URL present in the
// window.location
// Then call convertLinksAbsolute to convert all relative
// links to absolute links so that these links will also
// work if we save this document for reading later
//
let baseElem = createNode(contentDoc, {type: 'BASE', attr: { href: thisURL } });
let heads = contentDoc.getElementsByTagName('HEAD');
for(let i = 0; i < heads.length; i++) {
heads[i].appendChild(baseElem.cloneNode(true));
}
convertLinksAbsolute(contentDoc, thisURL);
print("Processing document...");
// Neutralize any links that have an onclick event (these are usually for sharing to social media)
// this is consistent with our preference to delete all javascript
// Note: the link itself is kept; only its onclick attribute is overwritten with "void(0);"
//
print("Removing links with associated javascript events...");
let all_links = contentDoc.getElementsByTagName("A");
for (let i = all_links.length - 1; i >= 0; i--) {
let onclickVal = all_links[i].getAttribute('onclick');
if (onclickVal != null) {
all_links[i].setAttribute('onclick', "void(0);");
}
}
// If there is a single "MAIN" tag, then replace the entire document content with just the
// contents of the main tag. Trust that the content creator has done the correct thing.
// If an article tag exists, then...
// If there is a single "ARTICLE" tag, then replace the entire document content with just the
// contents of the article. Trust that the content creator has done the correct thing
// (this is because articles are supposed to be within the main tag)
// In both cases the replacement only happens when the candidate holds more than
// 200 non-whitespace characters
//
let mainsOrArticle = false;
let mains = contentDoc.getElementsByTagName("main");
let articles = contentDoc.getElementsByTagName("article");
if (mains.length == 1) {
let docBody = contentDoc.body;
let mainContent = mains[0].cloneNode(true);
if (computeSize(mainContent) > 200) {
while (docBody.firstChild) {
docBody.removeChild(docBody.firstChild);
}
docBody.appendChild(mainContent);
print("Replaced body content with main contents...");
mainsOrArticle = true;
}
}
// `articles` is a live collection, so this check sees the body as rewritten by the MAIN step above
if (articles.length == 1) {
let docBody = contentDoc.body;
let mainArticle = articles[0].cloneNode(true);
if (computeSize(mainArticle) > 200) {
while (docBody.firstChild) {
docBody.removeChild(docBody.firstChild);
}
docBody.appendChild(mainArticle);
print("Replaced body content with article contents...");
mainsOrArticle = true;
}
}
print("Processed article/main content...");
print("Size: ", computeSize(contentDoc.documentElement));
// Remove unnecessary whitespaces and comments
//removeWhiteSpaceComments(contentDoc);
//print("Removed white spaces and comments");
// Cleanup the head and unnecessary tags
// When the body was replaced by a MAIN/ARTICLE element, PICTURE, FIGURE and SVG
// elements are kept; otherwise every tag in delTags is removed
let delTags = ["STYLE", "LINK", "META", "SCRIPT", "NOSCRIPT", "IFRAME",
"SELECT", "DD", "INPUT", "TEXTAREA", "HEADER", "FOOTER",
"NAV", "FORM", "BUTTON", "PICTURE", "FIGURE", "SVG"];
for(let i=0; i<delTags.length; i++) {
let delTagExceptions = ["PICTURE", "FIGURE", "SVG"];
if (mainsOrArticle) {
if (!delTagExceptions.includes(delTags[i])) {
removeTag(contentDoc, delTags[i]);
}
}
else {
removeTag(contentDoc, delTags[i]);
}
print("Size: ", computeSize(contentDoc.documentElement));
}
print("Cleaned up unnecessary tags and headers");
print("Size: ", computeSize(contentDoc.documentElement));
// Cleanup elements that have classnames that are typically not main content
// This was included as a hidden element via css @media settings in 3.0.18
// but moving it to a regexp for more flexibility (borrowing idea from readability)
// since it is easier to undo the cleanup in javascript or add logic to skip
// certain elements that seem to have actual content in them
//
let unlikelyCandidates = /^social|soc|^header|footer|related|recommended|sponsored|action|navigation|promo|adCaption|comment|dfp|adHolder|billboard|slide|-ad-|_ad_|control-bar|menu|disqus|popup|pop-up|crumb|more-stories/i
let nodeIter = getNodeIterator(contentDoc.body, unlikelyCandidates, "className");
let node = null;
while ((node = nodeIter.nextNode())) {
// Never remove the structural containers themselves
let exceptions = ["BODY", "MAIN", "ARTICLE"];
if (exceptions.includes(node.nodeName.toUpperCase())) {
continue;
}
// Keep a node that holds more than 90% of the body's text, whatever its class name
let docSize = computeSize(contentDoc.body);
let nodeSize = computeSize(node);
if (nodeSize/docSize > 0.9) {
continue;
}
print("Removing node with classname: ", node.className);
print(nodeSize, docSize);
node.parentNode.removeChild(node);
}
print("Cleaned up unlikely candidates");
print("Size: ", computeSize(contentDoc.documentElement));
// Reformat the header and use custom css
reformatHeader(contentDoc);
print("Reformatted headers...");
// Moving the cloneImage calls after we have
// cleaned up the unnecessary tags. This can help filter out any
// unnecessary icons and images that are in these deleted tags
// and get added back later.
// Clone all the image nodes for later insertion
let imgCollection = {};
cloneImages(contentDoc.body, imgCollection);
// Processing for ads related DIV's; several websites seem to use LI elements
// within the ads DIV's, or for navigation links which are not required in the
// Tranquility view. In this section, we try to delete DIV's that have at least
// x% of the DIV content within LI tags
let pruneAdsTagList = ["UL", "DIV", "ARTICLE", "SECTION"];
let totalSize = computeSize(contentDoc.documentElement);
for(let p=0; p < pruneAdsTagList.length; p++) {
pruneAdsTag(contentDoc, thisURL, pruneAdsTagList[p], 0.7, totalSize, imgCollection);
}
print("Pruned the AdsTag");
print("Size: ", computeSize(contentDoc.documentElement));
// Cleanup select tags that have content length smaller than minSize
// This helps clean up a number of junk DIV's before we get to real content
// Can be made a parameter in later versions
// First run with minSize ZERO
// Removed TD and DD for now
let pruneTagList = ["LI", "DIV", "OL", "UL", "FORM", "TABLE", "ARTICLE", "SECTION", "SPAN", "P"];
let minSize = 0;
totalSize = computeSize(contentDoc.documentElement);
for(let p=0; p < pruneTagList.length; p++) {
pruneTag(contentDoc, pruneTagList[p], 0.0, minSize, totalSize);
}
// Next run with minsize 5 (for a reduced subset of the tags)
// Removed TD, TABLE, and DD for now
pruneTagList = ["FORM", "DIV", "ARTICLE", "SECTION"];
minSize = 5;
totalSize = computeSize(contentDoc.documentElement);
for(let p=0; p < pruneTagList.length; p++) {
pruneTag(contentDoc, pruneTagList[p], 0.0, minSize, totalSize);
}
// Second pass
// Remove any elements that have zero length textContent
pruneTagList = ["LI", "DIV", "OL", "UL", "FORM", "TABLE", "ARTICLE", "SECTION", "SPAN", "P"];
minSize = 0;
totalSize = computeSize(contentDoc.documentElement);
for(let p=0; p < pruneTagList.length; p++) {
pruneTag(contentDoc, pruneTagList[p], 0.0, minSize, totalSize);
}
print("Completed second pass pruning");
// Try to remove unnecessary nested DIV's
// They mess up the padding and margins; use only in moderate pruning
// if the threshold is < 0.99999
// Repeated five times because each pass only collapses one level of nesting per element
for(let i=0; i < 5; i++) {
replaceParent(contentDoc, "DIV", 0.99999);
replaceParent(contentDoc, "SPAN", 0.99999);
}
print("Completed Replace parent loops");
// Format the tags in a nice readable font/style using custom css loaded in header
let reformatTagList = ["UL", "OL", "LI", "DIV", "SPAN", "P", "FONT", "BODY", "H1",
"H2", "H3", "PRE", "TABLE", "ARTICLE", "SECTION", "MAIN"];
for(let r=0; r < reformatTagList.length; r++) {
reformatTag(contentDoc, reformatTagList[r]);
}
print("Completed reformatting tags");
// Time to add back the images that we have cloned
//
addBackElems(contentDoc, "IMG", imgCollection, indexMap);
// Add back any title/h1 tags we backup that were removed incorrectly
addBackElems(contentDoc, "H1", hElemsMap, indexMap);
print("Reinserted images and H1 tags...");
// Remove target attribute from all anchor elements
// this will enable opening the link in the same browser tab
//
removeAnchorAttributes(contentDoc);
print("Removed Anchor attributes");
// Create the tranquility UI related elements
create_ui_elements(contentDoc, supporting_links, thisURL);
print("Created Tranquility UI elements");
print("Finished processing document");
// Finally apply all preferences and add Event listeners
applyAllTranquilityPreferences();
addBackEventListeners();
// Try one last time to remove any hidden/script elements that did not get removed for any reason
for (let i = 0; i < scriptTags.length; i++) {
removeTag(contentDoc, scriptTags[i]);
}
for (let i = 0; i < likeHidden.length; i++) {
removeTag(contentDoc, likeHidden[i]);
}
// Persist a snapshot of the finished reader view when requested
if (saveOffline) {
saveContentOffline(thisURL, document.cloneNode(true));
}
}
/*
 * Recursively tidy the subtree under cdoc:
 *   - runs of two or more whitespace characters in text nodes collapse to one space
 *   - comment nodes (type 8) and CDATA sections (type 4) are removed
 *   - PRE nodes are skipped entirely so their formatting survives
 */
function removeWhiteSpaceComments(cdoc) {
    const children = cdoc.childNodes;
    // Walk backwards so that removing a child does not disturb the indices still to visit
    for (let idx = children.length - 1; idx >= 0; idx--) {
        const child = children[idx];

        // Preformatted text keeps its spaces and line breaks
        if (child.nodeName == "PRE") {
            continue;
        }

        const kind = child.nodeType;
        if (kind == 1) {
            // Element: descend
            removeWhiteSpaceComments(child);
        }
        else if (kind == 3) {
            // Text: squeeze whitespace runs
            child.data = child.data.replace(/\s{2,}/g, ' ');
        }
        else if (kind == 8 || kind == 4) {
            // Comment or CDATA: drop it
            child.parentNode.removeChild(child);
        }
    }
}
/*
 * Remove every element under cdoc.body whose node name is tagString
 * (case-insensitive), except elements whose id starts with "tranquility".
 *
 * cdoc      - document to clean
 * tagString - tag name, e.g. "SCRIPT"
 *
 * The pattern handed to getNodeIterator is anchored so that only the exact
 * tag name matches; an unanchored pattern lets "DD" hit "ADDRESS" and "PATH"
 * hit "clipPath".
 * NOTE(review): assumes getNodeIterator tests the pattern against the node's
 * nodeName — confirm against its definition.
 */
function removeTag(cdoc, tagString) {
    print("Removing items with tag: ", tagString);
    const exactTag = new RegExp('^(?:' + tagString + ')$', 'i');
    print(cdoc.body.getElementsByTagName(tagString).length);

    const nodeIter = getNodeIterator(cdoc.body, exactTag, "nodeName");
    let removedCount = 0;
    let node = null;
    while ((node = nodeIter.nextNode())) {
        // Elements created by Tranquility itself are left alone
        const isTranquilityNode =
            typeof node.id === 'string' && node.id.startsWith("tranquility");
        if (isTranquilityNode) {
            continue;
        }
        node.parentNode.removeChild(node);
        removedCount += 1;
    }
    print("Removed ", removedCount, " items with tag: ", tagString);
}
/*
 * Strip every HEAD element in cdoc down to its TITLE element(s); every other
 * element found inside the head is removed.
 */
function reformatHeader(cdoc) {
    const headElems = cdoc.getElementsByTagName('HEAD');
    for (let h = 0; h < headElems.length; h += 1) {
        const head = headElems[h];
        // Live list of all elements below this head; it shrinks as we remove
        const descendants = head.getElementsByTagName("*");
        // Number of TITLE elements stepped over so far == index of the next candidate
        let keptTitles = 0;
        while (keptTitles < descendants.length) {
            const candidate = descendants[keptTitles];
            if (candidate.nodeName.toUpperCase() === "TITLE") {
                keptTitles += 1;
                continue;
            }
            head.removeChild(candidate);
        }
    }
}
/*
 * Remove every element matching tagString whose computed style has
 * visibility "hidden" or display "none". HEAD and TITLE elements are never
 * removed.
 */
function deleteHiddenElements(cdoc, tagString) {
    const protectedTags = ["HEAD", "TITLE"];
    const candidates = cdoc.getElementsByTagName(tagString);

    // Visit from the end so removals never shift an element we still have to look at
    let idx = candidates.length;
    while (idx-- > 0) {
        const elem = candidates[idx];
        if (protectedTags.includes(elem.nodeName.toUpperCase())) {
            continue;
        }

        const computed = window.getComputedStyle(elem, null);
        const visibility = computed.getPropertyValue("visibility");
        const display = computed.getPropertyValue("display");
        const isHidden = visibility == 'hidden' || display == 'none';
        if (isHidden) {
            elem.parentNode.removeChild(elem);
        }
    }
}
/*
 * Remove elements of type tagString that look like ad or navigation blocks.
 *
 * cdoc          - document to prune
 * url           - URL of the page; anchors pointing to it (ignoring the
 *                 fragment) are not counted as link content
 * tagString     - tag name of the candidate elements, e.g. "DIV"
 * thresholdPctg - an element is removed when at least this fraction of its
 *                 text sits inside counted anchors
 * totalSize     - text size of the whole document, as measured by the caller
 * imgCollection - map of cloned images; entries whose key equals the
 *                 data-dfsIndex attribute of an image inside a removed element
 *                 are deleted so those images are not re-inserted later
 *
 * Empty elements are removed outright. Elements holding 80% or more of the
 * document text are never touched.
 */
function pruneAdsTag(cdoc, url, tagString, thresholdPctg, totalSize, imgCollection) {
    const candidates = cdoc.getElementsByTagName(tagString);
    const pageURL = url.split("#")[0];

    // Walk the live collection from the end: removing an element only drops
    // itself and descendants, all of which sit at indices already visited
    for (let idx = candidates.length - 1; idx >= 0; idx--) {
        const tElem = candidates[idx];
        const cLength = computeSize(tElem);

        // If the DIV/SECTION/ARTICLE is empty remove it right away
        if (cLength == 0) {
            tElem.parentNode.removeChild(tElem);
            continue;
        }

        // Only elements that do not carry a significant portion of the page qualify
        if (!(cLength / totalSize < 0.8)) {
            continue;
        }

        // Anchor text is used as a proxy for LI-based navigation/ads menus
        let anchorLength = 0;
        const anchorNodes = tElem.getElementsByTagName("A");
        for (let j = 0; j < anchorNodes.length; j++) {
            // Ignore links that are # tags in the same document
            // These are typically table of content type links for the
            // current document and are useful to retain
            if (anchorNodes[j].href.split("#")[0] == pageURL) {
                continue;
            }
            anchorLength += computeSize(anchorNodes[j]);
        }

        // Remove the element when enough of its content sits within anchor nodes
        if (anchorLength / cLength >= thresholdPctg) {
            const images = tElem.getElementsByTagName('img');
            for (let k = 0; k < images.length; k++) {
                const imgIdx = images[k].getAttribute('data-dfsIndex');
                if (imgIdx in imgCollection) {
                    delete imgCollection[imgIdx];
                }
            }
            tElem.parentNode.removeChild(tElem);
        }
    }
}
/*
 * Remove elements of type tagString that carry too little text.
 *
 * cdoc          - document to prune
 * tagString     - tag name of the candidate elements
 * thresholdPctg - an element is removed when its share of the document text is
 *                 below this value AND its text makes up less than half of its
 *                 (whitespace-stripped) innerHTML
 * minSize       - an element is also removed when its text size is <= minSize
 * totalSize     - text size of the whole document, as measured by the caller
 *
 * Elements containing an H1 are always kept so the article heading survives.
 */
function pruneTag(cdoc, tagString, thresholdPctg, minSize, totalSize) {
    const candidates = cdoc.getElementsByTagName(tagString);

    // Walk the live collection from the end: removing an element only drops
    // itself and descendants, all of which sit at indices already visited
    for (let idx = candidates.length - 1; idx >= 0; idx--) {
        const tElem = candidates[idx];

        // If the DIV has a H1 child, then we want to retain the article
        // heading and not delete it.
        if (tElem.getElementsByTagName("H1").length > 0) {
            continue;
        }

        const cLength = computeSize(tElem);
        const pctg = cLength / totalSize;

        // Experimental; do not delete if the text content is > threshold of innerHTML
        // currently hardcoded; trying to do better with blog style pages and comments.
        // Whitespace is stripped with a real regular expression (computeSize strips
        // it from the text too); the +1 avoids a division by zero.
        const ilength = tElem.innerHTML.replace(/\s/g, '').length + 1;
        const innerHtmlPctg = cLength / ilength;

        const mostlyMarkup = innerHtmlPctg < 0.5 && pctg < thresholdPctg;
        if (mostlyMarkup || cLength <= minSize) {
            tElem.parentNode.removeChild(tElem);
        }
    }
}
/*
 * Collapse needless nesting of same-tag elements (e.g. DIV inside DIV).
 *
 * For each tagString element whose parent has the same tag name, if the
 * element holds more than thresholdPctg of the parent's text, the parent is
 * replaced by a deep clone of the element. When the parent has no parent of
 * its own, the element's siblings are removed instead.
 *
 * Elements are visited from shortest to longest innerHTML.
 */
function replaceParent(cdoc, tagString, thresholdPctg) {
    // Snapshot the live collection, longest innerHTML first
    const ordered = Array.from(cdoc.getElementsByTagName(tagString));
    ordered.sort((a, b) => b.innerHTML.length - a.innerHTML.length);

    // Visit in reverse, i.e. shortest first
    for (let idx = ordered.length - 1; idx >= 0; idx--) {
        const tElem = ordered[idx];
        const parent = tElem.parentNode;
        if (parent == undefined || parent.tagName != tElem.tagName) {
            continue;
        }

        const share = computeSize(tElem) / computeSize(parent);
        if (!(share > thresholdPctg)) {
            continue;
        }

        const grandparent = parent.parentNode;
        if (grandparent != undefined) {
            // Put a copy of the element where its parent used to be
            grandparent.replaceChild(tElem.cloneNode(true), parent);
            continue;
        }

        // No grandparent: keep the element and drop everything next to it
        const siblings = parent.childNodes;
        for (let s = siblings.length - 1; s >= 0; s--) {
            if (siblings[s] !== tElem) {
                parent.removeChild(siblings[s]);
            }
        }
    }
}
/*
 * Reset the presentation of every tagString element: the class, style and
 * width attributes are dropped and the element receives the Tranquility class.
 *
 * PRE elements get 'tranquility_pre' instead of 'tranquility' so that only
 * some of the formatting changes apply to preformatted text (for example the
 * font must stay fixed-width for code readability).
 */
function reformatTag(cdoc, tagString) {
    const strippedAttributes = ['class', 'style', 'width'];
    const elems = cdoc.getElementsByTagName(tagString);

    for (let idx = 0; idx < elems.length; idx += 1) {
        const elem = elems[idx];
        for (const attrName of strippedAttributes) {
            elem.removeAttribute(attrName);
        }
        const readerClass = elem.nodeName == "PRE" ? 'tranquility_pre' : 'tranquility';
        elem.setAttribute('class', readerClass);
    }
}
/*
 * Text size of an element: the number of non-whitespace characters in its
 * textContent (or, failing that, its innerText).
 *
 * Returns 0 when the element has no innerHTML or no text at all.
 */
function computeSize(dElem) {
    if (!dElem.innerHTML) {
        return 0;
    }
    // Prefer textContent; fall back to innerText when it is empty or missing
    const text = dElem.textContent || dElem.innerText;
    if (!text) {
        return 0;
    }
    // Spaces are stripped to do a better job of true size calculations
    return text.replace(/\s/g, '').length;
}
/*
 * Rewrite the href of every anchor under `node` as an absolute URL resolved
 * against baseURL.
 *
 * Links that cannot be turned into a URL (some pages with a "mail:" link fail
 * here) are logged through print() and left unchanged.
 */
function convertLinksAbsolute(node, baseURL) {
    for (const anchor of node.getElementsByTagName('A')) {
        try {
            anchor.href = new URL(anchor.href, baseURL).href;
        }
        catch (error) {
            print(error);
        }
    }
}
/*
 * Collect links from cDoc for later use by the Tranquility UI.
 *
 * Returns an object with two detached elements:
 *   links_div - a DIV holding one P per link whose text is at least 20
 *               characters long (candidates for the "More Links" section)
 *   nav_links - a DIV holding links that look like multi-page navigation
 *               (label found in the localized navigation strings, or a pure
 *               number), each followed by a spacer text node, after
 *               cleanupNavLinks() has filtered them
 *
 * Every collected link is a clone reduced to its text content.
 */
function getSupportingLinks(cDoc) {
    // Localized label lists. The single-page list is looked up but not consulted below.
    const altString = browser.i18n.getMessage("singlePageString").split(",");
    const navString = browser.i18n.getMessage("navigationString").split(",");
    const pageNumberPattern = /^\d+$/;

    // Temporary containers for the collected links
    const links_div = createNode(cDoc, {type: 'DIV', attr: { class:'tranquility_links', id:'tranquility_links' } });
    const nav_links_div = createNode(cDoc, {type: 'DIV', attr: { class:'tranquility_nav_links' } });
    const spacer = cDoc.createTextNode(" ");

    // Decide whether a (text-only) link clone is usable for page navigation
    const isNavigationLink = (link) => {
        const label = link.textContent.replace(/\s/g, '');
        if (!label) {
            return false;
        }
        const knownLabel = navString.indexOf(label.toUpperCase()) >= 0;
        const pageNumber = label.search(pageNumberPattern) != -1;
        if (!knownLabel && !pageNumber) {
            return false;
        }
        if (link.getAttribute('onclick')) {
            return false;
        }
        if (!link.href) {
            return false;
        }
        if (link.href == "#" || link.href == (currentURL + "#")) {
            return false;
        }
        return link.href.substr(0,10) !== "javascript";
    };

    const pageLinks = cDoc.getElementsByTagName('A');
    for (let idx = 0; idx < pageLinks.length; idx += 1) {
        const linkCopy = pageLinks[idx].cloneNode(true);

        // Replace the contents of the link with its text content
        // this can help cleanup images and other pointless tags/children
        // that can cause problems for tranquility
        linkCopy.textContent = linkCopy.textContent;
        removeWhiteSpaceComments(linkCopy);

        // Long enough to be a headline: keep for the "More Links" section
        if (linkCopy.textContent.length >= 20) {
            const paragraph = createNode(cDoc, {type: 'P', attr: { class:'tranquility_links' } });
            paragraph.appendChild(linkCopy.cloneNode(true));
            links_div.appendChild(paragraph.cloneNode(true));
        }

        // Might be used for navigation in a multipage document
        if (isNavigationLink(linkCopy)) {
            nav_links_div.appendChild(linkCopy.cloneNode(true));
            nav_links_div.appendChild(spacer.cloneNode(true));
        }
    }

    const cleanedNav = cleanupNavLinks(nav_links_div.cloneNode(true));
    return {
        links_div : links_div.cloneNode(true),
        nav_links : cleanedNav.cloneNode(true)
    };
}
/*
 * Filter the navigation links collected in nlinks_div (modified in place and
 * returned). A link is removed when
 *   - its label is a number greater than (count of numeric links + 1); this
 *     eliminates arbitrary links that merely have a number as their text
 *   - a link with the same label has already been seen; some pages repeat the
 *     navigation at the top and bottom. Links are visited last to first, so
 *     the last occurrence is the one kept
 *   - it points at the current page (ignoring any "#fragment" on either
 *     side), e.g. comment-style links
 */
function cleanupNavLinks(nlinks_div) {
    const nlinks = nlinks_div.getElementsByTagName('A');
    const navRegExp = /^\d+$/;
    const labelOf = (link) => link.textContent.replace(/\s/g, '');

    // Count the purely numeric links
    let intNavLinks = 0;
    for (let i = 0; i < nlinks.length; i++) {
        if (navRegExp.test(labelOf(nlinks[i]))) {
            intNavLinks++;
        }
    }

    // Compare against the page URL without its fragment; currentURL itself may carry one
    const pageURL = typeof currentURL === 'string' ? currentURL.split("#")[0] : currentURL;

    // A Set (rather than an Array used as a map) so labels such as "length"
    // or "constructor" are not mistaken for already-seen entries
    const seenLabels = new Set();

    // Walk backwards so removals from the live collection do not shift pending indices
    for (let i = nlinks.length - 1; i >= 0; i--) {
        const link = nlinks[i];
        const navStr = labelOf(link);

        const shouldRemove =
            (navRegExp.test(navStr) && Number(navStr) > intNavLinks + 1) ||
            seenLabels.has(navStr) ||
            link.href.split("#")[0] == pageURL;
        if (shouldRemove) {
            link.parentNode.removeChild(link);
        }
        seenLabels.add(navStr);
    }
    return nlinks_div;
}
/**
 * Prune the "More Links" anchors held in `orig_links`.
 *
 * Anchors are visited from last to first and removed from their parent when:
 *   - the same href already appears on an anchor in the main document `cdoc`;
 *   - the href starts with "javascript";
 *   - the href, ignoring any "#fragment", is the page URL itself (`url`
 *     without its fragment);
 *   - the visible text, after dropping leading whitespace, is itself a raw
 *     "http:" / "https:" URL;
 *   - another anchor with the same href is still present in `orig_links`
 *     (only one anchor per href survives).
 *
 * @param {Document|Element} cdoc - main document whose anchors take precedence
 * @param {string} url - URL of the page being processed
 * @param {Element} orig_links - container of the candidate anchors; modified in place
 * @returns {Node} a deep clone of the pruned `orig_links`
 */
function removeDuplicateAndBadLinks(cdoc, url, orig_links) {
    // Plain string comparison is sufficient; the previous encodeURIComponent()
    // round trip added nothing and can throw URIError on malformed input.
    const pageURL = url.split("#")[0];

    // Review note: "https:" is matched as well as "http:"; the original
    // pattern only covered "http:" link text.
    const urlAsTextRegExp = /^https?:/;

    // Set/Map instead of arrays used as string-keyed dictionaries.
    const bodyHrefs = new Set();
    const bodyAnchors = cdoc.getElementsByTagName('A');
    for (let i = 0; i < bodyAnchors.length; i++) {
        bodyHrefs.add(bodyAnchors[i].href);
    }

    // Number of anchors per href that are still present in orig_links.
    const moreAnchors = orig_links.getElementsByTagName('A');
    const remainingByHref = new Map();
    for (let i = 0; i < moreAnchors.length; i++) {
        const href = moreAnchors[i].href;
        remainingByHref.set(href, (remainingByHref.get(href) || 0) + 1);
    }

    // Walk backwards so removals never shift the positions still to visit.
    for (let j = moreAnchors.length - 1; j >= 0; j--) {
        const anchor = moreAnchors[j];
        const href = anchor.href;

        const isUnwanted =
            bodyHrefs.has(href) ||
            href.startsWith("javascript") ||
            href.split("#")[0] === pageURL ||
            // A real regular expression is required here; the previous string
            // pattern '^/s+' never matched, so leading whitespace was kept.
            urlAsTextRegExp.test(anchor.textContent.replace(/^\s+/, ''));
        const isDuplicate = remainingByHref.get(href) > 1;

        if (isUnwanted || isDuplicate) {
            // Keep the count accurate for every removal so that the last
            // surviving anchor for an href is not mistaken for a duplicate.
            remainingByHref.set(href, remainingByHref.get(href) - 1);
            anchor.parentNode.removeChild(anchor);
        }
    }

    return orig_links.cloneNode(true);
}
/**
 * Find the href that applies to `elem`.
 *
 * Starting at `elem` and moving up through `parentNode`, returns the first
 * `href` value that is neither null nor undefined. Despite the name, the
 * result is the href value, not the node carrying it.
 *
 * @param {Node} elem - node at which the lookup starts
 * @returns {*} the first defined href found; otherwise the (null/undefined)
 *              href of the topmost node reached
 */
function getAnchorNode(elem) {
    for (let node = elem; ; node = node.parentNode) {
        const href = node.href;
        // Stop on the first defined href, or when there is nowhere left to climb.
        if (href != null || node.parentNode == null) {
            return href;
        }
    }
}
/**
 * Normalise every anchor in `cdoc` for continuous browsing.
 *
 * Anchors whose className is 'tranquility_original_link_anchor' are left
 * untouched. On every other anchor, non-empty `target`, `class` and
 * `onmousedown` attributes are removed, and the class is then set to
 * 'tranquil_browsing_mode_link'.
 *
 * @param {Document|Element} cdoc - document (or subtree) whose anchors are processed
 */
function removeAnchorAttributes(cdoc) {
    const attributesToStrip = ['target', 'class', 'onmousedown'];

    for (const anchor of Array.from(cdoc.getElementsByTagName('A'))) {
        // The link back to the original page keeps its own styling/behaviour.
        if (anchor.className === 'tranquility_original_link_anchor') {
            continue;
        }

        attributesToStrip.forEach((attrName) => {
            // Only attributes with a non-empty value are removed.
            if (anchor.getAttribute(attrName)) {
                anchor.removeAttribute(attrName);
            }
        });

        // Tag the link so it is handled as a tranquil browsing mode link.
        anchor.setAttribute('class', 'tranquil_browsing_mode_link');
    }
}
/**
 * Create an element and apply a set of attributes to it.
 *
 * @param {Document} cdoc - document used to create the element
 * @param {{type: string, attr: Object}} props - `type` is the tag name passed
 *        to createElement(); each own enumerable key of `attr` is set as an
 *        attribute with the corresponding value
 * @returns {Element} the newly created element (not attached anywhere)
 */
function createNode(cdoc, props) {
    const element = cdoc.createElement(props.type);
    Object.entries(props.attr).forEach(([attrName, attrValue]) => {
        element.setAttribute(attrName, attrValue);
    });
    return element;
}
/**
 * Return the progress bar element of `cdoc`, creating it when missing.
 *
 * An existing element with id 'tranquility_progress_bar' is returned as is.
 * Otherwise a hidden DIV with that id and class is created via createNode()
 * and, when the document has a body, appended to it.
 *
 * @param {Document} cdoc - document that owns (or will own) the progress bar
 * @returns {Element} the existing or newly created progress bar element
 */
function getProgressBar(cdoc) {
    const existingBar = cdoc.getElementById('tranquility_progress_bar');
    if (existingBar != null) {
        return existingBar;
    }

    const newBar = createNode(cdoc, {
        type: 'DIV',
        attr: { class: 'tranquility_progress_bar', id: 'tranquility_progress_bar' },
    });
    newBar.style.visibility = 'hidden';

    // Without a body there is nowhere to attach it; the caller still gets the node.
    if (cdoc.body != null) {
        cdoc.body.appendChild(newBar);
    }
    return newBar;
}
/**
 * Number every node of a subtree in depth-first (pre-order) sequence.
 *
 * Each visited node is stored in `indexMap` under the current value of the
 * global `dfsIndex`, which is then incremented. Element nodes (nodeType 1)
 * are additionally stamped with `data-dfsIndex` (that index) and
 * `data-origClassName` (their className at the time of the visit).
 *
 * @param {Object} indexMap - receives the index -> node mapping
 * @param {Node|null} node - root of the subtree; null/undefined is ignored
 */
function indexElements(indexMap, node) {
    // Explicit stack instead of recursion; children are pushed in reverse so
    // that they are popped, and therefore numbered, in document order.
    const pending = [node];

    while (pending.length > 0) {
        const current = pending.pop();
        if (current == null) {
            continue;
        }

        indexMap[dfsIndex] = current;
        if (current.nodeType == 1) {
            current.setAttribute('data-dfsIndex', dfsIndex);
            current.setAttribute('data-origClassName', current.className);
        }
        dfsIndex += 1;

        const kids = current.childNodes;
        for (let k = kids.length - 1; k >= 0; k -= 1) {
            pending.push(kids[k]);
        }
    }
}
/**
 * Back up the images of `cdoc` into `collection`.
 *
 * For every IMG whose src does not start with "data", a fresh Image is
 * created carrying the same src, alt and `data-dfsIndex`, and stored in
 * `collection` under that data-dfsIndex value. Only those three values are
 * copied. Each backed-up image is also reported through print().
 *
 * @param {Document|Element} cdoc - document (or subtree) to scan for images
 * @param {Object} collection - receives the copies, keyed by data-dfsIndex
 */
function cloneImages(cdoc, collection) {
    Array.from(cdoc.getElementsByTagName('IMG')).forEach((original) => {
        // Images whose src starts with "data" are not backed up.
        if (original.src.startsWith("data")) {
            return;
        }

        const copy = new Image();
        const dfsKey = original.getAttribute('data-dfsIndex');
        copy.src = original.src;
        copy.setAttribute('data-dfsIndex', dfsKey);
        copy.alt = original.alt;
        collection[dfsKey] = copy;

        print(original.src + ": " + original.alt);
    });
}
/**
 * Re-insert backed-up elements that the cleanup removed from the document.
 *
 * Every entry of `bkpElems` whose key does not match the `data-dfsIndex` of
 * a `tagName` element still present in `cdoc.body` is handed to
 * insertByDFSIndex(). Entries whose original element survived are skipped.
 *
 * @param {Document} cdoc - document being rebuilt
 * @param {string} tagName - tag of the elements to compare against
 * @param {Object} bkpElems - backed-up elements keyed by data-dfsIndex
 * @param {Object} indexMap - not used by this function
 */
function addBackElems(cdoc, tagName, bkpElems, indexMap) {
    // data-dfsIndex -> position, for the elements the cleanup has retained.
    const retained = {};
    Array.from(cdoc.body.getElementsByTagName(tagName)).forEach((survivor, position) => {
        retained[survivor.getAttribute('data-dfsIndex')] = position;
    });

    for (const dfsKey in bkpElems) {
        if (!(dfsKey in retained)) {
            insertByDFSIndex(bkpElems[dfsKey], cdoc);
        }
    }
}
/**
 * Insert `elem` into `cdoc.body` at the position implied by its
 * `data-dfsIndex`.
 *
 * The elements of the body are scanned in order. `elem` is placed directly
 * before the first element whose data-dfsIndex is greater than its own; if
 * there is none, it is placed directly after the element with the largest
 * smaller data-dfsIndex. When neither exists, nothing is inserted.
 * The className of `elem` is set to 'tranquility' in every case and its
 * parsed index is reported through print().
 *
 * @param {Element} elem - element to insert; must carry data-dfsIndex
 * @param {Document} cdoc - document receiving the element
 */
function insertByDFSIndex(elem, cdoc) {
    const candidates = cdoc.body.getElementsByTagName("*");
    elem.className = 'tranquility';

    const targetIdx = parseInt(elem.getAttribute('data-dfsIndex'));
    print(targetIdx);

    let successor = null;
    let predecessor = null;
    let predecessorIdx = -1;

    for (const candidate of Array.from(candidates)) {
        if (candidate.nodeType != 1) {
            continue;
        }
        // Elements without the attribute parse to NaN and match neither test.
        const idx = parseInt(candidate.getAttribute('data-dfsIndex'));
        if (idx > targetIdx) {
            successor = candidate;
            break;
        }
        if (idx < targetIdx && idx > predecessorIdx) {
            predecessor = candidate;
            predecessorIdx = idx;
        }
    }

    if (successor !== null) {
        successor.insertAdjacentElement('beforebegin', elem);
        return;
    }
    if (predecessor !== null) {
        predecessor.insertAdjacentElement('afterend', elem);
    }
}
/**
 * Remove a node, climbing to the highest ancestor that holds nothing else.
 *
 * Starting at `thisNode`, while the parent has the same size (as reported by
 * computeSize()) as the current node, the walk moves up to the parent. The
 * first node whose parent is larger is removed from that parent.
 *
 * If the walk reaches a node that has no parent (a detached node, or the top
 * of the tree), there is nothing it can be removed from and the function
 * returns without changing anything. Previously this case dereferenced a
 * null parent and threw.
 *
 * @param {Node|null} thisNode - node at which removal starts; null is ignored
 */
function removeNodeRecursive(thisNode) {
    let node = thisNode;

    while (node != null) {
        const parent = node.parentNode;
        if (parent == null) {
            // Nothing to remove from; leave the tree untouched.
            return;
        }

        const nodeTextLen = computeSize(node);
        const parentTextLen = computeSize(parent);
        if (parentTextLen != nodeTextLen) {
            parent.removeChild(node);
            return;
        }

        // The parent contributes nothing beyond this node: remove it instead.
        node = parent;
    }
}
/**
 * Remove images that cannot contribute visible content.
 *
 * An IMG is removed from its parent when its `height` or `width` attribute
 * parses to 0, or when its src starts with "data". Images are processed
 * from last to first.
 *
 * @param {Document|Element} cdoc - document (or subtree) to clean
 */
function deleteZeroSizeImages(cdoc) {
    const images = cdoc.getElementsByTagName('IMG');

    const isDiscardable = (image) => {
        if (parseInt(image.getAttribute('height')) == 0) {
            return true;
        }
        if (parseInt(image.getAttribute('width')) == 0) {
            return true;
        }
        return image.src.startsWith("data");
    };

    // Back to front, so removals do not disturb positions still to be visited.
    let position = images.length;
    while (position-- > 0) {
        const image = images[position];
        if (isDiscardable(image)) {
            image.parentNode.removeChild(image);
        }
    }
}
/**
 * Send a "getZoomValue" request through browser.runtime.sendMessage().
 *
 * Whatever sendMessage() returns is discarded; nothing is returned here.
 */
function requestZoomValue() {
    const zoomRequest = { "action": "getZoomValue" };
    browser.runtime.sendMessage(zoomRequest);
}
/**
 * Record a new zoom factor in the global `zoomValue`.
 *
 * The new value is reported through print() before it is stored.
 *
 * @param {*} zoom - value to store as the current zoom
 */
function updateZoomValue(zoom) {
    const notice = "Updating zoomValue to: " + zoom;
    print(notice);
    zoomValue = zoom;
}
/**
 * Back up the H1 headings of `cdoc`.
 *
 * A deep clone of every H1 element is stored in `hdict` under the heading's
 * `data-dfsIndex` attribute value. Only H1 elements are collected.
 *
 * @param {Object} hdict - receives the clones, keyed by data-dfsIndex
 * @param {Document|Element} cdoc - document (or subtree) to scan
 */
function cloneHElems(hdict, cdoc) {
    Array.from(cdoc.getElementsByTagName("H1")).forEach((heading) => {
        hdict[heading.getAttribute('data-dfsIndex')] = heading.cloneNode(true);
    });
}
/**
 * Create a NodeIterator over `root` that yields only the nodes whose class
 * name, or node name, matches `regexp`.
 *
 * @param {Node} root - root of the subtree to iterate
 * @param {RegExp} regexp - pattern applied to the selected node property
 * @param {string} attr - "nodeName" to match against node.nodeName; any
 *        other value matches against node.className
 * @returns {NodeIterator} iterator created by document.createNodeIterator()
 */
function getNodeIterator(root, regexp, attr) {
    return document.createNodeIterator(
        root,
        NodeFilter.SHOW_ALL,
        {
            acceptNode: function(node) {
                const nodeAttr = (attr == "nodeName") ? node.nodeName : node.className;

                // Nodes that do not expose the property as a string (e.g. text
                // or comment nodes when matching on className) can never match.
                // Testing them directly would compare against the literal
                // text "undefined".
                if (typeof nodeAttr !== 'string') {
                    return NodeFilter.FILTER_SKIP;
                }

                // A /g or /y pattern keeps state in lastIndex between calls;
                // reset it so every node is tested from the start of its string.
                regexp.lastIndex = 0;

                // Always answer with an explicit NodeFilter constant.
                return regexp.test(nodeAttr)
                    ? NodeFilter.FILTER_ACCEPT
                    : NodeFilter.FILTER_SKIP;
            }
        }
    );
}
/*
 * Entry point of this script.
 *
 * tranquilize() was written to be registered as a listener for messages
 * from the extension. That registration is intentionally left disabled:
 * */
// browser.runtime.onMessage.addListener(tranquilize);
/* Instead, tranquilize() is called directly, once, with the 'Run' action
 * as soon as this script is evaluated. Its return value is not used. */
tranquilize({ tranquility_action: 'Run'});