/* Imports */
import axios from "axios";
import { getCookie, applyIfCookieExists } from "../utils/getCookies.js";
import { Tabletojson } from "tabletojson";
import toMarkdown from "../utils/toMarkdown.js";
import { calculateStars } from "../utils/stars.js";
import { upsert } from "../database/mongo-wrapper.js";
/* Definitions */
// Base URL of the Infer question listing; page URLs are derived from it.
const htmlEndPoint = "https://www.infer-pub.com/questions";

// Fallback for runtimes without String.prototype.replaceAll (Node < 15).
// It is installed only when the native method is missing, so the built-in
// (which also supports replacer functions and `$&`-style patterns) is never
// clobbered for the rest of the process.
if (typeof String.prototype.replaceAll !== "function") {
  Object.defineProperty(String.prototype, "replaceAll", {
    value: function replaceAll(search, replace) {
      return this.split(search).join(replace);
    },
    writable: true,
    configurable: true,
    enumerable: false,
  });
}

// "on" makes the scraper log every question it fetches; the environment
// variable DEBUG_MODE is checked alongside this constant.
const DEBUG_MODE = "on"; // "off"

// Pauses between requests last SLEEP_TIME_EXTRA plus a random share of
// SLEEP_TIME_RANDOM, both in milliseconds.
const SLEEP_TIME_RANDOM = 7000;
const SLEEP_TIME_EXTRA = 2000;
/* Support functions */
/**
 * Downloads one page of the question listing.
 *
 * @param page - Page number placed in the `page` query parameter.
 * @param cookie - Cookie string sent in the `Cookie` request header.
 * @returns The `data` field of the axios response (presumably the page HTML).
 */
async function fetchPage(page, cookie) {
  console.log(`Page #${page}`);

  // For page 1 only the part of the cookie before the first ";" is sent;
  // the request otherwise does not work. Loose equality is kept so that
  // non-number page values behave exactly as before.
  const cookieHeader = page == 1 ? cookie.split(";")[0] : cookie;

  const urlEndpoint = `${htmlEndPoint}/?page=${page}`;
  console.log(urlEndpoint);

  const requestConfig = {
    url: urlEndpoint,
    method: "GET",
    headers: {
      "Content-Type": "text/html",
      Cookie: cookieHeader,
    },
  };
  const { data } = await axios(requestConfig);
  return data;
}
/**
 * Fetches the stats page of one question and extracts its forecast data.
 *
 * Requests questionUrl with /stats appended, sending the given cookie and a
 * Referer header, then derives from the returned text: the answer options
 * with their probabilities, a description passed through toMarkdown, and
 * the forecast and forecaster counts.
 *
 * @param questionUrl - URL of the question; /stats is appended to it.
 * @param cookie - Value sent as the Cookie request header.
 * @returns An object with description, options, timestamp and
 *   qualityindicators (numforecasts, numforecasters, stars).
 * @throws Error with the message Not logged in when the response contains
 *   the sign-in prompt checked below. Failures of the request or of the
 *   string slicing further down are not caught here.
 *
 * NOTE(review): several string literals in this function look truncated:
 * the binary check, the predicate given to htmlElements.filter, both
 * optionsHtmlElement assignments, the separator used to split out the
 * description, and the first argument of the descriptionprocessed5
 * replaceAll call. descriptionprocessed1 is also read without being
 * declared anywhere in view, and optionsBody is computed but never used.
 * Presumably the HTML or entity text inside those literals was lost;
 * restore it from version control before relying on this function.
 * NOTE(review): calculateStars receives numforecasts as the raw string
 * slice, not the Number stored in qualityindicators.numforecasts; confirm
 * that this is intended.
 */
async function fetchStats(questionUrl, cookie) {
// The Referer header points at the question page itself.
let response = await axios({
url: questionUrl + "/stats",
method: "GET",
headers: {
"Content-Type": "text/html",
Cookie: cookie,
Referer: questionUrl,
},
}).then((res) => res.data);
// A sign-in prompt in the body means the cookie was not accepted.
if (response.includes("Sign up or sign in to forecast")) {
throw Error("Not logged in");
}
// Is binary?
let isbinary = response.includes("binary?":true");
// console.log(`is binary? ${isbinary}`)
let options = [];
if (isbinary) {
// Crowd percentage
let htmlElements = response.split("\n");
// DEBUG_MODE == "on" ? htmlLines.forEach(line => console.log(line)) : id()
let h3Element = htmlElements.filter((str) => str.includes("
"))[0];
// DEBUG_MODE == "on" ? console.log(h5elements) : id()
let crowdpercentage = h3Element.split(">")[1].split("<")[0];
let probability = Number(crowdpercentage.replace("%", "")) / 100;
options.push(
{
name: "Yes",
probability: probability,
type: "PROBABILITY",
},
{
name: "No",
probability: +(1 - probability).toFixed(2), // avoids floating point shenanigans
type: "PROBABILITY",
}
);
} else {
try {
let optionsBody = response.split("tbody")[1]; // Previously [1], then previously [3] but they added a new table.
// console.log(optionsBody)
let optionsHtmlElement = "
";
let tablesAsJson = Tabletojson.convert(optionsHtmlElement);
let firstTable = tablesAsJson[0];
options = firstTable.map((element) => ({
name: element["0"],
probability: Number(element["1"].replace("%", "")) / 100,
type: "PROBABILITY",
}));
} catch (error) {
let optionsBody = response.split("tbody")[3]; // Catch if the error is related to table position
let optionsHtmlElement = "";
let tablesAsJson = Tabletojson.convert(optionsHtmlElement);
let firstTable = tablesAsJson[0];
if (firstTable) {
options = firstTable.map((element) => ({
name: element["0"],
probability: Number(element["1"].replace("%", "")) / 100,
type: "PROBABILITY",
}));
} else {
// New type of question, tricky to parse the options
// Just leave options = [] for now.
// https://www.cset-foretell.com/blog/rolling-question-formats
}
}
}
// Description
let descriptionraw = response.split(``)[0];
let descriptionprocessed2 = descriptionprocessed1.replace(">", "");
let descriptionprocessed3 = descriptionprocessed2.replace(
"To suggest a change or clarification to this question, please select Request Clarification from the green gear-shaped dropdown button to the right of the question.",
``
);
// console.log(descriptionprocessed3)
let descriptionprocessed4 = descriptionprocessed3.replaceAll(
"\r\n\r\n",
"\n"
);
let descriptionprocessed5 = descriptionprocessed4.replaceAll("\n\n", "\n");
let descriptionprocessed6 = descriptionprocessed5.replaceAll(""", `"`);
let descriptionprocessed7 = descriptionprocessed6.replaceAll("'", "'");
let descriptionprocessed8 = toMarkdown(descriptionprocessed7);
let description = descriptionprocessed8;
// Number of forecasts
//console.log(response)
//console.log(response.split("prediction_sets_count":")[1])
let numforecasts = response
.split("prediction_sets_count":")[1]
.split(",")[0];
// console.log(numforecasts)
// Number of predictors
let numforecasters = response
.split("predictors_count":")[1]
.split(",")[0];
// console.log(numpredictors)
let result = {
description: description,
options: options,
timestamp: new Date().toISOString(),
qualityindicators: {
numforecasts: Number(numforecasts),
numforecasters: Number(numforecasters),
stars: calculateStars("Infer", { numforecasts }),
},
};
return result;
}
/**
 * Tells whether a fetched page was served to a signed-in session.
 *
 * The page counts as signed out when it contains either of the sign-in
 * prompts listed below. The verdict is logged on every call, preceded by an
 * error line when the session is signed out.
 *
 * @param html - Page text to inspect.
 * @returns true when none of the prompts is present, false otherwise.
 */
function isSignedIn(html) {
  const signedOutMarkers = [
    "You need to sign in or sign up before continuing",
    "Sign up",
  ];
  const signedIn = !signedOutMarkers.some((marker) => html.includes(marker));

  if (signedIn === false) {
    console.log("Error: Not signed in.");
  }
  console.log(`Signed in? ${signedIn}`);
  return signedIn;
}
/**
 * Tells whether a listing page is past the last page of questions.
 *
 * @param html - Page text to inspect.
 * @returns true when the page contains the empty-listing message; the
 *   verdict is also logged.
 */
function isEnd(html) {
  const reachedEnd = html.includes("No questions match your filter");
  console.log(`IsEnd? ${reachedEnd}`);
  return reachedEnd;
}
/**
 * Returns a promise that settles once a timer of `ms` milliseconds fires.
 *
 * @param ms - Delay handed to setTimeout.
 * @returns A promise resolved with no value by the timer callback.
 */
function sleep(ms) {
  return new Promise(function startTimer(done) {
    setTimeout(done, ms);
  });
}
/* Body */
/**
 * Walks the paginated question listing and stores every question that
 * could be fetched.
 *
 * Starting at page 1, pages are requested with fetchPage for as long as
 * isEnd is false and isSignedIn is true for the current page. For each
 * question fetchStats is called, an object with id, title, url, platform
 * and the fetched stats is built, and that object is appended to results.
 * A random pause (sleep) precedes every stats request and every page
 * request. When results is not empty it is handed to upsert under the name
 * infer-questions; otherwise a message is logged. The elapsed time is
 * logged at the end.
 *
 * @param cookie - Passed unchanged to fetchPage and fetchStats.
 *
 * NOTE(review): this function looks damaged. The commented-out h4elements
 * line runs straight into the tail of a replace chain, url and title are
 * used without being declared anywhere in view, and the closing braces no
 * longer balance. Presumably the code that extracted each question link
 * and the loop over those links were lost together with some HTML
 * literals; restore them from version control.
 * NOTE(review): the periodic log passes the page label joined by a logical
 * AND with the debug check to console.log, so the result of that
 * expression is printed instead of the label; confirm the intent.
 * NOTE(review): when fetchPage throws, response keeps the text of the
 * previous page while i has already been incremented.
 * NOTE(review): the message about not being signed in is printed whenever
 * results is empty, whatever the cause.
 */
async function infer_inner(cookie) {
// i is the page number currently held in response.
let i = 1;
let response = await fetchPage(i, cookie);
let results = [];
// Start time, used for the duration logged at the end.
let init = Date.now();
// console.log("Downloading... This might take a couple of minutes. Results will be shown.")
while (!isEnd(response) && isSignedIn(response)) {
let htmlLines = response.split("\n");
// let h4elements = htmlLines.filter(str => str.includes("", "")
.replace("", "");
await sleep(Math.random() * SLEEP_TIME_RANDOM + SLEEP_TIME_EXTRA); // don't be as noticeable
try {
let moreinfo = await fetchStats(url, cookie);
// The numeric part after questions/ in the URL becomes the id suffix.
let questionNumRegex = new RegExp("questions/([0-9]+)");
let questionNum = url.match(questionNumRegex)[1]; //.split("questions/")[1].split("-")[0];
let id = `infer-${questionNum}`;
let question = {
id: id,
title: title,
url: url,
platform: "Infer",
...moreinfo,
};
if (
i % 30 == 0 &&
!(process.env.DEBUG_MODE == "on" || DEBUG_MODE == "on")
) {
console.log(
`Page #${i}` &&
!(process.env.DEBUG_MODE == "on" || DEBUG_MODE == "on")
);
console.log(question);
}
results.push(question);
if (process.env.DEBUG_MODE == "on" || DEBUG_MODE == "on") {
console.log(url);
console.log(question);
}
} catch (error) {
console.log(error);
console.log(
`We encountered some error when fetching the URL: ${url}, so it won't appear on the final json`
);
}
}
i++;
//i=Number(i)+1
console.log(
"Sleeping for ~5secs so as to not be as noticeable to the infer servers"
);
await sleep(Math.random() * SLEEP_TIME_RANDOM + SLEEP_TIME_EXTRA); // don't be as noticeable
try {
response = await fetchPage(i, cookie);
} catch (error) {
console.log(error);
console.log(
`The program encountered some error when fetching page #${i}, so it won't appear on the final json. It is possible that this page wasn't actually a prediction question pages`
);
}
}
// let string = JSON.stringify(results,null, 2)
// fs.writeFileSync('./data/infer-questions.json', string);
// console.log(results)
if (results.length > 0) {
await upsert(results, "infer-questions");
} else {
console.log("Not updating results, as process was not signed in");
}
let end = Date.now();
let difference = end - init;
console.log(
`Took ${difference / 1000} seconds, or ${difference / (1000 * 60)} minutes.`
);
}
/**
 * Entry point of the Infer scraper.
 *
 * Takes the cookie from the INFER_COOKIE environment variable, falling back
 * to getCookie("infer") when that variable is unset or empty, and hands it
 * together with infer_inner to applyIfCookieExists.
 *
 * @returns A promise that settles after applyIfCookieExists has settled.
 */
export async function infer() {
  let cookie = process.env.INFER_COOKIE;
  if (!cookie) {
    cookie = getCookie("infer");
  }
  await applyIfCookieExists(cookie, infer_inner);
}