feat: Rework INFER fetcher using the json embedded in its html
This commit is contained in:
parent
63219d429d
commit
7aa44746bf
|
@ -17,6 +17,7 @@ String.prototype.replaceAll = function replaceAll(search, replace) {
|
||||||
const DEBUG_MODE: "on" | "off" = "off"; // "off"
|
const DEBUG_MODE: "on" | "off" = "off"; // "off"
|
||||||
const SLEEP_TIME_RANDOM = 7000; // milliseconds
|
const SLEEP_TIME_RANDOM = 7000; // milliseconds
|
||||||
const SLEEP_TIME_EXTRA = 2000;
|
const SLEEP_TIME_EXTRA = 2000;
|
||||||
|
|
||||||
/* Support functions */
|
/* Support functions */
|
||||||
|
|
||||||
async function fetchPage(page, cookie) {
|
async function fetchPage(page, cookie) {
|
||||||
|
@ -52,94 +53,50 @@ async function fetchStats(questionUrl, cookie) {
|
||||||
if (response.includes("Sign up or sign in to forecast")) {
|
if (response.includes("Sign up or sign in to forecast")) {
|
||||||
throw Error("Not logged in");
|
throw Error("Not logged in");
|
||||||
}
|
}
|
||||||
|
// Init
|
||||||
// Is binary?
|
|
||||||
let isbinary = response.includes("binary?&quot;:true");
|
|
||||||
// console.log(`is binary? ${isbinary}`)
|
|
||||||
let options = [];
|
let options = [];
|
||||||
if (isbinary) {
|
|
||||||
// Crowd percentage
|
// Parse the embedded json
|
||||||
let htmlElements = response.split("\n");
|
let htmlElements = response.split("\n");
|
||||||
// DEBUG_MODE == "on" ? htmlLines.forEach(line => console.log(line)) : id()
|
let jsonLines = htmlElements.filter((element) =>
|
||||||
let h3Element = htmlElements.filter((str) => str.includes("<h3>"))[0];
|
element.includes("data-react-props")
|
||||||
// DEBUG_MODE == "on" ? console.log(h5elements) : id()
|
);
|
||||||
let crowdpercentage = h3Element.split(">")[1].split("<")[0];
|
let embeddedJsons = jsonLines.map((jsonLine, i) => {
|
||||||
let probability = Number(crowdpercentage.replace("%", "")) / 100;
|
let innerJSONasHTML = jsonLine.split('data-react-props="')[1].split('"')[0];
|
||||||
options.push(
|
let json = JSON.parse(innerJSONasHTML.replaceAll("&quot;", '"'));
|
||||||
{
|
return json;
|
||||||
name: "Yes",
|
});
|
||||||
probability: probability,
|
let firstEmbeddedJson = embeddedJsons[0];
|
||||||
|
let title = firstEmbeddedJson.question.name;
|
||||||
|
let description = firstEmbeddedJson.question.description;
|
||||||
|
let comments_count = firstEmbeddedJson.question.comments_count;
|
||||||
|
let numforecasters = firstEmbeddedJson.question.predictors_count;
|
||||||
|
let numforecasts = firstEmbeddedJson.question.prediction_sets_count;
|
||||||
|
let forecastType = firstEmbeddedJson.question.type;
|
||||||
|
if (
|
||||||
|
forecastType.includes("Binary") ||
|
||||||
|
forecastType.includes("NonExclusiveOpinionPoolQuestion") ||
|
||||||
|
forecastType.includes("Forecast::Question") ||
|
||||||
|
!forecastType.includes("Forecast::MultiTimePeriodQuestion")
|
||||||
|
) {
|
||||||
|
options = firstEmbeddedJson.question.answers.map((answer) => ({
|
||||||
|
name: answer.name,
|
||||||
|
probability: answer.normalized_probability,
|
||||||
type: "PROBABILITY",
|
type: "PROBABILITY",
|
||||||
},
|
}));
|
||||||
{
|
if (options.length == 1 && options[0].name == "Yes") {
|
||||||
|
let probabilityNo =
|
||||||
|
options[0].probability > 1
|
||||||
|
? 1 - options[0].probability / 100
|
||||||
|
: 1 - options[0].probability;
|
||||||
|
let optionNo = {
|
||||||
name: "No",
|
name: "No",
|
||||||
probability: +(1 - probability).toFixed(2), // avoids floating point shenanigans
|
probability: probabilityNo,
|
||||||
type: "PROBABILITY",
|
type: "PROBABILITY",
|
||||||
}
|
};
|
||||||
);
|
options.push(optionNo);
|
||||||
} else {
|
|
||||||
try {
|
|
||||||
let optionsBody = response.split("tbody")[1]; // Previously [1], then previously [3] but they added a new table.
|
|
||||||
// console.log(optionsBody)
|
|
||||||
let optionsHtmlElement = "<table" + optionsBody + "table>";
|
|
||||||
let tablesAsJson = Tabletojson.convert(optionsHtmlElement);
|
|
||||||
let firstTable = tablesAsJson[0];
|
|
||||||
options = firstTable.map((element) => ({
|
|
||||||
name: element["0"],
|
|
||||||
probability: Number(element["1"].replace("%", "")) / 100,
|
|
||||||
type: "PROBABILITY",
|
|
||||||
}));
|
|
||||||
} catch (error) {
|
|
||||||
let optionsBody = response.split("tbody")[3]; // Catch if the error is related to table position
|
|
||||||
let optionsHtmlElement = "<table" + optionsBody + "table>";
|
|
||||||
let tablesAsJson = Tabletojson.convert(optionsHtmlElement);
|
|
||||||
let firstTable = tablesAsJson[0];
|
|
||||||
if (firstTable) {
|
|
||||||
options = firstTable.map((element) => ({
|
|
||||||
name: element["0"],
|
|
||||||
probability: Number(element["1"].replace("%", "")) / 100,
|
|
||||||
type: "PROBABILITY",
|
|
||||||
}));
|
|
||||||
} else {
|
|
||||||
// New type of question, tricky to parse the options
|
|
||||||
// Just leave options = [] for now.
|
|
||||||
// https://www.cset-foretell.com/blog/rolling-question-formats
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
// Description
|
|
||||||
let descriptionraw = response.split(`<meta name="description" content="`)[1];
|
|
||||||
let descriptionprocessed1 = descriptionraw.split(`">`)[0];
|
|
||||||
let descriptionprocessed2 = descriptionprocessed1.replace(">", "");
|
|
||||||
let descriptionprocessed3 = descriptionprocessed2.replace(
|
|
||||||
"To suggest a change or clarification to this question, please select Request Clarification from the green gear-shaped dropdown button to the right of the question.",
|
|
||||||
``
|
|
||||||
);
|
|
||||||
// console.log(descriptionprocessed3)
|
|
||||||
let descriptionprocessed4 = descriptionprocessed3.replaceAll(
|
|
||||||
"\r\n\r\n",
|
|
||||||
"\n"
|
|
||||||
);
|
|
||||||
let descriptionprocessed5 = descriptionprocessed4.replaceAll("\n\n", "\n");
|
|
||||||
let descriptionprocessed6 = descriptionprocessed5.replaceAll("&quot;", `"`);
|
|
||||||
let descriptionprocessed7 = descriptionprocessed6.replaceAll("&#39;", "'");
|
|
||||||
let descriptionprocessed8 = toMarkdown(descriptionprocessed7);
|
|
||||||
let description = descriptionprocessed8;
|
|
||||||
|
|
||||||
// Number of forecasts
|
|
||||||
//console.log(response)
|
|
||||||
//console.log(response.split("prediction_sets_count&quot;:")[1])
|
|
||||||
let numforecasts = response
|
|
||||||
.split("prediction_sets_count&quot;:")[1]
|
|
||||||
.split(",")[0];
|
|
||||||
// console.log(numforecasts)
|
|
||||||
|
|
||||||
// Number of predictors
|
|
||||||
let numforecasters = response
|
|
||||||
.split("predictors_count&quot;:")[1]
|
|
||||||
.split(",")[0];
|
|
||||||
// console.log(numpredictors)
|
|
||||||
|
|
||||||
let result = {
|
let result = {
|
||||||
description: description,
|
description: description,
|
||||||
options: options,
|
options: options,
|
||||||
|
@ -147,10 +104,11 @@ async function fetchStats(questionUrl, cookie) {
|
||||||
qualityindicators: {
|
qualityindicators: {
|
||||||
numforecasts: Number(numforecasts),
|
numforecasts: Number(numforecasts),
|
||||||
numforecasters: Number(numforecasters),
|
numforecasters: Number(numforecasters),
|
||||||
|
comments_count: Number(comments_count),
|
||||||
stars: calculateStars(platformName, { numforecasts }),
|
stars: calculateStars(platformName, { numforecasts }),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
// console.log(JSON.stringify(result, null, 4));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -218,10 +176,13 @@ async function infer_inner(cookie: string) {
|
||||||
let question: Forecast = {
|
let question: Forecast = {
|
||||||
id: id,
|
id: id,
|
||||||
title: title,
|
title: title,
|
||||||
|
description: moreinfo.description,
|
||||||
url: url,
|
url: url,
|
||||||
platform: platformName,
|
platform: platformName,
|
||||||
|
options: moreinfo.options,
|
||||||
...moreinfo,
|
...moreinfo,
|
||||||
};
|
};
|
||||||
|
console.log(JSON.stringify(question, null, 4));
|
||||||
if (
|
if (
|
||||||
i % 30 == 0 &&
|
i % 30 == 0 &&
|
||||||
!(process.env.DEBUG_MODE == "on" || DEBUG_MODE == "on")
|
!(process.env.DEBUG_MODE == "on" || DEBUG_MODE == "on")
|
||||||
|
|
Loading…
Reference in New Issue
Block a user