From afaea9106ad42f884ec667b0a1e0fbd88efbabc7 Mon Sep 17 00:00:00 2001 From: NunoSempere Date: Fri, 11 Jun 2021 21:13:28 +0200 Subject: [PATCH] Fixed error which broke CSET predictions --- src/platforms/csetforetell-fetch-old.js | 201 ++++++++++++++++++++++++ src/platforms/csetforetell-fetch.js | 42 +++-- 2 files changed, 227 insertions(+), 16 deletions(-) create mode 100644 src/platforms/csetforetell-fetch-old.js diff --git a/src/platforms/csetforetell-fetch-old.js b/src/platforms/csetforetell-fetch-old.js new file mode 100644 index 0000000..67c3e59 --- /dev/null +++ b/src/platforms/csetforetell-fetch-old.js @@ -0,0 +1,201 @@ +/* Imports */ +import axios from "axios" +import {getCookie, applyIfCookieExists} from "../utils/getCookies.js" +import {Tabletojson} from "tabletojson" +import toMarkdown from "../utils/toMarkdown.js" +import {calculateStars} from "../utils/stars.js" +import {upsert} from "../utils/mongo-wrapper.js" + +/* Definitions */ +let htmlEndPoint = 'https://www.cset-foretell.com/questions?page=' +String.prototype.replaceAll = function replaceAll(search, replace) { return this.split(search).join(replace); } + +/* Support functions */ + +async function fetchPage(page, cookie){ + if(page==1){ + cookie=cookie.split(";")[0] // Interesting that it otherwise doesn't work :( + } + let response = await axios({ + url: htmlEndPoint+page, + method: 'GET', + headers: ({ + 'Content-Type': 'text/html', + 'Cookie': cookie + }), + }) + .then(res => res.data) + // console.log(response) + return response +} + +async function fetchStats(questionUrl, cookie){ + let response = await axios({ + url: questionUrl+"/stats", + method: 'GET', + headers: ({ + 'Content-Type': 'text/html', + 'Cookie': cookie, + 'Referer': questionUrl, + }), + }) + .then(res => res.data) + //console.log(response) + + // Is binary? + let isbinary = response.includes("binary?":true") + // console.log(`is binary? ${isbinary}`) + let options = [] + if(isbinary){ + // Crowd percentage + let htmlElements = response.split("\n") + // console.log(htmlElements) + let h3Element = htmlElements.filter(str => str.includes("

"))[0] + let crowdpercentage = h3Element.split(">")[1].split("<")[0] + let probability = Number(crowdpercentage.replace("%", ""))/100 + options.push(({ + name: "Yes", + probability: probability, + type: "PROBABILITY" + }), ({ + name: "No", + probability: +(1-probability).toFixed(2), // avoids floating point shenanigans + type: "PROBABILITY" + })) + }else{ + let optionsBody = response.split("tbody")[3] // Previously [1], but they added a new table. + // console.log(optionsBody) + let optionsHtmlElement = "" + let tablesAsJson = Tabletojson.convert(optionsHtmlElement) + let firstTable = tablesAsJson[0] + options = firstTable.map(element => ({ + name: element['0'], + probability: Number(element['1'].replace("%",""))/100, + type: "PROBABILITY" + })) + } + // Description + let descriptionraw = response.split(``)[0] + let descriptionprocessed2 = descriptionprocessed1.replace(">", "") + let descriptionprocessed3 = descriptionprocessed2.replace("To suggest a change or clarification to this question, please select Request Clarification from the green gear-shaped dropdown button to the right of the question.", ``) + // console.log(descriptionprocessed3) + let descriptionprocessed4=descriptionprocessed3.replaceAll("\r\n\r\n", "\n") + let descriptionprocessed5= descriptionprocessed4.replaceAll("\n\n", "\n") + let descriptionprocessed6=descriptionprocessed5.replaceAll(""", `"`) + let descriptionprocessed7=descriptionprocessed6.replaceAll("'", "'") + let descriptionprocessed8=toMarkdown(descriptionprocessed7) + let description = descriptionprocessed8 + + // Number of forecasts + let numforecasts = response.split("prediction_sets_count":")[1].split(",")[0] + // console.log(numforecasts) + + // Number of predictors + let numforecasters = response.split("predictors_count":")[1].split(",")[0] + // console.log(numpredictors) + + let result = { + "description": description, + "options": options, + "timestamp": new Date().toISOString(), + "qualityindicators": { + "numforecasts": numforecasts, + "numforecasters": numforecasters, + "stars": calculateStars("CSET-foretell", {numforecasts}) + } + } + + return result +} + +function isEnd(html){ + return html.includes("No questions match your filter") +} + +function sleep(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); +} + +/* Body */ + +async function csetforetell_inner(cookie){ + let i=0 + let response = await fetchPage(i, cookie) + let results = [] + let init = Date.now() + // console.log("Downloading... This might take a couple of minutes. Results will be shown.") + while(!isEnd(response)){ + + let htmlLines = response.split("\n") + let h4elements = htmlLines.filter(str => str.includes("
') + let url = h4elementSplit[0].split('
', "") + await sleep(1000 + Math.random()*1000) // don't be as noticeable + try{ + let moreinfo = await fetchStats(url, cookie) + if(moreinfo.isbinary){ + if(!moreinfo.crowdpercentage){ // then request again. + moreinfo = await fetchStats(url, cookie) + } + } + let question = ({ + "title": title, + "url": url, + "platform": "CSET-foretell", + ...moreinfo + }) + if(i % 30 == 0){ + console.log(`Page #${i}`) + console.log(question) + } + results.push(question) + if(process.env.DEBUG_MODE == "on"){ + console.log(question) + } + + } catch(error){ + console.log(error) + console.log(`We encountered some error when fetching the URL: ${url}, so it won't appear on the final json`) + } + i=i+1 + } + + // console.log("Sleeping for ~5secs so as to not be as noticeable to the cset-foretell servers") + await sleep(5000 + Math.random()*1000) // don't be as noticeable + + try{ + response = await fetchPage(i, cookie) + }catch(error){ + console.log(error) + console.log(`The program encountered some error when fetching page #${i}, so it won't appear on the final json. It is possible that this page wasn't actually a prediction question pages`) + } + } + // let string = JSON.stringify(results,null, 2) + // fs.writeFileSync('./data/csetforetell-questions.json', string); + // console.log(results) + await upsert(results, "csetforetell-questions") + + + let end = Date.now() + let difference = end-init + console.log(`Took ${difference/1000} seconds, or ${difference/(1000*60)} minutes.`) +} + + +export async function csetforetell(){ + let cookie = process.env.CSETFORETELL_COOKIE || getCookie("csetforetell") + await applyIfCookieExists(cookie, csetforetell_inner) +} diff --git a/src/platforms/csetforetell-fetch.js b/src/platforms/csetforetell-fetch.js index 67c3e59..9f9c77f 100644 --- a/src/platforms/csetforetell-fetch.js +++ b/src/platforms/csetforetell-fetch.js @@ -63,16 +63,29 @@ async function fetchStats(questionUrl, cookie){ type: "PROBABILITY" })) }else{ - let optionsBody = response.split("tbody")[3] // Previously [1], but they added a new table. - // console.log(optionsBody) - let optionsHtmlElement = "" - let tablesAsJson = Tabletojson.convert(optionsHtmlElement) - let firstTable = tablesAsJson[0] - options = firstTable.map(element => ({ - name: element['0'], - probability: Number(element['1'].replace("%",""))/100, - type: "PROBABILITY" - })) + try{ + let optionsBody = response.split("tbody")[1] // Previously [1], then previously [3] but they added a new table. + // console.log(optionsBody) + let optionsHtmlElement = "" + let tablesAsJson = Tabletojson.convert(optionsHtmlElement) + let firstTable = tablesAsJson[0] + options = firstTable.map(element => ({ + name: element['0'], + probability: Number(element['1'].replace("%",""))/100, + type: "PROBABILITY" + })) + }catch(error){ + let optionsBody = response.split("tbody")[3] // Catch if the error is related to table position + let optionsHtmlElement = "" + let tablesAsJson = Tabletojson.convert(optionsHtmlElement) + let firstTable = tablesAsJson[0] + options = firstTable.map(element => ({ + name: element['0'], + probability: Number(element['1'].replace("%",""))/100, + type: "PROBABILITY" + })) + } + } // Description let descriptionraw = response.split(`') let url = h4elementSplit[0].split('

', "") await sleep(1000 + Math.random()*1000) // don't be as noticeable + try{ let moreinfo = await fetchStats(url, cookie) - if(moreinfo.isbinary){ - if(!moreinfo.crowdpercentage){ // then request again. - moreinfo = await fetchStats(url, cookie) - } - } let question = ({ "title": title, "url": url, @@ -170,6 +179,7 @@ async function csetforetell_inner(cookie){ console.log(error) console.log(`We encountered some error when fetching the URL: ${url}, so it won't appear on the final json`) } + i=i+1 }