Fixed error which broke CSET predictions
This commit is contained in:
parent
ffbd09416d
commit
afaea9106a
201
src/platforms/csetforetell-fetch-old.js
Normal file
201
src/platforms/csetforetell-fetch-old.js
Normal file
|
@ -0,0 +1,201 @@
|
||||||
|
/* Imports */
|
||||||
|
import axios from "axios"
|
||||||
|
import {getCookie, applyIfCookieExists} from "../utils/getCookies.js"
|
||||||
|
import {Tabletojson} from "tabletojson"
|
||||||
|
import toMarkdown from "../utils/toMarkdown.js"
|
||||||
|
import {calculateStars} from "../utils/stars.js"
|
||||||
|
import {upsert} from "../utils/mongo-wrapper.js"
|
||||||
|
|
||||||
|
/* Definitions */
|
||||||
|
// Base URL of the paginated question listing; fetchPage appends the page number to it.
let htmlEndPoint = 'https://www.cset-foretell.com/questions?page='
|
||||||
|
// Installs a split/join based replaceAll on every string. The replacement is
// inserted literally (no "$&"-style patterns) and `search` may be anything
// that String.prototype.split accepts.
String.prototype.replaceAll = function replaceAll(search, replace) {
  const pieces = this.split(search);
  return pieces.join(replace);
};
|
||||||
|
|
||||||
|
/* Support functions */
|
||||||
|
|
||||||
|
/**
 * Downloads one page of the question listing.
 *
 * @param {number} page - Page number appended to `htmlEndPoint`.
 * @param {string} cookie - Value sent in the `Cookie` request header.
 * @returns {Promise<string>} The response body (`data` of the axios response).
 */
async function fetchPage(page, cookie){
  // For page 1 only the first ";"-separated segment of the cookie is sent;
  // the original author noted that the request otherwise doesn't work.
  const cookieHeader = page == 1 ? cookie.split(";")[0] : cookie

  const response = await axios({
    url: htmlEndPoint + page,
    method: 'GET',
    headers: {
      'Content-Type': 'text/html',
      'Cookie': cookieHeader
    },
  })
  return response.data
}
|
||||||
|
|
||||||
|
/**
 * Downloads the "/stats" page of a question and scrapes the forecast data from it.
 *
 * NOTE(review): the markers searched for below use HTML entities (&quot;, &#39;)
 * on the assumption that the page embeds its data HTML-escaped — confirm
 * against a live stats page.
 *
 * @param {string} questionUrl - URL of the question; "/stats" is appended to it.
 * @param {string} cookie - Value sent in the `Cookie` request header.
 * @returns {Promise<object>} `{description, options, timestamp, qualityindicators}`,
 *   where `options` is a list of `{name, probability, type: "PROBABILITY"}`.
 * @throws If the request fails or an expected marker is missing from the page.
 */
async function fetchStats(questionUrl, cookie){
  const { data: response } = await axios({
    url: questionUrl + "/stats",
    method: 'GET',
    headers: {
      'Content-Type': 'text/html',
      'Cookie': cookie,
      'Referer': questionUrl,
    },
  })

  // Binary questions expose a single crowd percentage; other questions
  // list one probability per option in a table.
  const isbinary = response.includes("&quot;binary?&quot;:true")
  const options = isbinary ? parseBinaryOptions(response) : parseOptionsTable(response)

  const description = extractDescription(response)
  const numforecasts = extractEmbeddedCount(response, "prediction_sets_count")
  const numforecasters = extractEmbeddedCount(response, "predictors_count")

  return {
    "description": description,
    "options": options,
    "timestamp": new Date().toISOString(),
    "qualityindicators": {
      "numforecasts": numforecasts,
      "numforecasters": numforecasters,
      "stars": calculateStars("CSET-foretell", {numforecasts})
    }
  }
}

// Converts a percentage string such as "35%" into a probability (0.35).
function percentToProbability(text){
  return Number(text.replace("%", ""))/100
}

// Builds the Yes/No options from the first line of the page containing "<h3>".
function parseBinaryOptions(html){
  const h3Element = html.split("\n").filter(str => str.includes("<h3>"))[0]
  const crowdpercentage = h3Element.split(">")[1].split("<")[0]
  const probability = percentToProbability(crowdpercentage)
  return [
    {
      name: "Yes",
      probability: probability,
      type: "PROBABILITY"
    },
    {
      name: "No",
      probability: +(1-probability).toFixed(2), // avoids floating point shenanigans
      type: "PROBABILITY"
    }
  ]
}

// Reads the options table. The table has moved between "tbody" positions when
// the site added or removed tables, so position 3 is tried first and position 1
// is used as a fallback.
function parseOptionsTable(html){
  const pieces = html.split("tbody")
  let lastError = new Error("No options table found in the stats page")
  for(const index of [3, 1]){
    const optionsBody = pieces[index]
    if(optionsBody === undefined){
      continue
    }
    try{
      // The split removed the "tbody" tag names; re-wrapping the fragment as
      // "<table" + body + "table>" gives the converter a parseable table.
      const firstTable = Tabletojson.convert("<table" + optionsBody + "table>")[0]
      return firstTable.map(element => ({
        name: element['0'],
        probability: percentToProbability(element['1']),
        type: "PROBABILITY"
      }))
    }catch(error){
      lastError = error
    }
  }
  throw lastError
}

// Extracts the content of the description <meta> tag, strips boilerplate,
// collapses blank lines, decodes quote entities and converts it with toMarkdown.
function extractDescription(html){
  const raw = html.split(`<meta name="description" content="`)[1]
  const cleaned = raw
    .split(`">`)[0]
    .replace(">", "")
    .replace("To suggest a change or clarification to this question, please select Request Clarification from the green gear-shaped dropdown button to the right of the question.", ``)
    .replaceAll("\r\n\r\n", "\n")
    .replaceAll("\n\n", "\n")
    .replaceAll("&quot;", `"`)
    .replaceAll("&#39;", "'")
  return toMarkdown(cleaned)
}

// Returns the text between `&quot;<key>&quot;:` and the next comma.
// The value is returned as a string, exactly as it appears in the page.
function extractEmbeddedCount(html, key){
  return html.split(`&quot;${key}&quot;:`)[1].split(",")[0]
}
|
||||||
|
|
||||||
|
/**
 * Tells whether a listing page is past the last page of questions.
 *
 * @param {string} html - Body of a listing page.
 * @returns {boolean} true when the page contains the "No questions match your filter" notice.
 */
function isEnd(html){
  return html.includes("No questions match your filter")
}
|
||||||
|
|
||||||
|
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}
|
||||||
|
|
||||||
|
/* Body */
|
||||||
|
|
||||||
|
/**
 * Walks the question listing page by page, fetches the stats of every question
 * found and upserts the collected questions as "csetforetell-questions".
 *
 * Questions whose stats cannot be fetched are logged and left out. A listing
 * page that cannot be fetched is logged and skipped; after several consecutive
 * page failures the crawl stops and whatever was collected is still upserted.
 *
 * @param {string} cookie - Cookie forwarded to fetchPage and fetchStats.
 * @returns {Promise<void>}
 */
async function csetforetell_inner(cookie){
  // Stop paginating after this many listing pages in a row failed to load,
  // so that a persistent outage cannot keep the loop running forever.
  const maxConsecutivePageFailures = 3

  let i = 0 // index of the listing page currently being processed
  let response = await fetchPage(i, cookie)
  let results = []
  let init = Date.now()
  let consecutivePageFailures = 0

  while(!isEnd(response)){
    let htmlLines = response.split("\n")
    let h4elements = htmlLines.filter(str => str.includes("<h5><a href=") || str.includes("<h4><a href="))

    if(process.env.DEBUG_MODE === "on"){
      console.log(response)
    }

    for(let h4element of h4elements){
      let h4elementSplit = h4element.split('"><span>')
      let url = h4elementSplit[0].split('<a href="')[1]
      let title = h4elementSplit[1].replace('</span></a></h4>', "").replace('</span></a></h5>', "")
      await sleep(1000 + Math.random()*1000) // don't be as noticeable

      try{
        let moreinfo = await fetchStats(url, cookie)
        let question = {
          "title": title,
          "url": url,
          "platform": "CSET-foretell",
          ...moreinfo
        }
        if(i % 30 === 0){
          console.log(`Page #${i}`)
          console.log(question)
        }
        results.push(question)
        if(process.env.DEBUG_MODE === "on"){
          console.log(question)
        }
      }catch(error){
        console.log(error)
        console.log(`We encountered some error when fetching the URL: ${url}, so it won't appear on the final json`)
      }
    }

    // Advance once per listing page. Incrementing inside the question loop
    // would jump ahead by the number of questions and skip pages.
    i = i + 1

    await sleep(5000 + Math.random()*1000) // don't be as noticeable

    try{
      response = await fetchPage(i, cookie)
      consecutivePageFailures = 0
    }catch(error){
      console.log(error)
      console.log(`The program encountered some error when fetching page #${i}, so it won't appear on the final json. It is possible that this page wasn't actually a prediction question pages`)
      consecutivePageFailures = consecutivePageFailures + 1
      if(consecutivePageFailures >= maxConsecutivePageFailures){
        console.log(`Stopping after ${consecutivePageFailures} consecutive pages failed to load.`)
        break
      }
      // Treat the failed page as empty so the previous page is not scraped twice.
      response = ""
    }
  }

  await upsert(results, "csetforetell-questions")

  let end = Date.now()
  let difference = end-init
  console.log(`Took ${difference/1000} seconds, or ${difference/(1000*60)} minutes.`)
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Entry point of the CSET-foretell fetcher.
 *
 * Takes the cookie from the CSETFORETELL_COOKIE environment variable, falling
 * back to getCookie("csetforetell"), and hands it together with
 * csetforetell_inner to applyIfCookieExists.
 *
 * @returns {Promise<void>}
 */
export async function csetforetell(){
  const cookie = process.env.CSETFORETELL_COOKIE || getCookie("csetforetell")
  await applyIfCookieExists(cookie, csetforetell_inner)
}
|
|
@ -63,7 +63,8 @@ async function fetchStats(questionUrl, cookie){
|
||||||
type: "PROBABILITY"
|
type: "PROBABILITY"
|
||||||
}))
|
}))
|
||||||
}else{
|
}else{
|
||||||
let optionsBody = response.split("tbody")[3] // Previously [1], but they added a new table.
|
try{
|
||||||
|
let optionsBody = response.split("tbody")[1] // Previously [1], then previously [3] but they added a new table.
|
||||||
// console.log(optionsBody)
|
// console.log(optionsBody)
|
||||||
let optionsHtmlElement = "<table" + optionsBody + "table>"
|
let optionsHtmlElement = "<table" + optionsBody + "table>"
|
||||||
let tablesAsJson = Tabletojson.convert(optionsHtmlElement)
|
let tablesAsJson = Tabletojson.convert(optionsHtmlElement)
|
||||||
|
@ -73,6 +74,18 @@ async function fetchStats(questionUrl, cookie){
|
||||||
probability: Number(element['1'].replace("%",""))/100,
|
probability: Number(element['1'].replace("%",""))/100,
|
||||||
type: "PROBABILITY"
|
type: "PROBABILITY"
|
||||||
}))
|
}))
|
||||||
|
}catch(error){
|
||||||
|
let optionsBody = response.split("tbody")[3] // Catch if the error is related to table position
|
||||||
|
let optionsHtmlElement = "<table" + optionsBody + "table>"
|
||||||
|
let tablesAsJson = Tabletojson.convert(optionsHtmlElement)
|
||||||
|
let firstTable = tablesAsJson[0]
|
||||||
|
options = firstTable.map(element => ({
|
||||||
|
name: element['0'],
|
||||||
|
probability: Number(element['1'].replace("%",""))/100,
|
||||||
|
type: "PROBABILITY"
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
// Description
|
// Description
|
||||||
let descriptionraw = response.split(`<meta name="description" content="`)[1]
|
let descriptionraw = response.split(`<meta name="description" content="`)[1]
|
||||||
|
@ -139,18 +152,14 @@ async function csetforetell_inner(cookie){
|
||||||
//console.log(h4elements)
|
//console.log(h4elements)
|
||||||
|
|
||||||
for(let h4element of h4elements){
|
for(let h4element of h4elements){
|
||||||
|
|
||||||
let h4elementSplit = h4element.split('"><span>')
|
let h4elementSplit = h4element.split('"><span>')
|
||||||
let url = h4elementSplit[0].split('<a href="')[1]
|
let url = h4elementSplit[0].split('<a href="')[1]
|
||||||
//console.log(url)
|
|
||||||
let title = h4elementSplit[1].replace('</span></a></h4>', "").replace('</span></a></h5>', "")
|
let title = h4elementSplit[1].replace('</span></a></h4>', "").replace('</span></a></h5>', "")
|
||||||
await sleep(1000 + Math.random()*1000) // don't be as noticeable
|
await sleep(1000 + Math.random()*1000) // don't be as noticeable
|
||||||
|
|
||||||
try{
|
try{
|
||||||
let moreinfo = await fetchStats(url, cookie)
|
let moreinfo = await fetchStats(url, cookie)
|
||||||
if(moreinfo.isbinary){
|
|
||||||
if(!moreinfo.crowdpercentage){ // then request again.
|
|
||||||
moreinfo = await fetchStats(url, cookie)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let question = ({
|
let question = ({
|
||||||
"title": title,
|
"title": title,
|
||||||
"url": url,
|
"url": url,
|
||||||
|
@ -170,6 +179,7 @@ async function csetforetell_inner(cookie){
|
||||||
console.log(error)
|
console.log(error)
|
||||||
console.log(`We encountered some error when fetching the URL: ${url}, so it won't appear on the final json`)
|
console.log(`We encountered some error when fetching the URL: ${url}, so it won't appear on the final json`)
|
||||||
}
|
}
|
||||||
|
|
||||||
i=i+1
|
i=i+1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user