2021-01-12 12:43:41 +00:00
/* Imports */
import axios from "axios"
2021-04-10 18:18:22 +00:00
import { getCookie , applyIfCookieExists } from "../utils/getCookies.js"
2021-02-18 16:12:55 +00:00
import { Tabletojson } from "tabletojson"
2021-04-08 16:42:48 +00:00
import toMarkdown from "../utils/toMarkdown.js"
import { calculateStars } from "../utils/stars.js"
import { upsert } from "../utils/mongo-wrapper.js"
2021-01-12 12:43:41 +00:00
/* Definitions */
// Base URL of the paginated question listing; fetchPage appends the page number to it.
const htmlEndPoint = 'https://www.cset-foretell.com/questions?page=';
2021-04-08 17:32:44 +00:00
// Installs a split/join based `replaceAll` on every string. It overrides any
// built-in version, so the replacement text is always inserted literally and
// `search` may be anything `String.prototype.split` accepts.
String.prototype.replaceAll = function replaceAll(search, replace) {
  const pieces = this.split(search);
  return pieces.join(replace);
};
2021-01-12 12:43:41 +00:00
/* Support functions */
/**
 * Downloads one page of the CSET-foretell question listing.
 *
 * @param {number} page - Page number appended to `htmlEndPoint`.
 * @param {string} cookie - Value sent in the `Cookie` request header.
 * @returns {Promise<string>} The response body (`data` of the axios response);
 *   callers treat it as HTML text.
 */
async function fetchPage(page, cookie) {
  // The request for the first page only works when just the first
  // `;`-separated segment of the cookie is sent.
  // `Number(page) === 1` keeps accepting "1" as the original loose comparison did.
  const cookieHeader = Number(page) === 1 ? cookie.split(";")[0] : cookie;

  const response = await axios({
    url: htmlEndPoint + page,
    method: "GET",
    headers: {
      "Content-Type": "text/html",
      Cookie: cookieHeader,
    },
  });
  return response.data;
}
/**
 * Downloads the `/stats` page of one question and scrapes the forecast data from it.
 *
 * The markers searched for are matched in their HTML-escaped form (`&quot;`).
 * NOTE(review): confirm the stats page still serves these markers entity-escaped.
 *
 * @param {string} questionUrl - URL of the question; `/stats` is appended to it.
 * @param {string} cookie - Value sent in the `Cookie` request header.
 * @returns {Promise<object>} `{ description, options, timestamp, qualityindicators }`,
 *   where `qualityindicators` holds `numforecasts`, `numforecasters` (both as the
 *   raw text found in the page) and `stars`.
 * @throws {Error} When an expected marker is missing from the page, or the request fails.
 */
async function fetchStats(questionUrl, cookie) {
  const { data: html } = await axios({
    url: `${questionUrl}/stats`,
    method: "GET",
    headers: {
      "Content-Type": "text/html",
      Cookie: cookie,
      Referer: questionUrl,
    },
  });

  const isBinary = html.includes("binary?&quot;:true");
  const options = isBinary ? parseBinaryOptions(html) : parseTableOptions(html);

  const rawDescription = extractAfterMarker(html, `<meta name="description" content="`, `">`);
  const description = cleanDescription(rawDescription);

  const numforecasts = extractAfterMarker(html, "prediction_sets_count&quot;:", ",");
  const numforecasters = extractAfterMarker(html, "predictors_count&quot;:", ",");

  return {
    description,
    options,
    timestamp: new Date().toISOString(),
    qualityindicators: {
      numforecasts,
      numforecasters,
      stars: calculateStars("CSET-foretell", { numforecasts }),
    },
  };
}

/**
 * Returns the text between the first occurrence of `marker` and the next
 * `terminator`; throws a descriptive Error when `marker` is absent.
 */
function extractAfterMarker(html, marker, terminator) {
  const tail = html.split(marker)[1];
  if (tail === undefined) {
    throw new Error(`CSET-foretell stats page: marker not found: ${marker}`);
  }
  return tail.split(terminator)[0];
}

/**
 * Builds the Yes/No options of a binary question from the first line that
 * contains an `<h3>` tag, whose text is read as the crowd percentage.
 */
function parseBinaryOptions(html) {
  const h3Line = html.split("\n").find((line) => line.includes("<h3>"));
  if (h3Line === undefined) {
    throw new Error("CSET-foretell stats page: no <h3> line with the crowd percentage");
  }
  const crowdPercentage = h3Line.split(">")[1].split("<")[0];
  const probability = Number(crowdPercentage.replace("%", "")) / 100;
  return [
    { name: "Yes", probability, type: "PROBABILITY" },
    {
      name: "No",
      probability: +(1 - probability).toFixed(2), // avoids floating point shenanigans
      type: "PROBABILITY",
    },
  ];
}

/**
 * Builds the options of a multiple-choice question from the first `tbody`
 * of the page: column 0 is the option name, column 1 its percentage.
 */
function parseTableOptions(html) {
  const tbodyFragment = html.split("tbody")[1];
  if (tbodyFragment === undefined) {
    throw new Error("CSET-foretell stats page: no <tbody> with the answer options");
  }
  // Splitting on "tbody" leaves `>…rows…</`; re-wrapping it yields a
  // table element that Tabletojson can parse.
  const [rows] = Tabletojson.convert(`<table${tbodyFragment}table>`);
  if (rows === undefined) {
    throw new Error("CSET-foretell stats page: the options table could not be parsed");
  }
  return rows.map((row) => ({
    name: row["0"],
    probability: Number(row["1"].replace("%", "")) / 100,
    type: "PROBABILITY",
  }));
}

/**
 * Cleans the raw content of the description meta tag: strips the first `>`,
 * removes the "request clarification" boilerplate, collapses blank lines,
 * decodes `&quot;` / `&#39;`, and passes the result through `toMarkdown`.
 */
function cleanDescription(rawDescription) {
  const boilerplate =
    "To suggest a change or clarification to this question, please select Request Clarification from the green gear-shaped dropdown button to the right of the question.";
  const cleaned = rawDescription
    .replace(">", "")
    .replace(boilerplate, "")
    .replaceAll("\r\n\r\n", "\n")
    .replaceAll("\n\n", "\n")
    .replaceAll("&quot;", `"`)
    .replaceAll("&#39;", "'");
  return toMarkdown(cleaned);
}
/**
 * Tells whether a listing page is past the last page of questions.
 *
 * @param {string} html - Body of a listing page.
 * @returns {boolean} `true` when the page contains the "no questions" notice.
 */
function isEnd(html) {
  const emptyListingNotice = "No questions match your filter";
  const reachedEnd = html.includes(emptyListingNotice);
  return reachedEnd;
}
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
/* Body */
2021-04-10 18:18:22 +00:00
/**
 * Pulls the question URL and title out of one listing line shaped like
 * `<h4><a href="URL"><span>TITLE</span></a></h4>` (or the `<h5>` variant).
 * Returns `null` when the line does not have that shape.
 */
function parseQuestionHeading(headingLine) {
  const [beforeTitle, afterTitle] = headingLine.split('"><span>');
  const url = beforeTitle.split('<a href="')[1];
  if (url === undefined || afterTitle === undefined) {
    return null;
  }
  // Both heading levels are matched by the caller, so strip either closing tag.
  const title = afterTitle.replace(/<\/span><\/a><\/h[45]>/, "");
  return { url, title };
}

/**
 * Walks the question listing page by page, fetches the stats of every question
 * found, and upserts the collected questions as "csetforetell-questions".
 *
 * A question whose stats cannot be fetched is logged and left out. A listing
 * page that cannot be fetched is logged and skipped; after several consecutive
 * failed pages the walk stops and whatever was collected is still upserted.
 *
 * @param {string} cookie - Value sent in the `Cookie` header of every request.
 * @returns {Promise<void>}
 */
async function csetforetell_inner(cookie) {
  const debugMode = process.env.DEBUG_MODE === "on";
  const maxConsecutivePageFailures = 3;
  const results = [];
  const init = Date.now();

  let i = 1;
  let response = await fetchPage(i, cookie);

  while (response !== null && !isEnd(response)) {
    if (debugMode) {
      console.log(`Page #${i}`);
      console.log(response);
    }

    const headingLines = response
      .split("\n")
      .filter((line) => line.includes("<h5><a href=") || line.includes("<h4><a href="));

    for (const headingLine of headingLines) {
      const heading = parseQuestionHeading(headingLine);
      if (heading === null) {
        console.log(`Could not parse a question heading on page #${i}, so it won't appear on the final json`);
        continue;
      }
      const { url, title } = heading;

      await sleep(1000 + Math.random() * 1000); // don't be as noticeable
      try {
        const moreinfo = await fetchStats(url, cookie);
        const question = {
          title,
          url,
          platform: "CSET-foretell",
          ...moreinfo,
        };
        if (i % 30 === 0) {
          console.log(`Page #${i}`);
          console.log(question);
        }
        results.push(question);
        if (debugMode) {
          console.log(question);
        }
      } catch (error) {
        console.log(error);
        console.log(`We encountered some error when fetching the URL: ${url}, so it won't appear on the final json`);
      }
    }

    // Move on to the next page. A page that fails to download is skipped
    // rather than re-processing the previous one, and the walk gives up after
    // too many consecutive failures so it cannot loop forever.
    response = null;
    for (let failures = 0; response === null && failures < maxConsecutivePageFailures; failures += 1) {
      i = i + 1;
      await sleep(5000 + Math.random() * 1000); // don't be as noticeable
      try {
        response = await fetchPage(i, cookie);
      } catch (error) {
        console.log(error);
        console.log(`The program encountered some error when fetching page #${i}, so it won't appear on the final json. It is possible that this page wasn't actually a prediction question pages`);
      }
    }
    if (response === null) {
      console.log(`Giving up after ${maxConsecutivePageFailures} consecutive pages could not be fetched`);
    }
  }

  await upsert(results, "csetforetell-questions");

  const difference = Date.now() - init;
  console.log(`Took ${difference / 1000} seconds, or ${difference / (1000 * 60)} minutes.`);
}
2021-04-10 18:18:22 +00:00
/**
 * Entry point of the CSET-foretell scraper.
 *
 * Takes the cookie from the `CSETFORETELL_COOKIE` environment variable, falling
 * back to `getCookie("csetforetell")` when that variable is unset or empty, and
 * hands it together with the scraper to `applyIfCookieExists`.
 *
 * @returns {Promise<void>}
 */
export async function csetforetell() {
  // `||` on purpose: an empty environment variable should also fall back.
  const cookie = process.env.CSETFORETELL_COOKIE || getCookie("csetforetell");
  await applyIfCookieExists(cookie, csetforetell_inner);
}