// metaforecast/src/platforms/goodjudmentopen-fetch.js
/* Imports */
import fs from 'fs'
import axios from "axios"
import {getCookie, applyIfCookieExists} from "../utils/getCookies.js"
import { Tabletojson } from "tabletojson"
import { calculateStars } from "../utils/stars.js"
import toMarkdown from "../utils/toMarkdown.js"
import { upsert } from "../utils/mongo-wrapper.js"
/* Definitions */
// Base URL of the paginated question listing; the page number is appended to it.
const htmlEndPoint = 'https://www.gjopen.com/questions?page='

// Question URLs that the scraper skips. Judging by their slugs these are
// open-ended discussion prompts rather than forecastable questions.
const annoyingPromptUrls = [
  "https://www.gjopen.com/questions/1933-what-forecasting-questions-should-we-ask-what-questions-would-you-like-to-forecast-on-gjopen",
  "https://www.gjopen.com/questions/1779-are-there-any-forecasting-tips-tricks-and-experiences-you-would-like-to-share-and-or-discuss-with-your-fellow-forecasters",
]

// Set to "on" to print every downloaded HTML line and every parsed question.
const DEBUG_MODE = "off" // "on"

// Identity function; called without arguments it simply does nothing.
const id = x => x
/* Support functions */
/**
 * Downloads one page of the question listing.
 *
 * @param {number|string} page - Page number; it is appended to `htmlEndPoint`.
 * @param {string} cookie - Value sent in the `Cookie` request header.
 * @returns {Promise<*>} The response body (`data`) as returned by axios.
 *   Rejects if the axios request rejects; nothing is caught here.
 */
async function fetchPage(page, cookie) {
  const { data } = await axios({
    url: htmlEndPoint + page,
    method: 'GET',
    headers: {
      'Content-Type': 'text/html',
      'Cookie': cookie,
    },
  })
  return data
}
/**
 * Downloads the "/stats" page of one question and scrapes the forecast data
 * out of its HTML with plain string splitting.
 *
 * @param {string} questionUrl - URL of the question. "/stats" is appended to
 *   build the request URL, and the URL itself is sent as the `Referer` header.
 * @param {string} cookie - Value sent in the `Cookie` request header.
 * @returns {Promise<object>} An object of the form
 *   `{ description, options, timestamp, qualityindicators: { numforecasts, numforecasters, stars } }`
 *   where `options` is a list of `{ name, probability, type: "PROBABILITY" }`.
 * @throws {Error} If one of the markers the scraper relies on is missing from
 *   the page. Request failures from axios propagate unchanged.
 */
async function fetchStats(questionUrl, cookie) {
  const { data: response } = await axios({
    url: questionUrl + "/stats",
    method: 'GET',
    headers: {
      'Content-Type': 'text/html',
      'Cookie': cookie,
      'Referer': questionUrl,
    },
  })

  // Same result as `response.split(marker)[1]` (the text between the first and
  // the second occurrence of `marker`), but a missing marker produces a
  // readable error instead of a TypeError on `undefined`.
  const sectionAfter = (marker) => {
    const pieces = response.split(marker)
    if (pieces.length < 2) {
      throw new Error(`Could not find "${marker}" in the stats page of ${questionUrl}`)
    }
    return pieces[1]
  }

  // Is binary?
  // The markers below contain `&quot;` literally: the values are matched in
  // their HTML-escaped form (presumably JSON embedded in an attribute).
  const isbinary = response.includes("binary?&quot;:true")
  let options = []
  if (isbinary) {
    // Crowd percentage: the text content of the first line containing an <h3>
    const h3Element = response.split("\n").find(str => str.includes("<h3>"))
    if (h3Element === undefined) {
      throw new Error(`Could not find the <h3> crowd forecast in the stats page of ${questionUrl}`)
    }
    const crowdpercentage = h3Element.split(">")[1].split("<")[0]
    const probability = Number(crowdpercentage.replace("%", "")) / 100
    options = [
      {
        name: "Yes",
        probability: probability,
        type: "PROBABILITY"
      },
      {
        name: "No",
        probability: +(1 - probability).toFixed(2), // avoids floating point shenanigans
        type: "PROBABILITY"
      },
    ]
  } else {
    // Splitting on "tbody" yields `>…rows…</`, so gluing "<table" and "table>"
    // around it rebuilds a minimal, parseable <table> element.
    const optionsHtmlElement = "<table" + sectionAfter("tbody") + "table>"
    const firstTable = Tabletojson.convert(optionsHtmlElement)[0]
    if (firstTable === undefined) {
      throw new Error(`Could not parse the options table in the stats page of ${questionUrl}`)
    }
    // Column '0' holds the option name, column '1' its percentage
    options = firstTable.map(element => ({
      name: element['0'],
      probability: Number(element['1'].replace("%", "")) / 100,
      type: "PROBABILITY"
    }))
  }

  // Description: contents of the question-background div, converted to
  // markdown, minus the "Confused? Check our…" helper line
  const descriptionHtml = sectionAfter(`<div id="question-background" class="collapse smb">`).split(`</div>`)[0]
  const description = toMarkdown(descriptionHtml)
    .split("\n")
    .filter(string => !string.includes("Confused? Check our"))
    .join("\n")

  // Number of forecasts (still a string at this point)
  const numforecasts = sectionAfter("prediction_sets_count&quot;:").split(",")[0]

  // Number of predictors (still a string at this point)
  const numforecasters = sectionAfter("predictors_count&quot;:").split(",")[0]

  // Calculate the stars
  const probabilities = options.map(option => option.probability)
  const minProbability = Math.min(...probabilities)
  const maxProbability = Math.max(...probabilities)

  return {
    "description": description,
    "options": options,
    "timestamp": new Date().toISOString(),
    "qualityindicators": {
      "numforecasts": Number(numforecasts),
      "numforecasters": Number(numforecasters),
      // numforecasts is handed over as the scraped string, exactly as before
      "stars": calculateStars("Good Judgment Open", ({ numforecasts, minProbability, maxProbability }))
    }
  }
}
/**
 * Decides from a downloaded page whether the request was made as a signed-in user.
 *
 * The page counts as signed out when it contains either the
 * "You need to sign in or sign up before continuing" notice or the text "Sign up".
 * The verdict is logged on every call.
 *
 * @param {string} html - HTML of a downloaded page.
 * @returns {boolean} `true` when neither signed-out marker is present.
 */
function isSignedIn(html) {
  const isSignedInBool = !(html.includes("You need to sign in or sign up before continuing") || html.includes("Sign up"))
  if (!isSignedInBool) {
    console.log("Error: Not signed in.")
  }
  console.log(`is signed in? ${isSignedInBool}`)
  return isSignedInBool
}
/**
 * Tells whether a listing page is past the last page of questions, i.e.
 * whether it contains the text "No questions match your filter".
 * The verdict is logged on every call.
 *
 * @param {string} html - HTML of a downloaded listing page.
 * @returns {boolean} `true` when the end-of-listing text is present.
 */
function isEnd(html) {
  const isEndBool = html.includes("No questions match your filter")
  console.log(`IsEnd? ${isEndBool}`)
  return isEndBool
}
/**
 * Promise-based pause.
 *
 * @param {number} ms - Delay handed to `setTimeout`, in milliseconds.
 * @returns {Promise<undefined>} Settles (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
/* Body */
/**
 * Walks the paginated question listing, fetches the stats of every listed
 * question (except those in `annoyingPromptUrls`) and upserts the collected
 * questions under "goodjudmentopen-questions".
 *
 * The walk stops when `isEnd` reports the end of the listing, when
 * `isSignedIn` reports a signed-out page, or when several listing pages in a
 * row cannot be downloaded. A question whose stats cannot be fetched is
 * logged and left out; a listing page that cannot be downloaded is logged
 * and skipped.
 *
 * @param {string} cookie - Cookie forwarded to `fetchPage` and `fetchStats`.
 * @returns {Promise<void>}
 */
async function goodjudgmentopen_inner(cookie) {
  // Stop instead of looping forever when the listing keeps failing to load
  const maxConsecutivePageFailures = 3
  let consecutivePageFailures = 0

  let i = 1
  // A failure on the very first page is not caught: it propagates to the caller
  let response = await fetchPage(i, cookie)
  const results = []
  const init = Date.now()

  // `response === null` marks a page whose download failed. Such a page is
  // skipped, rather than re-processing the previous page's HTML (which would
  // store every question of that page a second time).
  while (response === null || (!isEnd(response) && isSignedIn(response))) {
    if (response !== null) {
      const htmlLines = response.split("\n")
      if (DEBUG_MODE === "on") {
        htmlLines.forEach(line => console.log(line))
      }
      // Each question of the listing sits on a line of the form
      // <h5> <a href="URL"><span>TITLE</span></a></h5>
      const h5elements = htmlLines.filter(str => str.includes("<h5> <a href="))
      if (DEBUG_MODE === "on") {
        console.log(h5elements)
      }

      let j = 0
      for (const h5element of h5elements) {
        const h5elementSplit = h5element.split('"><span>')
        const url = h5elementSplit[0].split('<a href="')[1]
        if (!annoyingPromptUrls.includes(url)) {
          const title = h5elementSplit[1].replace('</span></a></h5>', "")
          await sleep(1000 + Math.random() * 1000) // don't be as noticeable
          try {
            const moreinfo = await fetchStats(url, cookie)
            const question = {
              "title": title,
              "url": url,
              "platform": "Good Judgment Open",
              ...moreinfo
            }
            // Print a sample (every 30th listing entry) as a progress indicator
            if (j % 30 === 0 || DEBUG_MODE === "on") {
              console.log(`Page #${i}`)
              console.log(question)
            }
            results.push(question)
          } catch (error) {
            console.log(error)
            console.log(`We encountered some error when fetching the URL: ${url}, so it won't appear on the final json`)
          }
        }
        j = j + 1
      }
    }

    i = i + 1
    await sleep(5000 + Math.random() * 1000) // be gentle with the gjopen server
    try {
      response = await fetchPage(i, cookie)
      consecutivePageFailures = 0
    } catch (error) {
      console.log(error)
      console.log(`We encountered some error when fetching page #${i}, so it won't appear on the final json`)
      response = null
      consecutivePageFailures = consecutivePageFailures + 1
      if (consecutivePageFailures >= maxConsecutivePageFailures) {
        console.log(`Giving up after ${consecutivePageFailures} consecutive pages failed to download`)
        break
      }
    }
  }

  console.log(results)
  if (results.length > 0) {
    await upsert(results, "goodjudmentopen-questions")
  } else {
    console.log("Not updating results, as process was not signed in")
  }

  const end = Date.now()
  const difference = end - init
  console.log(`Took ${difference / 1000} seconds, or ${difference / (1000 * 60)} minutes.`)
}
/**
 * Entry point of the Good Judgment Open fetcher.
 *
 * Takes the cookie from the GOODJUDGMENTOPENCOOKIE environment variable when
 * that is set to a non-empty value, otherwise asks `getCookie` for it, and
 * passes it together with the scraper to `applyIfCookieExists`.
 *
 * @returns {Promise<void>}
 */
export async function goodjudgmentopen() {
  const cookieFromEnv = process.env.GOODJUDGMENTOPENCOOKIE
  const cookie = cookieFromEnv ? cookieFromEnv : getCookie("goodjudmentopen")
  await applyIfCookieExists(cookie, goodjudgmentopen_inner)
}