From 4d736f711d6c309f1e783fa1889acf9456308146 Mon Sep 17 00:00:00 2001 From: Vyacheslav Matyukhin Date: Thu, 19 May 2022 13:39:53 +0400 Subject: [PATCH 1/3] feat: metaculus validates api, supports --id cli arg --- package-lock.json | 76 ++++++ package.json | 1 + src/backend/platforms/metaculus.ts | 381 +++++++++++++++++------------ 3 files changed, 306 insertions(+), 152 deletions(-) diff --git a/package-lock.json b/package-lock.json index c447dc6..bd2247f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -28,6 +28,7 @@ "@types/textversionjs": "^1.1.1", "@types/tunnel": "^0.0.3", "airtable": "^0.11.1", + "ajv": "^8.11.0", "algoliasearch": "^4.10.3", "autoprefixer": "^10.1.0", "axios": "^0.25.0", @@ -3498,6 +3499,21 @@ "node": ">=8.0.0" } }, + "node_modules/ajv": { + "version": "8.11.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.11.0.tgz", + "integrity": "sha512-wGgprdCvMalC0BztXvitD2hC04YffAvtsUn93JbGXYLAtCUO4xd17mCCZQxUOItiBwZvJScWo8NIvQMQ71rdpg==", + "dependencies": { + "fast-deep-equal": "^3.1.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, "node_modules/algoliasearch": { "version": "4.10.3", "resolved": "https://registry.npmjs.org/algoliasearch/-/algoliasearch-4.10.3.tgz", @@ -5771,6 +5787,11 @@ "url": "https://github.com/sponsors/jaydenseric" } }, + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==" + }, "node_modules/fast-equals": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/fast-equals/-/fast-equals-2.0.4.tgz", @@ -7454,6 +7475,11 @@ "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==", "license": "MIT" }, + "node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==" + }, "node_modules/json-stable-stringify": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/json-stable-stringify/-/json-stable-stringify-1.0.1.tgz", @@ -37587,6 +37613,14 @@ "node": ">=0.10.0" } }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/require-main-filename": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/require-main-filename/-/require-main-filename-2.0.0.tgz", @@ -39027,6 +39061,14 @@ "tslib": "^2.0.3" } }, + "node_modules/uri-js": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", + "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", + "dependencies": { + "punycode": "^2.1.0" + } + }, "node_modules/url-parse-lax": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/url-parse-lax/-/url-parse-lax-3.0.0.tgz", @@ -42453,6 +42495,17 @@ "node-fetch": "^2.6.7" } }, + "ajv": { + "version": "8.11.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.11.0.tgz", + "integrity": "sha512-wGgprdCvMalC0BztXvitD2hC04YffAvtsUn93JbGXYLAtCUO4xd17mCCZQxUOItiBwZvJScWo8NIvQMQ71rdpg==", + "requires": { + "fast-deep-equal": "^3.1.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2", + "uri-js": "^4.2.2" + } + }, "algoliasearch": { "version": "4.10.3", "resolved": "https://registry.npmjs.org/algoliasearch/-/algoliasearch-4.10.3.tgz", @@ -44096,6 +44149,11 @@ "resolved": "https://registry.npmjs.org/extract-files/-/extract-files-9.0.0.tgz", "integrity": "sha512-CvdFfHkC95B4bBBk36hcEmvdR2awOdhhVUYH6S/zrVj3477zven/fJMYg7121h4T1xHZC+tetUpubpAhxwI7hQ==" }, + "fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==" + }, "fast-equals": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/fast-equals/-/fast-equals-2.0.4.tgz", @@ -45304,6 +45362,11 @@ "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", "integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==" }, + "json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==" + }, "json-stable-stringify": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/json-stable-stringify/-/json-stable-stringify-1.0.1.tgz", @@ -67926,6 +67989,11 @@ "integrity": "sha1-jGStX9MNqxyXbiNE/+f3kqam30I=", "dev": true }, + "require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==" + }, "require-main-filename": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/require-main-filename/-/require-main-filename-2.0.0.tgz", @@ -68920,6 +68988,14 @@ "tslib": "^2.0.3" } }, + "uri-js": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", + "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", + "requires": { + "punycode": "^2.1.0" + } + }, "url-parse-lax": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/url-parse-lax/-/url-parse-lax-3.0.0.tgz", diff --git a/package.json b/package.json index f141b9e..80e9ced 100644 --- a/package.json +++ b/package.json @@ -46,6 +46,7 @@ "@types/textversionjs": "^1.1.1", "@types/tunnel": "^0.0.3", "airtable": "^0.11.1", + "ajv": "^8.11.0", "algoliasearch": "^4.10.3", "autoprefixer": "^10.1.0", "axios": "^0.25.0", diff --git a/src/backend/platforms/metaculus.ts b/src/backend/platforms/metaculus.ts index 710f3d3..f1fef7a 100644 --- a/src/backend/platforms/metaculus.ts +++ b/src/backend/platforms/metaculus.ts @@ -1,4 +1,5 @@ /* Imports */ +import Ajv, { JTDDataType } from "ajv/dist/jtd"; import axios from "axios"; import { average } from "../../utils"; @@ -8,24 +9,87 @@ import { FetchedQuestion, Platform } from "./"; /* Definitions */ const platformName = "metaculus"; -let now = new Date().toISOString(); -let DEBUG_MODE = "off"; -let SLEEP_TIME = 5000; +const now = new Date().toISOString(); +const SLEEP_TIME = 5000; -/* Support functions */ -async function fetchMetaculusQuestions(next: string) { - // Numbers about a given address: how many, how much, at what price, etc. - let response; - let data; +const apiQuestionSchema = { + properties: { + page_url: { + type: "string", + }, + title: { + type: "string", + }, + publish_time: { + type: "string", + }, + close_time: { + type: "string", + }, + resolve_time: { + type: "string", + }, + number_of_predictions: { + type: "uint32", + }, + possibilities: { + properties: { + type: { + type: "string", // TODO - enum? + }, + }, + additionalProperties: true, + }, + community_prediction: { + properties: { + full: { + properties: { + q1: { + type: "float64", + }, + q2: { + type: "float64", + }, + q3: { + type: "float64", + }, + }, + additionalProperties: true, + }, + }, + additionalProperties: true, + }, + }, + additionalProperties: true, +} as const; + +const apiMultipleQuestionsSchema = { + properties: { + results: { + elements: apiQuestionSchema, + }, + next: { + type: "string", + nullable: true, + }, + }, + additionalProperties: true, +} as const; + +type ApiQuestion = JTDDataType; +type ApiMultipleQuestions = JTDDataType; + +const validateApiQuestion = new Ajv().compile(apiQuestionSchema); +const validateApiMultipleQuestions = new Ajv().compile< + JTDDataType +>(apiMultipleQuestionsSchema); + +async function fetchWithRetries(url: string): Promise { try { - response = await axios({ - url: next, - method: "GET", - headers: { "Content-Type": "application/json" }, - }); - data = response.data; + const response = await axios.get(url); + return response.data; } catch (error) { - console.log(`Error in async function fetchMetaculusQuestions(next)`); + console.log(`Error while fetching ${url}`); console.log(error); if (axios.isAxiosError(error)) { if (error.response?.headers["retry-after"]) { @@ -36,170 +100,183 @@ async function fetchMetaculusQuestions(next: string) { await sleep(SLEEP_TIME); } } - } finally { - try { - response = await axios({ - url: next, - method: "GET", - headers: { "Content-Type": "application/json" }, - }); - data = response.data; - } catch (error) { - console.log(error); - return { results: [] }; - } } - // console.log(response) - return data; + const response = await axios.get(url); + return response.data; } -async function fetchMetaculusQuestionDescription(slug: string) { - try { - let response = await axios({ - method: "get", - url: "https://www.metaculus.com" + slug, - }).then((response) => response.data); - return response; - } catch (error) { - console.log(`Error in: fetchMetaculusQuestionDescription`); - console.log( - `We encountered some error when attempting to fetch a metaculus page. Trying again` - ); - if ( - axios.isAxiosError(error) && - typeof error.response != "undefined" && - typeof error.response.headers != "undefined" && - typeof error.response.headers["retry-after"] != "undefined" - ) { - const timeout = error.response.headers["retry-after"]; - console.log(`Timeout: ${timeout}`); - await sleep(Number(timeout) * 1000 + SLEEP_TIME); - } else { - await sleep(SLEEP_TIME); - } - try { - let response = await axios({ - method: "get", - url: "https://www.metaculus.com" + slug, - }).then((response) => response.data); - // console.log(response) - return response; - } catch (error) { - console.log( - `We encountered some error when attempting to fetch a metaculus page.` - ); - console.log("Error", error); - throw "Giving up"; - } +/* Support functions */ +async function fetchApiQuestions(next: string): Promise { + const data = await fetchWithRetries(next); + if (validateApiMultipleQuestions(data)) { + return data; } + throw new Error("Response validation failed"); } -export const metaculus: Platform = { +async function fetchSingleApiQuestion(url: string): Promise { + const data = await fetchWithRetries(url); + if (validateApiQuestion(data)) { + return data; + } + throw new Error("Response validation failed"); +} + +async function fetchQuestionHtml(slug: string) { + return await fetchWithRetries("https://www.metaculus.com" + slug); +} + +async function fetchQuestionPage(slug: string) { + const questionPage = await fetchQuestionHtml(slug); + const isPublicFigurePrediction = questionPage.includes( + "A public prediction by" + ); + + let description: string = ""; + if (!isPublicFigurePrediction) { + const descriptionraw = questionPage.split( + `
` + )[1]; + const descriptionprocessed1 = descriptionraw.split("
")[0]; + description = toMarkdown(descriptionprocessed1); + } + + return { + isPublicFigurePrediction, + description, + }; +} + +async function apiQuestionToFetchedQuestion( + apiQuestion: ApiQuestion +): Promise { + if (apiQuestion.publish_time > now || now > apiQuestion.resolve_time) { + return null; + } + await sleep(SLEEP_TIME / 2); + + const questionPage = await fetchQuestionPage(apiQuestion.page_url); + + if (questionPage.isPublicFigurePrediction) { + console.log("- [Skipping public prediction]"); + return null; + } + + const isBinary = apiQuestion.possibilities.type === "binary"; + let options: FetchedQuestion["options"] = []; + if (isBinary) { + const probability = Number(apiQuestion.community_prediction.full.q2); + options = [ + { + name: "Yes", + probability: probability, + type: "PROBABILITY", + }, + { + name: "No", + probability: 1 - probability, + type: "PROBABILITY", + }, + ]; + } + const question: FetchedQuestion = { + id: `${platformName}-${apiQuestion.id}`, + title: apiQuestion.title, + url: "https://www.metaculus.com" + apiQuestion.page_url, + description: questionPage.description, + options, + qualityindicators: { + numforecasts: apiQuestion.number_of_predictions, + }, + extra: { + resolution_data: { + publish_time: apiQuestion.publish_time, + resolution: apiQuestion.resolution, + close_time: apiQuestion.close_time, + resolve_time: apiQuestion.resolve_time, + }, + }, + //"status": result.status, + //"publish_time": result.publish_time, + //"close_time": result.close_time, + //"type": result.possibilities.type, // We want binary ones here. + //"last_activity_time": result.last_activity_time, + }; + if (apiQuestion.number_of_predictions < 10) { + return null; + } + + return question; +} + +export const metaculus: Platform<"id" | "debug"> = { name: platformName, label: "Metaculus", color: "#006669", - version: "v1", - async fetcher() { - // let metaculusQuestionsInit = await fetchMetaculusQuestions(1) - // let numQueries = Math.round(Number(metaculusQuestionsInit.count) / 20) - // console.log(`Downloading... This might take a while. Total number of queries: ${numQueries}`) - // for (let i = 4; i <= numQueries; i++) { // change numQueries to 10 if one want to just test } - let all_questions = []; - let next = "https://www.metaculus.com/api2/questions/"; + version: "v2", + fetcherArgs: ["id", "debug"], + async fetcher(opts) { + let allQuestions: FetchedQuestion[] = []; + + if (opts.args?.id) { + const apiQuestion = await fetchSingleApiQuestion( + `https://www.metaculus.com/api2/questions/${opts.args?.id}` + ); + const question = await apiQuestionToFetchedQuestion(apiQuestion); + console.log(question); + return { + questions: question ? [question] : [], + partial: true, + }; + } + + let next: string | null = "https://www.metaculus.com/api2/questions/"; let i = 1; while (next) { - if (i % 20 == 0) { + if (i % 20 === 0) { console.log("Sleeping for 500ms"); await sleep(SLEEP_TIME); } console.log(`\nQuery #${i}`); - let metaculusQuestions = await fetchMetaculusQuestions(next); - let results = metaculusQuestions.results; - let j = false; - for (let result of results) { - if (result.publish_time < now && now < result.resolve_time) { - await sleep(SLEEP_TIME / 2); - let questionPage = await fetchMetaculusQuestionDescription( - result.page_url - ); - if (!questionPage.includes("A public prediction by")) { - // console.log(questionPage) - let descriptionraw = questionPage.split( - `
` - )[1]; //.split(`
`)[1] - let descriptionprocessed1 = descriptionraw.split("
")[0]; - let descriptionprocessed2 = toMarkdown(descriptionprocessed1); - let description = descriptionprocessed2; - let isbinary = result.possibilities.type == "binary"; - let options: FetchedQuestion["options"] = []; - if (isbinary) { - let probability = Number(result.community_prediction.full.q2); - options = [ - { - name: "Yes", - probability: probability, - type: "PROBABILITY", - }, - { - name: "No", - probability: 1 - probability, - type: "PROBABILITY", - }, - ]; - } - let id = `${platformName}-${result.id}`; - let interestingInfo: FetchedQuestion = { - id, - title: result.title, - url: "https://www.metaculus.com" + result.page_url, - description, - options, - qualityindicators: { - numforecasts: Number(result.number_of_predictions), - }, - extra: { - resolution_data: { - publish_time: result.publish_time, - resolution: result.resolution, - close_time: result.close_time, - resolve_time: result.resolve_time, - }, - }, - //"status": result.status, - //"publish_time": result.publish_time, - //"close_time": result.close_time, - //"type": result.possibilities.type, // We want binary ones here. - //"last_activity_time": result.last_activity_time, - }; - if (Number(result.number_of_predictions) >= 10) { - console.log(`- ${interestingInfo.title}`); - all_questions.push(interestingInfo); - if ((!j && i % 20 == 0) || DEBUG_MODE == "on") { - console.log(interestingInfo); - j = true; - } - } - } else { - console.log("- [Skipping public prediction]"); - } + const metaculusQuestions: ApiMultipleQuestions = await fetchApiQuestions( + next + ); + const results = metaculusQuestions.results; + + let j = false; + + for (const result of results) { + const question = await apiQuestionToFetchedQuestion(result); + if (!question) { + continue; } + console.log(`- ${question.title}`); + if ((!j && i % 20 === 0) || opts.args?.debug) { + console.log(question); + j = true; + } + allQuestions.push(question); } + next = metaculusQuestions.next; i = i + 1; } - return all_questions; + return { + questions: allQuestions, + partial: false, + }; }, calculateStars(data) { const { numforecasts } = data.qualityindicators; - let nuno = () => + const nuno = () => (numforecasts || 0) > 300 ? 4 : (numforecasts || 0) > 100 ? 3 : 2; - let eli = () => 3; - let misha = () => 3; - let starsDecimal = average([nuno(), eli(), misha()]); - let starsInteger = Math.round(starsDecimal); + const eli = () => 3; + const misha = () => 3; + const starsDecimal = average([nuno(), eli(), misha()]); + const starsInteger = Math.round(starsDecimal); return starsInteger; }, }; From d684d074f53bc66c26d41ede237f0d92473b5650 Mon Sep 17 00:00:00 2001 From: Vyacheslav Matyukhin Date: Thu, 19 May 2022 13:55:56 +0400 Subject: [PATCH 2/3] feat: metaculus fetcher takes markdown description from js vars --- src/backend/platforms/metaculus.ts | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/backend/platforms/metaculus.ts b/src/backend/platforms/metaculus.ts index f1fef7a..d15d4e7 100644 --- a/src/backend/platforms/metaculus.ts +++ b/src/backend/platforms/metaculus.ts @@ -4,7 +4,6 @@ import axios from "axios"; import { average } from "../../utils"; import { sleep } from "../utils/sleep"; -import toMarkdown from "../utils/toMarkdown"; import { FetchedQuestion, Platform } from "./"; /* Definitions */ @@ -134,11 +133,13 @@ async function fetchQuestionPage(slug: string) { let description: string = ""; if (!isPublicFigurePrediction) { - const descriptionraw = questionPage.split( - `
` - )[1]; - const descriptionprocessed1 = descriptionraw.split("
")[0]; - description = toMarkdown(descriptionprocessed1); + const match = questionPage.match( + /\s*window\.metacData\.question = (.+);\s*/ + ); + if (!match) { + throw new Error("metacData not found"); + } + description = JSON.parse(match[1]).description; } return { From f5a3c16322afbe2fdfe5c0b637b1dc7e6f069436 Mon Sep 17 00:00:00 2001 From: Vyacheslav Matyukhin Date: Mon, 30 May 2022 23:39:13 +0300 Subject: [PATCH 3/3] feat: metaculus group questions, description from api --- src/backend/platforms/metaculus.ts | 283 ----------------------- src/backend/platforms/metaculus/api.ts | 213 +++++++++++++++++ src/backend/platforms/metaculus/index.ts | 184 +++++++++++++++ 3 files changed, 397 insertions(+), 283 deletions(-) delete mode 100644 src/backend/platforms/metaculus.ts create mode 100644 src/backend/platforms/metaculus/api.ts create mode 100644 src/backend/platforms/metaculus/index.ts diff --git a/src/backend/platforms/metaculus.ts b/src/backend/platforms/metaculus.ts deleted file mode 100644 index d15d4e7..0000000 --- a/src/backend/platforms/metaculus.ts +++ /dev/null @@ -1,283 +0,0 @@ -/* Imports */ -import Ajv, { JTDDataType } from "ajv/dist/jtd"; -import axios from "axios"; - -import { average } from "../../utils"; -import { sleep } from "../utils/sleep"; -import { FetchedQuestion, Platform } from "./"; - -/* Definitions */ -const platformName = "metaculus"; -const now = new Date().toISOString(); -const SLEEP_TIME = 5000; - -const apiQuestionSchema = { - properties: { - page_url: { - type: "string", - }, - title: { - type: "string", - }, - publish_time: { - type: "string", - }, - close_time: { - type: "string", - }, - resolve_time: { - type: "string", - }, - number_of_predictions: { - type: "uint32", - }, - possibilities: { - properties: { - type: { - type: "string", // TODO - enum? - }, - }, - additionalProperties: true, - }, - community_prediction: { - properties: { - full: { - properties: { - q1: { - type: "float64", - }, - q2: { - type: "float64", - }, - q3: { - type: "float64", - }, - }, - additionalProperties: true, - }, - }, - additionalProperties: true, - }, - }, - additionalProperties: true, -} as const; - -const apiMultipleQuestionsSchema = { - properties: { - results: { - elements: apiQuestionSchema, - }, - next: { - type: "string", - nullable: true, - }, - }, - additionalProperties: true, -} as const; - -type ApiQuestion = JTDDataType; -type ApiMultipleQuestions = JTDDataType; - -const validateApiQuestion = new Ajv().compile(apiQuestionSchema); -const validateApiMultipleQuestions = new Ajv().compile< - JTDDataType ->(apiMultipleQuestionsSchema); - -async function fetchWithRetries(url: string): Promise { - try { - const response = await axios.get(url); - return response.data; - } catch (error) { - console.log(`Error while fetching ${url}`); - console.log(error); - if (axios.isAxiosError(error)) { - if (error.response?.headers["retry-after"]) { - const timeout = error.response.headers["retry-after"]; - console.log(`Timeout: ${timeout}`); - await sleep(Number(timeout) * 1000 + SLEEP_TIME); - } else { - await sleep(SLEEP_TIME); - } - } - } - const response = await axios.get(url); - return response.data; -} - -/* Support functions */ -async function fetchApiQuestions(next: string): Promise { - const data = await fetchWithRetries(next); - if (validateApiMultipleQuestions(data)) { - return data; - } - throw new Error("Response validation failed"); -} - -async function fetchSingleApiQuestion(url: string): Promise { - const data = await fetchWithRetries(url); - if (validateApiQuestion(data)) { - return data; - } - throw new Error("Response validation failed"); -} - -async function fetchQuestionHtml(slug: string) { - return await fetchWithRetries("https://www.metaculus.com" + slug); -} - -async function fetchQuestionPage(slug: string) { - const questionPage = await fetchQuestionHtml(slug); - const isPublicFigurePrediction = questionPage.includes( - "A public prediction by" - ); - - let description: string = ""; - if (!isPublicFigurePrediction) { - const match = questionPage.match( - /\s*window\.metacData\.question = (.+);\s*/ - ); - if (!match) { - throw new Error("metacData not found"); - } - description = JSON.parse(match[1]).description; - } - - return { - isPublicFigurePrediction, - description, - }; -} - -async function apiQuestionToFetchedQuestion( - apiQuestion: ApiQuestion -): Promise { - if (apiQuestion.publish_time > now || now > apiQuestion.resolve_time) { - return null; - } - await sleep(SLEEP_TIME / 2); - - const questionPage = await fetchQuestionPage(apiQuestion.page_url); - - if (questionPage.isPublicFigurePrediction) { - console.log("- [Skipping public prediction]"); - return null; - } - - const isBinary = apiQuestion.possibilities.type === "binary"; - let options: FetchedQuestion["options"] = []; - if (isBinary) { - const probability = Number(apiQuestion.community_prediction.full.q2); - options = [ - { - name: "Yes", - probability: probability, - type: "PROBABILITY", - }, - { - name: "No", - probability: 1 - probability, - type: "PROBABILITY", - }, - ]; - } - const question: FetchedQuestion = { - id: `${platformName}-${apiQuestion.id}`, - title: apiQuestion.title, - url: "https://www.metaculus.com" + apiQuestion.page_url, - description: questionPage.description, - options, - qualityindicators: { - numforecasts: apiQuestion.number_of_predictions, - }, - extra: { - resolution_data: { - publish_time: apiQuestion.publish_time, - resolution: apiQuestion.resolution, - close_time: apiQuestion.close_time, - resolve_time: apiQuestion.resolve_time, - }, - }, - //"status": result.status, - //"publish_time": result.publish_time, - //"close_time": result.close_time, - //"type": result.possibilities.type, // We want binary ones here. - //"last_activity_time": result.last_activity_time, - }; - if (apiQuestion.number_of_predictions < 10) { - return null; - } - - return question; -} - -export const metaculus: Platform<"id" | "debug"> = { - name: platformName, - label: "Metaculus", - color: "#006669", - version: "v2", - fetcherArgs: ["id", "debug"], - async fetcher(opts) { - let allQuestions: FetchedQuestion[] = []; - - if (opts.args?.id) { - const apiQuestion = await fetchSingleApiQuestion( - `https://www.metaculus.com/api2/questions/${opts.args?.id}` - ); - const question = await apiQuestionToFetchedQuestion(apiQuestion); - console.log(question); - return { - questions: question ? [question] : [], - partial: true, - }; - } - - let next: string | null = "https://www.metaculus.com/api2/questions/"; - let i = 1; - while (next) { - if (i % 20 === 0) { - console.log("Sleeping for 500ms"); - await sleep(SLEEP_TIME); - } - console.log(`\nQuery #${i}`); - - const metaculusQuestions: ApiMultipleQuestions = await fetchApiQuestions( - next - ); - const results = metaculusQuestions.results; - - let j = false; - - for (const result of results) { - const question = await apiQuestionToFetchedQuestion(result); - if (!question) { - continue; - } - console.log(`- ${question.title}`); - if ((!j && i % 20 === 0) || opts.args?.debug) { - console.log(question); - j = true; - } - allQuestions.push(question); - } - - next = metaculusQuestions.next; - i = i + 1; - } - - return { - questions: allQuestions, - partial: false, - }; - }, - - calculateStars(data) { - const { numforecasts } = data.qualityindicators; - const nuno = () => - (numforecasts || 0) > 300 ? 4 : (numforecasts || 0) > 100 ? 3 : 2; - const eli = () => 3; - const misha = () => 3; - const starsDecimal = average([nuno(), eli(), misha()]); - const starsInteger = Math.round(starsDecimal); - return starsInteger; - }, -}; diff --git a/src/backend/platforms/metaculus/api.ts b/src/backend/platforms/metaculus/api.ts new file mode 100644 index 0000000..1d15a83 --- /dev/null +++ b/src/backend/platforms/metaculus/api.ts @@ -0,0 +1,213 @@ +import Ajv, { JTDDataType, ValidateFunction } from "ajv/dist/jtd"; +import axios from "axios"; +import { sleep } from "../../utils/sleep"; + +// Type examples: +// - group: https://www.metaculus.com/api2/questions/9866/ +// - claim: https://www.metaculus.com/api2/questions/9668/ +// - subquestion forecast: https://www.metaculus.com/api2/questions/10069/ +// - basic forecast: https://www.metaculus.com/api2/questions/11005/ + +const RETRY_SLEEP_TIME = 5000; + +const commonProps = { + id: { + type: "uint32", + }, + title: { + type: "string", + }, +} as const; + +const predictableProps = { + publish_time: { + type: "string", + }, + close_time: { + type: "string", + }, + resolve_time: { + type: "string", + }, + resolution: { + type: "float64", + nullable: true, + }, + possibilities: { + properties: { + type: { + // Enum["binary", "continuous"], via https://github.com/quantified-uncertainty/metaforecast/pull/84#discussion_r878240875 + // but metaculus might add new values in the future and we don't want the fetcher to break + type: "string", + }, + }, + additionalProperties: true, + }, + number_of_predictions: { + type: "uint32", + }, + community_prediction: { + properties: { + full: { + properties: { + q1: { + type: "float64", + }, + q2: { + type: "float64", + }, + q3: { + type: "float64", + }, + }, + additionalProperties: true, + }, + }, + additionalProperties: true, + }, +} as const; + +const pageProps = { + page_url: { + type: "string", + }, + group: { + type: "uint32", + nullable: true, + }, +} as const; + +// these are missing in /api2/questions/ requests, and building two schemas is too much pain +const optionalPageProps = { + description: { + type: "string", + }, + description_html: { + type: "string", + }, +} as const; + +const apiQuestionSchema = { + discriminator: "type", + mapping: { + forecast: { + properties: { + ...commonProps, + ...pageProps, + ...predictableProps, + }, + optionalProperties: { + ...optionalPageProps, + }, + additionalProperties: true, + }, + group: { + properties: { + ...commonProps, + ...pageProps, + sub_questions: { + elements: { + properties: { + ...commonProps, + ...predictableProps, + }, + additionalProperties: true, + }, + }, + }, + optionalProperties: { + ...optionalPageProps, + }, + additionalProperties: true, + }, + // we're not interested in claims currently (but we should be?) + claim: { + properties: { + ...commonProps, + ...pageProps, + }, + optionalProperties: { + ...optionalPageProps, + }, + additionalProperties: true, + }, + }, +} as const; + +const apiMultipleQuestionsSchema = { + properties: { + results: { + elements: apiQuestionSchema, + }, + next: { + type: "string", + nullable: true, + }, + }, + additionalProperties: true, +} as const; + +export type ApiCommon = JTDDataType<{ + properties: typeof commonProps; +}>; +export type ApiPredictable = JTDDataType<{ + properties: typeof predictableProps; +}>; +export type ApiQuestion = JTDDataType; +export type ApiMultipleQuestions = JTDDataType< + typeof apiMultipleQuestionsSchema +>; + +const validateApiQuestion = new Ajv().compile(apiQuestionSchema); +const validateApiMultipleQuestions = new Ajv().compile( + apiMultipleQuestionsSchema +); + +async function fetchWithRetries(url: string): Promise { + try { + const response = await axios.get(url); + return response.data; + } catch (error) { + console.log(`Error while fetching ${url}`); + console.log(error); + if (axios.isAxiosError(error)) { + if (error.response?.headers["retry-after"]) { + const timeout = error.response.headers["retry-after"]; + console.log(`Timeout: ${timeout}`); + await sleep(Number(timeout) * 1000 + 1000); + } else { + await sleep(RETRY_SLEEP_TIME); + } + } + } + const response = await axios.get(url); + return response.data; +} + +const fetchAndValidate = async ( + url: string, + validator: ValidateFunction +): Promise => { + console.log(url); + const data = await fetchWithRetries(url); + if (validator(data)) { + return data; + } + throw new Error( + `Response validation for url ${url} failed: ` + + JSON.stringify(validator.errors) + ); +}; + +export async function fetchApiQuestions( + next: string +): Promise { + return await fetchAndValidate(next, validateApiMultipleQuestions); +} + +export async function fetchSingleApiQuestion(id: number): Promise { + return await fetchAndValidate( + `https://www.metaculus.com/api2/questions/${id}/`, + validateApiQuestion + ); +} diff --git a/src/backend/platforms/metaculus/index.ts b/src/backend/platforms/metaculus/index.ts new file mode 100644 index 0000000..3cefa2b --- /dev/null +++ b/src/backend/platforms/metaculus/index.ts @@ -0,0 +1,184 @@ +import { FetchedQuestion, Platform } from ".."; +import { average } from "../../../utils"; +import { sleep } from "../../utils/sleep"; +import { + ApiCommon, + ApiMultipleQuestions, + ApiPredictable, + ApiQuestion, + fetchApiQuestions, + fetchSingleApiQuestion, +} from "./api"; + +const platformName = "metaculus"; +const now = new Date().toISOString(); +const SLEEP_TIME = 2500; + +async function apiQuestionToFetchedQuestions( + apiQuestion: ApiQuestion +): Promise { + // one item can expand: + // - to 0 questions if we don't want it; + // - to 1 question if it's a simple forecast + // - to multiple questions if it's a group (see https://github.com/quantified-uncertainty/metaforecast/pull/84 for details) + + await sleep(SLEEP_TIME); + + const skip = (q: ApiPredictable): boolean => { + if (q.publish_time > now || now > q.resolve_time) { + return true; + } + if (q.number_of_predictions < 10) { + return true; + } + return false; + }; + + const buildFetchedQuestion = ( + q: ApiPredictable & ApiCommon + ): Omit => { + const isBinary = q.possibilities.type === "binary"; + let options: FetchedQuestion["options"] = []; + if (isBinary) { + const probability = Number(q.community_prediction.full.q2); + options = [ + { + name: "Yes", + probability: probability, + type: "PROBABILITY", + }, + { + name: "No", + probability: 1 - probability, + type: "PROBABILITY", + }, + ]; + } + return { + id: `${platformName}-${q.id}`, + options, + qualityindicators: { + numforecasts: q.number_of_predictions, + }, + extra: { + resolution_data: { + publish_time: apiQuestion.publish_time, + resolution: apiQuestion.resolution, + close_time: apiQuestion.close_time, + resolve_time: apiQuestion.resolve_time, + }, + }, + }; + }; + + if (apiQuestion.type === "group") { + const apiQuestionDetails = await fetchSingleApiQuestion(apiQuestion.id); + return apiQuestion.sub_questions + .filter((q) => !skip(q)) + .map((sq) => { + const tmp = buildFetchedQuestion(sq); + return { + ...tmp, + title: `${apiQuestion.title} (${sq.title})`, + description: apiQuestionDetails.description || "", + url: `https://www.metaculus.com${apiQuestion.page_url}?sub-question=${sq.id}`, + }; + }); + } else if (apiQuestion.type === "forecast") { + const apiQuestionDetails = await fetchSingleApiQuestion(apiQuestion.id); + if (apiQuestion.group) { + return []; // sub-question, should be handled on the group level + } + if (skip(apiQuestion)) { + return []; + } + + const tmp = buildFetchedQuestion(apiQuestion); + return [ + { + ...tmp, + title: apiQuestion.title, + description: apiQuestionDetails.description || "", + url: "https://www.metaculus.com" + apiQuestion.page_url, + }, + ]; + } else { + if (apiQuestion.type !== "claim") { + // should never happen, since `discriminator` in JTD schema causes a strict runtime check + console.log( + `Unknown metaculus question type: ${ + (apiQuestion as any).type + }, skipping` + ); + } + return []; + } +} + +export const metaculus: Platform<"id" | "debug"> = { + name: platformName, + label: "Metaculus", + color: "#006669", + version: "v2", + fetcherArgs: ["id", "debug"], + async fetcher(opts) { + let allQuestions: FetchedQuestion[] = []; + + if (opts.args?.id) { + const id = Number(opts.args.id); + const apiQuestion = await fetchSingleApiQuestion(id); + const questions = await apiQuestionToFetchedQuestions(apiQuestion); + console.log(questions); + return { + questions, + partial: true, + }; + } + + let next: string | null = "https://www.metaculus.com/api2/questions/"; + let i = 1; + while (next) { + if (i % 20 === 0) { + console.log(`Sleeping for ${SLEEP_TIME}ms`); + await sleep(SLEEP_TIME); + } + console.log(`\nQuery #${i} - ${next}`); + + const apiQuestions: ApiMultipleQuestions = await fetchApiQuestions(next); + const results = apiQuestions.results; + + let j = false; + + for (const result of results) { + const questions = await apiQuestionToFetchedQuestions(result); + for (const question of questions) { + console.log(`- ${question.title}`); + if ((!j && i % 20 === 0) || opts.args?.debug) { + console.log(question); + j = true; + } + allQuestions.push(question); + } + } + + next = apiQuestions.next; + i += 1; + } + + return { + questions: allQuestions, + partial: false, + }; + }, + + calculateStars(data) { + const { numforecasts } = data.qualityindicators; + const nuno = () => + (numforecasts || 0) > 300 ? 4 : (numforecasts || 0) > 100 ? 3 : 2; + const eli = () => 3; + const misha = () => 3; + const starsDecimal = average([nuno(), eli(), misha()]); + const starsInteger = Math.round(starsDecimal); + return starsInteger; + }, +};