diff --git a/src/backend/platforms/metaculus.ts b/src/backend/platforms/metaculus.ts
deleted file mode 100644
index d15d4e7..0000000
--- a/src/backend/platforms/metaculus.ts
+++ /dev/null
@@ -1,283 +0,0 @@
-/* Imports */
-import Ajv, { JTDDataType } from "ajv/dist/jtd";
-import axios from "axios";
-
-import { average } from "../../utils";
-import { sleep } from "../utils/sleep";
-import { FetchedQuestion, Platform } from "./";
-
-/* Definitions */
-const platformName = "metaculus";
-const now = new Date().toISOString();
-const SLEEP_TIME = 5000;
-
-const apiQuestionSchema = {
-  properties: {
-    page_url: {
-      type: "string",
-    },
-    title: {
-      type: "string",
-    },
-    publish_time: {
-      type: "string",
-    },
-    close_time: {
-      type: "string",
-    },
-    resolve_time: {
-      type: "string",
-    },
-    number_of_predictions: {
-      type: "uint32",
-    },
-    possibilities: {
-      properties: {
-        type: {
-          type: "string", // TODO - enum?
-        },
-      },
-      additionalProperties: true,
-    },
-    community_prediction: {
-      properties: {
-        full: {
-          properties: {
-            q1: {
-              type: "float64",
-            },
-            q2: {
-              type: "float64",
-            },
-            q3: {
-              type: "float64",
-            },
-          },
-          additionalProperties: true,
-        },
-      },
-      additionalProperties: true,
-    },
-  },
-  additionalProperties: true,
-} as const;
-
-const apiMultipleQuestionsSchema = {
-  properties: {
-    results: {
-      elements: apiQuestionSchema,
-    },
-    next: {
-      type: "string",
-      nullable: true,
-    },
-  },
-  additionalProperties: true,
-} as const;
-
-type ApiQuestion = JTDDataType<typeof apiQuestionSchema>;
-type ApiMultipleQuestions = JTDDataType<typeof apiMultipleQuestionsSchema>;
-
-const validateApiQuestion = new Ajv().compile(apiQuestionSchema);
-const validateApiMultipleQuestions = new Ajv().compile<
-  JTDDataType<typeof apiMultipleQuestionsSchema>
->(apiMultipleQuestionsSchema);
-
-async function fetchWithRetries<T = unknown>(url: string): Promise<T> {
-  try {
-    const response = await axios.get<T>(url);
-    return response.data;
-  } catch (error) {
-    console.log(`Error while fetching ${url}`);
-    console.log(error);
-    if (axios.isAxiosError(error)) {
-      if (error.response?.headers["retry-after"]) {
-        const timeout = error.response.headers["retry-after"];
-        console.log(`Timeout: ${timeout}`);
-        await sleep(Number(timeout) * 1000 + SLEEP_TIME);
-      } else {
-        await sleep(SLEEP_TIME);
-      }
-    }
-  }
-  const response = await axios.get<T>(url);
-  return response.data;
-}
-
-/* Support functions */
-async function fetchApiQuestions(next: string): Promise<ApiMultipleQuestions> {
-  const data = await fetchWithRetries(next);
-  if (validateApiMultipleQuestions(data)) {
-    return data;
-  }
-  throw new Error("Response validation failed");
-}
-
-async function fetchSingleApiQuestion(url: string): Promise<ApiQuestion> {
-  const data = await fetchWithRetries(url);
-  if (validateApiQuestion(data)) {
-    return data;
-  }
-  throw new Error("Response validation failed");
-}
-
-async function fetchQuestionHtml(slug: string) {
-  return await fetchWithRetries<string>("https://www.metaculus.com" + slug);
-}
-
-async function fetchQuestionPage(slug: string) {
-  const questionPage = await fetchQuestionHtml(slug);
-  const isPublicFigurePrediction = questionPage.includes(
-    "A public prediction by"
-  );
-
-  let description: string = "";
-  if (!isPublicFigurePrediction) {
-    const match = questionPage.match(
-      /\s*window\.metacData\.question = (.+);\s*/
-    );
-    if (!match) {
-      throw new Error("metacData not found");
-    }
-    description = JSON.parse(match[1]).description;
-  }
-
-  return {
-    isPublicFigurePrediction,
-    description,
-  };
-}
-
-async function apiQuestionToFetchedQuestion(
-  apiQuestion: ApiQuestion
-): Promise<FetchedQuestion | null> {
-  if (apiQuestion.publish_time > now || now > apiQuestion.resolve_time) {
-    return null;
-  }
-  await sleep(SLEEP_TIME / 2);
-
-  const questionPage = await fetchQuestionPage(apiQuestion.page_url);
-
-  if (questionPage.isPublicFigurePrediction) {
-    console.log("- [Skipping public prediction]");
-    return null;
-  }
-
-  const isBinary = apiQuestion.possibilities.type === "binary";
-  let options: FetchedQuestion["options"] = [];
-  if (isBinary) {
-    const probability = Number(apiQuestion.community_prediction.full.q2);
-    options = [
-      {
-        name: "Yes",
-        probability: probability,
-        type: "PROBABILITY",
-      },
-      {
-        name: "No",
-        probability: 1 - probability,
-        type: "PROBABILITY",
-      },
-    ];
-  }
-  const question: FetchedQuestion = {
-    id: `${platformName}-${apiQuestion.id}`,
-    title: apiQuestion.title,
-    url: "https://www.metaculus.com" + apiQuestion.page_url,
-    description: questionPage.description,
-    options,
-    qualityindicators: {
-      numforecasts: apiQuestion.number_of_predictions,
-    },
-    extra: {
-      resolution_data: {
-        publish_time: apiQuestion.publish_time,
-        resolution: apiQuestion.resolution,
-        close_time: apiQuestion.close_time,
-        resolve_time: apiQuestion.resolve_time,
-      },
-    },
-    //"status": result.status,
-    //"publish_time": result.publish_time,
-    //"close_time": result.close_time,
-    //"type": result.possibilities.type, // We want binary ones here.
-    //"last_activity_time": result.last_activity_time,
-  };
-  if (apiQuestion.number_of_predictions < 10) {
-    return null;
-  }
-
-  return question;
-}
-
-export const metaculus: Platform<"id" | "debug"> = {
-  name: platformName,
-  label: "Metaculus",
-  color: "#006669",
-  version: "v2",
-  fetcherArgs: ["id", "debug"],
-  async fetcher(opts) {
-    let allQuestions: FetchedQuestion[] = [];
-
-    if (opts.args?.id) {
-      const apiQuestion = await fetchSingleApiQuestion(
-        `https://www.metaculus.com/api2/questions/${opts.args?.id}`
-      );
-      const question = await apiQuestionToFetchedQuestion(apiQuestion);
-      console.log(question);
-      return {
-        questions: question ? [question] : [],
-        partial: true,
-      };
-    }
-
-    let next: string | null = "https://www.metaculus.com/api2/questions/";
-    let i = 1;
-    while (next) {
-      if (i % 20 === 0) {
-        console.log("Sleeping for 500ms");
-        await sleep(SLEEP_TIME);
-      }
-      console.log(`\nQuery #${i}`);
-
-      const metaculusQuestions: ApiMultipleQuestions = await fetchApiQuestions(
-        next
-      );
-      const results = metaculusQuestions.results;
-
-      let j = false;
-
-      for (const result of results) {
-        const question = await apiQuestionToFetchedQuestion(result);
-        if (!question) {
-          continue;
-        }
-        console.log(`- ${question.title}`);
-        if ((!j && i % 20 === 0) || opts.args?.debug) {
-          console.log(question);
-          j = true;
-        }
-        allQuestions.push(question);
-      }
-
-      next = metaculusQuestions.next;
-      i = i + 1;
-    }
-
-    return {
-      questions: allQuestions,
-      partial: false,
-    };
-  },
-
-  calculateStars(data) {
-    const { numforecasts } = data.qualityindicators;
-    const nuno = () =>
-      (numforecasts || 0) > 300 ? 4 : (numforecasts || 0) > 100 ? 3 : 2;
-    const eli = () => 3;
-    const misha = () => 3;
-    const starsDecimal = average([nuno(), eli(), misha()]);
-    const starsInteger = Math.round(starsDecimal);
-    return starsInteger;
-  },
-};
diff --git a/src/backend/platforms/metaculus/api.ts b/src/backend/platforms/metaculus/api.ts
new file mode 100644
index 0000000..1d15a83
--- /dev/null
+++ b/src/backend/platforms/metaculus/api.ts
@@ -0,0 +1,221 @@
+import Ajv, { JTDDataType, ValidateFunction } from "ajv/dist/jtd";
+import axios from "axios";
+import { sleep } from "../../utils/sleep";
+
+// Type examples:
+// - group: https://www.metaculus.com/api2/questions/9866/
+// - claim: https://www.metaculus.com/api2/questions/9668/
+// - subquestion forecast: https://www.metaculus.com/api2/questions/10069/
+// - basic forecast: https://www.metaculus.com/api2/questions/11005/
+
+const RETRY_SLEEP_TIME = 5000;
+
+const commonProps = {
+  id: {
+    type: "uint32",
+  },
+  title: {
+    type: "string",
+  },
+} as const;
+
+const predictableProps = {
+  publish_time: {
+    type: "string",
+  },
+  close_time: {
+    type: "string",
+  },
+  resolve_time: {
+    type: "string",
+  },
+  resolution: {
+    type: "float64",
+    nullable: true,
+  },
+  possibilities: {
+    properties: {
+      type: {
+        // Enum["binary", "continuous"], via https://github.com/quantified-uncertainty/metaforecast/pull/84#discussion_r878240875
+        // but metaculus might add new values in the future and we don't want the fetcher to break
+        type: "string",
+      },
+    },
+    additionalProperties: true,
+  },
+  number_of_predictions: {
+    type: "uint32",
+  },
+  community_prediction: {
+    properties: {
+      full: {
+        properties: {
+          q1: {
+            type: "float64",
+          },
+          q2: {
+            type: "float64",
+          },
+          q3: {
+            type: "float64",
+          },
+        },
+        additionalProperties: true,
+      },
+    },
+    additionalProperties: true,
+  },
+} as const;
+
+const pageProps = {
+  page_url: {
+    type: "string",
+  },
+  group: {
+    type: "uint32",
+    nullable: true,
+  },
+} as const;
+
+// these are missing in /api2/questions/ requests, and building two schemas is too much pain
+const optionalPageProps = {
+  description: {
+    type: "string",
+  },
+  description_html: {
+    type: "string",
+  },
+} as const;
+
+const apiQuestionSchema = {
+  discriminator: "type",
+  mapping: {
+    forecast: {
+      properties: {
+        ...commonProps,
+        ...pageProps,
+        ...predictableProps,
+      },
+      optionalProperties: {
+        ...optionalPageProps,
+      },
+      additionalProperties: true,
+    },
+    group: {
+      properties: {
+        ...commonProps,
+        ...pageProps,
+        sub_questions: {
+          elements: {
+            properties: {
+              ...commonProps,
+              ...predictableProps,
+            },
+            additionalProperties: true,
+          },
+        },
+      },
+      optionalProperties: {
+        ...optionalPageProps,
+      },
+      additionalProperties: true,
+    },
+    // we're not interested in claims currently (but we should be?)
+    claim: {
+      properties: {
+        ...commonProps,
+        ...pageProps,
+      },
+      optionalProperties: {
+        ...optionalPageProps,
+      },
+      additionalProperties: true,
+    },
+  },
+} as const;
+
+const apiMultipleQuestionsSchema = {
+  properties: {
+    results: {
+      elements: apiQuestionSchema,
+    },
+    next: {
+      type: "string",
+      nullable: true,
+    },
+  },
+  additionalProperties: true,
+} as const;
+
+export type ApiCommon = JTDDataType<{
+  properties: typeof commonProps;
+}>;
+export type ApiPredictable = JTDDataType<{
+  properties: typeof predictableProps;
+}>;
+export type ApiQuestion = JTDDataType<typeof apiQuestionSchema>;
+export type ApiMultipleQuestions = JTDDataType<
+  typeof apiMultipleQuestionsSchema
+>;
+
+const validateApiQuestion = new Ajv().compile(apiQuestionSchema);
+const validateApiMultipleQuestions = new Ajv().compile(
+  apiMultipleQuestionsSchema
+);
+
+/**
+ * GETs `url`. On failure the error is logged and, for axios errors, we wait
+ * (honouring `retry-after` if present); then one retry is made, which may throw.
+ */
+async function fetchWithRetries<T = unknown>(url: string): Promise<T> {
+  try {
+    const response = await axios.get<T>(url);
+    return response.data;
+  } catch (error) {
+    console.log(`Error while fetching ${url}`);
+    console.log(error);
+    if (axios.isAxiosError(error)) {
+      if (error.response?.headers["retry-after"]) {
+        const timeout = error.response.headers["retry-after"];
+        console.log(`Timeout: ${timeout}`);
+        await sleep(Number(timeout) * 1000 + 1000);
+      } else {
+        await sleep(RETRY_SLEEP_TIME);
+      }
+    }
+  }
+  const response = await axios.get<T>(url);
+  return response.data;
+}
+
+/**
+ * Fetches `url` and checks the payload against a compiled JTD validator.
+ * @throws Error listing the validator errors when the payload doesn't match.
+ */
+const fetchAndValidate = async <T = unknown>(
+  url: string,
+  validator: ValidateFunction<T>
+): Promise<T> => {
+  console.log(url);
+  const data = await fetchWithRetries(url);
+  if (validator(data)) {
+    return data;
+  }
+  throw new Error(
+    `Response validation for url ${url} failed: ` +
+      JSON.stringify(validator.errors)
+  );
+};
+
+export async function fetchApiQuestions(
+  next: string
+): Promise<ApiMultipleQuestions> {
+  return await fetchAndValidate(next, validateApiMultipleQuestions);
+}
+
+export async function fetchSingleApiQuestion(id: number): Promise<ApiQuestion> {
+  return await fetchAndValidate(
+    `https://www.metaculus.com/api2/questions/${id}/`,
+    validateApiQuestion
+  );
+}
diff --git a/src/backend/platforms/metaculus/index.ts b/src/backend/platforms/metaculus/index.ts
new file mode 100644
index 0000000..3cefa2b
--- /dev/null
+++ b/src/backend/platforms/metaculus/index.ts
@@ -0,0 +1,192 @@
+import { FetchedQuestion, Platform } from "..";
+import { average } from "../../../utils";
+import { sleep } from "../../utils/sleep";
+import {
+  ApiCommon,
+  ApiMultipleQuestions,
+  ApiPredictable,
+  ApiQuestion,
+  fetchApiQuestions,
+  fetchSingleApiQuestion,
+} from "./api";
+
+const platformName = "metaculus";
+const now = new Date().toISOString();
+const SLEEP_TIME = 2500;
+
+/**
+ * Expands one API item into the questions we store: none, one, or one per
+ * sub-question. Question details are requested only for items that are kept.
+ */
+async function apiQuestionToFetchedQuestions(
+  apiQuestion: ApiQuestion
+): Promise<FetchedQuestion[]> {
+  // one item can expand:
+  // - to 0 questions if we don't want it;
+  // - to 1 question if it's a simple forecast
+  // - to multiple questions if it's a group (see https://github.com/quantified-uncertainty/metaforecast/pull/84 for details)
+
+  await sleep(SLEEP_TIME);
+
+  const skip = (q: ApiPredictable): boolean => {
+    if (q.publish_time > now || now > q.resolve_time) {
+      return true;
+    }
+    if (q.number_of_predictions < 10) {
+      return true;
+    }
+    return false;
+  };
+
+  const buildFetchedQuestion = (
+    q: ApiPredictable & ApiCommon
+  ): Omit<FetchedQuestion, "url" | "description" | "title"> => {
+    const isBinary = q.possibilities.type === "binary";
+    let options: FetchedQuestion["options"] = [];
+    if (isBinary) {
+      const probability = Number(q.community_prediction.full.q2);
+      options = [
+        {
+          name: "Yes",
+          probability: probability,
+          type: "PROBABILITY",
+        },
+        {
+          name: "No",
+          probability: 1 - probability,
+          type: "PROBABILITY",
+        },
+      ];
+    }
+    return {
+      id: `${platformName}-${q.id}`,
+      options,
+      qualityindicators: {
+        numforecasts: q.number_of_predictions,
+      },
+      extra: {
+        // taken from `q` itself: for a group, `apiQuestion` is the parent item
+        resolution_data: {
+          publish_time: q.publish_time,
+          resolution: q.resolution,
+          close_time: q.close_time,
+          resolve_time: q.resolve_time,
+        },
+      },
+    };
+  };
+
+  if (apiQuestion.type === "group") {
+    const subQuestions = apiQuestion.sub_questions.filter((q) => !skip(q));
+    if (subQuestions.length === 0) {
+      return []; // nothing to keep, so don't spend a request on the details
+    }
+    const apiQuestionDetails = await fetchSingleApiQuestion(apiQuestion.id);
+    return subQuestions.map((sq) => {
+      const tmp = buildFetchedQuestion(sq);
+      return {
+        ...tmp,
+        title: `${apiQuestion.title} (${sq.title})`,
+        description: apiQuestionDetails.description || "",
+        url: `https://www.metaculus.com${apiQuestion.page_url}?sub-question=${sq.id}`,
+      };
+    });
+  } else if (apiQuestion.type === "forecast") {
+    if (apiQuestion.group) {
+      return []; // sub-question, should be handled on the group level
+    }
+    if (skip(apiQuestion)) {
+      return [];
+    }
+
+    // details are fetched only now, i.e. only for questions we actually keep
+    const apiQuestionDetails = await fetchSingleApiQuestion(apiQuestion.id);
+    const tmp = buildFetchedQuestion(apiQuestion);
+    return [
+      {
+        ...tmp,
+        title: apiQuestion.title,
+        description: apiQuestionDetails.description || "",
+        url: "https://www.metaculus.com" + apiQuestion.page_url,
+      },
+    ];
+  } else {
+    if (apiQuestion.type !== "claim") {
+      // should never happen, since `discriminator` in JTD schema causes a strict runtime check
+      console.log(
+        `Unknown metaculus question type: ${
+          (apiQuestion as any).type
+        }, skipping`
+      );
+    }
+    return [];
+  }
+}
+
+export const metaculus: Platform<"id" | "debug"> = {
+  name: platformName,
+  label: "Metaculus",
+  color: "#006669",
+  version: "v2",
+  fetcherArgs: ["id", "debug"],
+  async fetcher(opts) {
+    let allQuestions: FetchedQuestion[] = [];
+
+    if (opts.args?.id) {
+      const id = Number(opts.args.id);
+      const apiQuestion = await fetchSingleApiQuestion(id);
+      const questions = await apiQuestionToFetchedQuestions(apiQuestion);
+      console.log(questions);
+      return {
+        questions,
+        partial: true,
+      };
+    }
+
+    let next: string | null = "https://www.metaculus.com/api2/questions/";
+    let i = 1;
+    while (next) {
+      if (i % 20 === 0) {
+        console.log(`Sleeping for ${SLEEP_TIME}ms`);
+        await sleep(SLEEP_TIME);
+      }
+      console.log(`\nQuery #${i} - ${next}`);
+
+      const apiQuestions: ApiMultipleQuestions = await fetchApiQuestions(next);
+      const results = apiQuestions.results;
+
+      let j = false;
+
+      for (const result of results) {
+        const questions = await apiQuestionToFetchedQuestions(result);
+        for (const question of questions) {
+          console.log(`- ${question.title}`);
+          if ((!j && i % 20 === 0) || opts.args?.debug) {
+            console.log(question);
+            j = true;
+          }
+          allQuestions.push(question);
+        }
+      }
+
+      next = apiQuestions.next;
+      i += 1;
+    }
+
+    return {
+      questions: allQuestions,
+      partial: false,
+    };
+  },
+
+  calculateStars(data) {
+    const { numforecasts } = data.qualityindicators;
+    const nuno = () =>
+      (numforecasts || 0) > 300 ? 4 : (numforecasts || 0) > 100 ? 3 : 2;
+    const eli = () => 3;
+    const misha = () => 3;
+    const starsDecimal = average([nuno(), eli(), misha()]);
+    const starsInteger = Math.round(starsDecimal);
+    return starsInteger;
+  },
+};