feat: metaculus group questions, description from api

This commit is contained in:
Vyacheslav Matyukhin 2022-05-30 23:39:13 +03:00
parent d684d074f5
commit f5a3c16322
No known key found for this signature in database
GPG Key ID: 3D2A774C5489F96C
3 changed files with 397 additions and 283 deletions

View File

@ -1,283 +0,0 @@
/* Imports */
import Ajv, { JTDDataType } from "ajv/dist/jtd";
import axios from "axios";
import { average } from "../../utils";
import { sleep } from "../utils/sleep";
import { FetchedQuestion, Platform } from "./";
/* Definitions */
// Platform identifier; also used as the prefix of every question id built in this file.
const platformName = "metaculus";
// ISO-8601 timestamp captured once, when this module is evaluated (not once per fetch).
// It is compared as a plain string against the API's publish/resolve times.
const now = new Date().toISOString();
// Base delay handed to sleep() to pace requests — presumably milliseconds, given the `* 1000` applied to Retry-After seconds below.
const SLEEP_TIME = 5000;
// JSON Type Definition (JTD) schema for a single question object returned by the Metaculus API.
// Only the fields listed here are validated; `additionalProperties: true` at every level
// lets all other fields through unchecked. Declared `as const` so that `JTDDataType`
// can derive the static `ApiQuestion` type from it.
const apiQuestionSchema = {
properties: {
// Site-relative path; it is appended to "https://www.metaculus.com" by the code in this file.
page_url: {
type: "string",
},
title: {
type: "string",
},
publish_time: {
type: "string",
},
close_time: {
type: "string",
},
resolve_time: {
type: "string",
},
number_of_predictions: {
type: "uint32",
},
possibilities: {
properties: {
// Compared against "binary" when building answer options.
type: {
type: "string", // TODO - enum?
},
},
additionalProperties: true,
},
community_prediction: {
properties: {
full: {
properties: {
q1: {
type: "float64",
},
// q2 is the value the fetcher uses as the "Yes" probability of binary questions.
q2: {
type: "float64",
},
q3: {
type: "float64",
},
},
additionalProperties: true,
},
},
additionalProperties: true,
},
},
additionalProperties: true,
} as const;
// JTD schema for one page of the question listing: an array of questions under `results`
// plus `next`, which the fetcher follows until it is null.
const apiMultipleQuestionsSchema = {
properties: {
results: {
elements: apiQuestionSchema,
},
// URL of the following page, or null when there is none.
next: {
type: "string",
nullable: true,
},
},
additionalProperties: true,
} as const;
/** Static type of a single question, derived from `apiQuestionSchema`. */
type ApiQuestion = JTDDataType<typeof apiQuestionSchema>;

/** Static type of one page of the question listing, derived from `apiMultipleQuestionsSchema`. */
type ApiMultipleQuestions = JTDDataType<typeof apiMultipleQuestionsSchema>;

// Both validators are compiled once, when the module is evaluated, each on its own Ajv instance.
const validateApiQuestion = new Ajv().compile<ApiQuestion>(apiQuestionSchema);
const validateApiMultipleQuestions = new Ajv().compile<ApiMultipleQuestions>(
  apiMultipleQuestionsSchema
);
/**
 * GETs `url` and returns the response body, making one more attempt if the
 * first one fails. Despite the name, there is exactly one retry.
 *
 * After a failed first attempt the error is logged. If it is an axios error
 * the function pauses before retrying: for the time requested by the server's
 * `Retry-After` header plus `SLEEP_TIME` when the header is present, otherwise
 * for `SLEEP_TIME`. Any other kind of error leads to an immediate retry.
 *
 * @param url Absolute URL to request.
 * @returns The response body, typed as `T` without any runtime check.
 * @throws Whatever the second `axios.get` call rejects with.
 */
async function fetchWithRetries<T = unknown>(url: string): Promise<T> {
  try {
    const response = await axios.get<T>(url);
    return response.data;
  } catch (error) {
    console.log(`Error while fetching ${url}`);
    console.log(error);
    if (axios.isAxiosError(error)) {
      const retryAfter = error.response?.headers["retry-after"];
      if (retryAfter) {
        console.log(`Timeout: ${retryAfter}`);
        // Retry-After is either a number of seconds or an HTTP-date. Number()
        // of a date string is NaN, which previously made the whole delay NaN;
        // handle the date form explicitly and fall back to no extra wait when
        // the value cannot be parsed at all.
        const seconds = Number(retryAfter);
        let waitMs = 0;
        if (Number.isFinite(seconds)) {
          waitMs = Math.max(0, seconds * 1000);
        } else {
          const retryAt = Date.parse(String(retryAfter));
          if (!Number.isNaN(retryAt)) {
            waitMs = Math.max(0, retryAt - Date.now());
          }
        }
        await sleep(waitMs + SLEEP_TIME);
      } else {
        await sleep(SLEEP_TIME);
      }
    }
  }
  // Second and final attempt; a failure here propagates to the caller.
  const response = await axios.get<T>(url);
  return response.data;
}
/* Support functions */
/**
 * Downloads one page of the question listing and validates it with
 * `validateApiMultipleQuestions`.
 *
 * @param next URL of the page to download.
 * @returns The validated page.
 * @throws Error("Response validation failed") when the payload does not pass validation.
 */
async function fetchApiQuestions(next: string): Promise<ApiMultipleQuestions> {
  const payload = await fetchWithRetries<object>(next);
  if (!validateApiMultipleQuestions(payload)) {
    throw new Error("Response validation failed");
  }
  return payload;
}
/**
 * Downloads a single question object and validates it with `validateApiQuestion`.
 *
 * @param url Full API URL of the question.
 * @returns The validated question.
 * @throws Error("Response validation failed") when the payload does not pass validation.
 */
async function fetchSingleApiQuestion(url: string): Promise<ApiQuestion> {
  const payload = await fetchWithRetries<object>(url);
  if (!validateApiQuestion(payload)) {
    throw new Error("Response validation failed");
  }
  return payload;
}
/**
 * Downloads the page found at `slug` on www.metaculus.com.
 *
 * @param slug Site-relative path, appended verbatim to the site origin.
 * @returns The response body, typed as a string without a runtime check.
 */
async function fetchQuestionHtml(slug: string) {
  const pageUrl = `https://www.metaculus.com${slug}`;
  const html = await fetchWithRetries<string>(pageUrl);
  return html;
}
/**
 * Downloads a question page and extracts what the fetcher needs from it.
 *
 * Pages containing the text "A public prediction by" are reported as public
 * figure predictions with an empty description. For every other page the
 * description is read from the JSON assigned to `window.metacData.question`
 * in the page source.
 *
 * @param slug Site-relative path of the question page.
 * @returns The public-figure flag and the description.
 * @throws Error("metacData not found") when the assignment is missing from a regular page;
 *   JSON.parse errors propagate if the captured text is not valid JSON.
 */
async function fetchQuestionPage(slug: string) {
  const html = await fetchQuestionHtml(slug);

  if (html.includes("A public prediction by")) {
    return {
      isPublicFigurePrediction: true,
      description: "",
    };
  }

  const found = /\s*window\.metacData\.question = (.+);\s*/.exec(html);
  if (found === null) {
    throw new Error("metacData not found");
  }
  const description: string = JSON.parse(found[1]).description;
  return {
    isPublicFigurePrediction: false,
    description,
  };
}
/**
 * Converts one API question into a `FetchedQuestion`, or returns null when
 * the question should not be stored.
 *
 * A question is dropped when it is not yet published or is past its resolve
 * time (ISO strings compared against the module-level `now`), when it has
 * fewer than 10 predictions, or when its page is a public figure prediction.
 *
 * The two checks that need only API data run first, so rejected questions no
 * longer cost a sleep and a page download (the prediction-count check used to
 * run after the page had already been fetched).
 *
 * @param apiQuestion Validated question object from the API.
 * @returns The converted question, or null when it is skipped.
 * @throws Whatever `fetchQuestionPage` throws for the question's page.
 */
async function apiQuestionToFetchedQuestion(
  apiQuestion: ApiQuestion
): Promise<FetchedQuestion | null> {
  if (apiQuestion.publish_time > now || now > apiQuestion.resolve_time) {
    return null;
  }
  if (apiQuestion.number_of_predictions < 10) {
    return null;
  }

  // Pace the page requests.
  await sleep(SLEEP_TIME / 2);
  const questionPage = await fetchQuestionPage(apiQuestion.page_url);
  if (questionPage.isPublicFigurePrediction) {
    console.log("- [Skipping public prediction]");
    return null;
  }

  // Only binary questions get answer options; every other type keeps an empty list.
  let options: FetchedQuestion["options"] = [];
  if (apiQuestion.possibilities.type === "binary") {
    const probability = Number(apiQuestion.community_prediction.full.q2);
    options = [
      {
        name: "Yes",
        probability: probability,
        type: "PROBABILITY",
      },
      {
        name: "No",
        probability: 1 - probability,
        type: "PROBABILITY",
      },
    ];
  }

  return {
    id: `${platformName}-${apiQuestion.id}`,
    title: apiQuestion.title,
    url: "https://www.metaculus.com" + apiQuestion.page_url,
    description: questionPage.description,
    options,
    qualityindicators: {
      numforecasts: apiQuestion.number_of_predictions,
    },
    extra: {
      resolution_data: {
        publish_time: apiQuestion.publish_time,
        resolution: apiQuestion.resolution,
        close_time: apiQuestion.close_time,
        resolve_time: apiQuestion.resolve_time,
      },
    },
  };
}
/**
 * Metaculus platform definition: static metadata, the fetcher that downloads
 * questions, and the star rating derived from the number of forecasts.
 */
export const metaculus: Platform<"id" | "debug"> = {
  name: platformName,
  label: "Metaculus",
  color: "#006669",
  version: "v2",
  fetcherArgs: ["id", "debug"],

  /**
   * Fetches questions from the Metaculus API.
   *
   * With the `id` argument, only that question is fetched and the result is
   * marked partial. Otherwise the listing is walked page by page through the
   * `next` links until it ends, and the result is marked complete.
   */
  async fetcher(opts) {
    const allQuestions: FetchedQuestion[] = [];

    if (opts.args?.id) {
      const apiQuestion = await fetchSingleApiQuestion(
        `https://www.metaculus.com/api2/questions/${opts.args?.id}`
      );
      const question = await apiQuestionToFetchedQuestion(apiQuestion);
      console.log(question);
      return {
        questions: question ? [question] : [],
        partial: true,
      };
    }

    let next: string | null = "https://www.metaculus.com/api2/questions/";
    let i = 1;
    while (next) {
      if (i % 20 === 0) {
        // Report the delay actually used; the message used to say 500ms
        // regardless of SLEEP_TIME.
        console.log(`Sleeping for ${SLEEP_TIME}ms`);
        await sleep(SLEEP_TIME);
      }
      console.log(`\nQuery #${i}`);
      const metaculusQuestions: ApiMultipleQuestions = await fetchApiQuestions(
        next
      );
      const results = metaculusQuestions.results;
      // Set once a sample question has been printed for this page.
      let j = false;
      for (const result of results) {
        const question = await apiQuestionToFetchedQuestion(result);
        if (!question) {
          continue;
        }
        console.log(`- ${question.title}`);
        // Print the first kept question of every 20th page, or every question in debug mode.
        if ((!j && i % 20 === 0) || opts.args?.debug) {
          console.log(question);
          j = true;
        }
        allQuestions.push(question);
      }
      next = metaculusQuestions.next;
      i = i + 1;
    }

    return {
      questions: allQuestions,
      partial: false,
    };
  },

  /**
   * Rounded average of three ratings: two fixed at 3 and one that is 4 above
   * 300 forecasts, 3 above 100, and 2 otherwise.
   */
  calculateStars(data) {
    const { numforecasts } = data.qualityindicators;
    const nuno = () =>
      (numforecasts || 0) > 300 ? 4 : (numforecasts || 0) > 100 ? 3 : 2;
    const eli = () => 3;
    const misha = () => 3;
    const starsDecimal = average([nuno(), eli(), misha()]);
    const starsInteger = Math.round(starsDecimal);
    return starsInteger;
  },
};

View File

@ -0,0 +1,213 @@
import Ajv, { JTDDataType, ValidateFunction } from "ajv/dist/jtd";
import axios from "axios";
import { sleep } from "../../utils/sleep";
// Type examples:
// - group: https://www.metaculus.com/api2/questions/9866/
// - claim: https://www.metaculus.com/api2/questions/9668/
// - subquestion forecast: https://www.metaculus.com/api2/questions/10069/
// - basic forecast: https://www.metaculus.com/api2/questions/11005/
// Pause before the single retry when a failed response carries no Retry-After header
// (presumably milliseconds, given the `* 1000` applied to Retry-After seconds in fetchWithRetries).
const RETRY_SLEEP_TIME = 5000;
// Required fields shared by every schema variant in this file: forecasts, groups, claims
// and the sub-questions nested inside groups.
const commonProps = {
id: {
type: "uint32",
},
title: {
type: "string",
},
} as const;
// Required fields of anything that carries a prediction. The schema below applies them to
// top-level `forecast` questions and to each entry of a group's `sub_questions`.
const predictableProps = {
publish_time: {
type: "string",
},
close_time: {
type: "string",
},
resolve_time: {
type: "string",
},
// Nullable: the API may send null here.
resolution: {
type: "float64",
nullable: true,
},
possibilities: {
properties: {
type: {
// Enum["binary", "continuous"], via https://github.com/quantified-uncertainty/metaforecast/pull/84#discussion_r878240875
// but metaculus might add new values in the future and we don't want the fetcher to break
type: "string",
},
},
additionalProperties: true,
},
number_of_predictions: {
type: "uint32",
},
community_prediction: {
properties: {
full: {
properties: {
q1: {
type: "float64",
},
q2: {
type: "float64",
},
q3: {
type: "float64",
},
},
additionalProperties: true,
},
},
additionalProperties: true,
},
} as const;
// Required fields of the top-level variants (forecast, group, claim); the schema does not
// apply them to the sub-questions nested inside a group.
const pageProps = {
page_url: {
type: "string",
},
// Nullable id; a non-null value is what marks a forecast as belonging to a group.
group: {
type: "uint32",
nullable: true,
},
} as const;
// these are missing in /api2/questions/ requests, and building two schemas is too much pain
// (hence they are wired in as `optionalProperties` of every variant rather than as required fields)
const optionalPageProps = {
description: {
type: "string",
},
description_html: {
type: "string",
},
} as const;
// JTD schema for one item of the Metaculus API, in discriminator form: the item's `type`
// field selects which of the mappings below is used to validate it. Every mapping allows
// additional, unlisted properties.
const apiQuestionSchema = {
discriminator: "type",
mapping: {
// A single question with its own prediction data.
forecast: {
properties: {
...commonProps,
...pageProps,
...predictableProps,
},
optionalProperties: {
...optionalPageProps,
},
additionalProperties: true,
},
// A container: the prediction data lives on each entry of `sub_questions`, not on the group.
group: {
properties: {
...commonProps,
...pageProps,
sub_questions: {
elements: {
properties: {
...commonProps,
...predictableProps,
},
additionalProperties: true,
},
},
},
optionalProperties: {
...optionalPageProps,
},
additionalProperties: true,
},
// we're not interested in claims currently (but we should be?)
claim: {
properties: {
...commonProps,
...pageProps,
},
optionalProperties: {
...optionalPageProps,
},
additionalProperties: true,
},
},
} as const;
// JTD schema for one page of the question listing: the items under `results` plus the
// link to the following page.
const apiMultipleQuestionsSchema = {
properties: {
results: {
elements: apiQuestionSchema,
},
// URL of the following page, or null when there is none.
next: {
type: "string",
nullable: true,
},
},
additionalProperties: true,
} as const;
// Static type of the fields in `commonProps`.
export type ApiCommon = JTDDataType<{
properties: typeof commonProps;
}>;
// Static type of the fields in `predictableProps`.
export type ApiPredictable = JTDDataType<{
properties: typeof predictableProps;
}>;
// Static type of one API item, derived from the discriminator schema (discriminated on `type`).
export type ApiQuestion = JTDDataType<typeof apiQuestionSchema>;
// Static type of one listing page.
export type ApiMultipleQuestions = JTDDataType<
typeof apiMultipleQuestionsSchema
>;
// Validators are compiled once, when the module is evaluated, each on its own Ajv instance.
const validateApiQuestion = new Ajv().compile<ApiQuestion>(apiQuestionSchema);
const validateApiMultipleQuestions = new Ajv().compile<ApiMultipleQuestions>(
apiMultipleQuestionsSchema
);
/**
 * GETs `url` and returns the response body, making one more attempt if the
 * first one fails. Despite the name, there is exactly one retry.
 *
 * After a failed first attempt the error is logged. If it is an axios error
 * the function pauses before retrying: for the time requested by the server's
 * `Retry-After` header plus one second when the header is present and
 * parseable, otherwise for `RETRY_SLEEP_TIME`. Any other kind of error leads
 * to an immediate retry.
 *
 * @param url Absolute URL to request.
 * @returns The response body, typed as `T` without any runtime check.
 * @throws Whatever the second `axios.get` call rejects with.
 */
async function fetchWithRetries<T = unknown>(url: string): Promise<T> {
  try {
    const response = await axios.get<T>(url);
    return response.data;
  } catch (error) {
    console.log(`Error while fetching ${url}`);
    console.log(error);
    if (axios.isAxiosError(error)) {
      const retryAfter = error.response?.headers["retry-after"];
      if (retryAfter) {
        console.log(`Timeout: ${retryAfter}`);
        // Retry-After is either a number of seconds or an HTTP-date. Number()
        // of a date string is NaN, which previously made the whole delay NaN;
        // handle the date form explicitly and use the default pause when the
        // value cannot be parsed at all.
        const seconds = Number(retryAfter);
        let waitMs: number | null = null;
        if (Number.isFinite(seconds)) {
          waitMs = Math.max(0, seconds * 1000);
        } else {
          const retryAt = Date.parse(String(retryAfter));
          if (!Number.isNaN(retryAt)) {
            waitMs = Math.max(0, retryAt - Date.now());
          }
        }
        await sleep(waitMs === null ? RETRY_SLEEP_TIME : waitMs + 1000);
      } else {
        await sleep(RETRY_SLEEP_TIME);
      }
    }
  }
  // Second and final attempt; a failure here propagates to the caller.
  const response = await axios.get<T>(url);
  return response.data;
}
/**
 * Logs `url`, downloads it with `fetchWithRetries` and checks the payload
 * with the given validator.
 *
 * @param url URL to download.
 * @param validator Compiled Ajv validator acting as a type guard for `T`.
 * @returns The payload, narrowed to `T`.
 * @throws Error naming the url and carrying the serialized validator errors
 *   when validation fails.
 */
async function fetchAndValidate<T = unknown>(
  url: string,
  validator: ValidateFunction<T>
): Promise<T> {
  console.log(url);
  const payload = await fetchWithRetries<object>(url);
  if (!validator(payload)) {
    const details = JSON.stringify(validator.errors);
    throw new Error(`Response validation for url ${url} failed: ${details}`);
  }
  return payload;
}
/**
 * Downloads and validates one page of the question listing.
 *
 * @param next URL of the page to download.
 * @returns The validated page.
 */
export async function fetchApiQuestions(
  next: string
): Promise<ApiMultipleQuestions> {
  const page = await fetchAndValidate(next, validateApiMultipleQuestions);
  return page;
}
/**
 * Downloads and validates a single item from `/api2/questions/{id}/`.
 *
 * @param id Numeric id of the item.
 * @returns The validated item.
 */
export async function fetchSingleApiQuestion(id: number): Promise<ApiQuestion> {
  const questionUrl = "https://www.metaculus.com/api2/questions/" + id + "/";
  const question = await fetchAndValidate(questionUrl, validateApiQuestion);
  return question;
}

View File

@ -0,0 +1,184 @@
import { FetchedQuestion, Platform } from "..";
import { average } from "../../../utils";
import { sleep } from "../../utils/sleep";
import {
ApiCommon,
ApiMultipleQuestions,
ApiPredictable,
ApiQuestion,
fetchApiQuestions,
fetchSingleApiQuestion,
} from "./api";
// Platform identifier; also used as the prefix of every question id built in this file.
const platformName = "metaculus";
// ISO-8601 timestamp captured once, when this module is evaluated (not once per fetch).
// It is compared as a plain string against the API's publish/resolve times.
const now = new Date().toISOString();
// Delay in milliseconds (the fetcher logs it with an "ms" suffix) passed to sleep() to pace requests.
const SLEEP_TIME = 2500;
/**
 * Tells whether a predictable question (a standalone forecast or a group
 * sub-question) should be left out of the results: it is not yet published,
 * it is past its resolve time, or it has fewer than 10 predictions.
 * Times are compared as ISO strings against the module-level `now`.
 */
const shouldSkipPredictable = (q: ApiPredictable): boolean =>
  q.publish_time > now || now > q.resolve_time || q.number_of_predictions < 10;

/**
 * Builds the part of a `FetchedQuestion` that depends only on the predictable
 * question itself; the caller adds title, description and url.
 */
function buildQuestionCore(
  q: ApiPredictable & ApiCommon
): Omit<FetchedQuestion, "url" | "description" | "title"> {
  // Only binary questions get answer options; every other type keeps an empty list.
  let options: FetchedQuestion["options"] = [];
  if (q.possibilities.type === "binary") {
    const probability = Number(q.community_prediction.full.q2);
    options = [
      {
        name: "Yes",
        probability: probability,
        type: "PROBABILITY",
      },
      {
        name: "No",
        probability: 1 - probability,
        type: "PROBABILITY",
      },
    ];
  }
  return {
    id: `${platformName}-${q.id}`,
    options,
    qualityindicators: {
      numforecasts: q.number_of_predictions,
    },
    extra: {
      // Read from `q` itself so that a group's sub-questions carry their own
      // times and resolution rather than whatever the enclosing group has.
      resolution_data: {
        publish_time: q.publish_time,
        resolution: q.resolution,
        close_time: q.close_time,
        resolve_time: q.resolve_time,
      },
    },
  };
}

/**
 * Returns the description of an API item. The listing endpoint omits it, so
 * when it is absent the single-item endpoint is queried after a pacing sleep.
 */
async function resolveDescription(apiQuestion: ApiQuestion): Promise<string> {
  if (apiQuestion.description !== undefined) {
    return apiQuestion.description;
  }
  await sleep(SLEEP_TIME);
  const details = await fetchSingleApiQuestion(apiQuestion.id);
  return details.description ?? "";
}

/**
 * Converts one API item into zero or more `FetchedQuestion`s:
 * - 0 questions if we don't want it (claims, forecasts that belong to a
 *   group, anything rejected by the publish/resolve/prediction-count checks);
 * - 1 question if it's a simple forecast;
 * - several questions if it's a group, one per retained sub-question
 *   (see https://github.com/quantified-uncertainty/metaforecast/pull/84 for details).
 *
 * All checks that need only the data at hand run before any network access,
 * so the pacing sleep and the details request happen only for items that
 * actually produce output.
 *
 * @param apiQuestion Validated item from the API.
 * @returns The questions derived from the item, possibly none.
 * @throws Whatever `fetchSingleApiQuestion` throws while loading the description.
 */
async function apiQuestionToFetchedQuestions(
  apiQuestion: ApiQuestion
): Promise<FetchedQuestion[]> {
  if (apiQuestion.type === "group") {
    const subQuestions = apiQuestion.sub_questions.filter(
      (sq) => !shouldSkipPredictable(sq)
    );
    if (subQuestions.length === 0) {
      return [];
    }
    // All sub-questions share the description of the group.
    const description = await resolveDescription(apiQuestion);
    return subQuestions.map((sq) => ({
      ...buildQuestionCore(sq),
      title: `${apiQuestion.title} (${sq.title})`,
      description,
      url: `https://www.metaculus.com${apiQuestion.page_url}?sub-question=${sq.id}`,
    }));
  }

  if (apiQuestion.type === "forecast") {
    if (apiQuestion.group) {
      return []; // sub-question, should be handled on the group level
    }
    if (shouldSkipPredictable(apiQuestion)) {
      return [];
    }
    const description = await resolveDescription(apiQuestion);
    return [
      {
        ...buildQuestionCore(apiQuestion),
        title: apiQuestion.title,
        description,
        url: "https://www.metaculus.com" + apiQuestion.page_url,
      },
    ];
  }

  if (apiQuestion.type !== "claim") {
    // should never happen, since `discriminator` in JTD schema causes a strict runtime check
    console.log(
      `Unknown metaculus question type: ${String(
        (apiQuestion as { type?: unknown }).type
      )}, skipping`
    );
  }
  return [];
}
/**
 * Metaculus platform definition: static metadata, the fetcher that downloads
 * questions, and the star rating derived from the number of forecasts.
 */
export const metaculus: Platform<"id" | "debug"> = {
  name: platformName,
  label: "Metaculus",
  color: "#006669",
  version: "v2",
  fetcherArgs: ["id", "debug"],

  /**
   * With the `id` argument, converts just that item and reports the result as
   * partial. Otherwise walks the listing through its `next` links until it
   * ends and reports the result as complete.
   */
  async fetcher(opts) {
    const requestedId = opts.args?.id;
    if (requestedId) {
      const single = await fetchSingleApiQuestion(Number(requestedId));
      const questions = await apiQuestionToFetchedQuestions(single);
      console.log(questions);
      return { questions, partial: true };
    }

    const collected: FetchedQuestion[] = [];
    let pageUrl: string | null = "https://www.metaculus.com/api2/questions/";
    for (let queryNumber = 1; pageUrl; queryNumber += 1) {
      // Every 20th query pauses first and later prints one sample question.
      const isSampleQuery = queryNumber % 20 === 0;
      if (isSampleQuery) {
        console.log(`Sleeping for ${SLEEP_TIME}ms`);
        await sleep(SLEEP_TIME);
      }

      console.log(`\nQuery #${queryNumber} - ${pageUrl}`);
      const page: ApiMultipleQuestions = await fetchApiQuestions(pageUrl);

      let samplePrinted = false;
      for (const item of page.results) {
        const converted = await apiQuestionToFetchedQuestions(item);
        for (const question of converted) {
          console.log(`- ${question.title}`);
          if ((isSampleQuery && !samplePrinted) || opts.args?.debug) {
            console.log(question);
            samplePrinted = true;
          }
          collected.push(question);
        }
      }

      pageUrl = page.next;
    }

    return { questions: collected, partial: false };
  },

  /**
   * Rounded average of three ratings: two fixed at 3 and one that is 4 above
   * 300 forecasts, 3 above 100, and 2 otherwise.
   */
  calculateStars(data) {
    const forecasts = data.qualityindicators.numforecasts || 0;

    let nunoStars = 2;
    if (forecasts > 300) {
      nunoStars = 4;
    } else if (forecasts > 100) {
      nunoStars = 3;
    }
    const eliStars = 3;
    const mishaStars = 3;

    return Math.round(average([nunoStars, eliStars, mishaStars]));
  },
};