Merge pull request from quantified-uncertainty/metaculus-improvements

Metaculus fetcher improvements
This commit is contained in:
Vyacheslav Matyukhin 2022-05-31 22:11:09 +03:00 committed by GitHub
commit c3d144337b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 474 additions and 205 deletions

76
package-lock.json generated
View File

@ -28,6 +28,7 @@
"@types/textversionjs": "^1.1.1",
"@types/tunnel": "^0.0.3",
"airtable": "^0.11.1",
"ajv": "^8.11.0",
"algoliasearch": "^4.10.3",
"autoprefixer": "^10.1.0",
"axios": "^0.25.0",
@ -3498,6 +3499,21 @@
"node": ">=8.0.0"
}
},
"node_modules/ajv": {
"version": "8.11.0",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.11.0.tgz",
"integrity": "sha512-wGgprdCvMalC0BztXvitD2hC04YffAvtsUn93JbGXYLAtCUO4xd17mCCZQxUOItiBwZvJScWo8NIvQMQ71rdpg==",
"dependencies": {
"fast-deep-equal": "^3.1.1",
"json-schema-traverse": "^1.0.0",
"require-from-string": "^2.0.2",
"uri-js": "^4.2.2"
},
"funding": {
"type": "github",
"url": "https://github.com/sponsors/epoberezkin"
}
},
"node_modules/algoliasearch": {
"version": "4.10.3",
"resolved": "https://registry.npmjs.org/algoliasearch/-/algoliasearch-4.10.3.tgz",
@ -5771,6 +5787,11 @@
"url": "https://github.com/sponsors/jaydenseric"
}
},
"node_modules/fast-deep-equal": {
"version": "3.1.3",
"resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz",
"integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="
},
"node_modules/fast-equals": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/fast-equals/-/fast-equals-2.0.4.tgz",
@ -7454,6 +7475,11 @@
"integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==",
"license": "MIT"
},
"node_modules/json-schema-traverse": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz",
"integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="
},
"node_modules/json-stable-stringify": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/json-stable-stringify/-/json-stable-stringify-1.0.1.tgz",
@ -37587,6 +37613,14 @@
"node": ">=0.10.0"
}
},
"node_modules/require-from-string": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz",
"integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/require-main-filename": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/require-main-filename/-/require-main-filename-2.0.0.tgz",
@ -39027,6 +39061,14 @@
"tslib": "^2.0.3"
}
},
"node_modules/uri-js": {
"version": "4.4.1",
"resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz",
"integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==",
"dependencies": {
"punycode": "^2.1.0"
}
},
"node_modules/url-parse-lax": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/url-parse-lax/-/url-parse-lax-3.0.0.tgz",
@ -42453,6 +42495,17 @@
"node-fetch": "^2.6.7"
}
},
"ajv": {
"version": "8.11.0",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.11.0.tgz",
"integrity": "sha512-wGgprdCvMalC0BztXvitD2hC04YffAvtsUn93JbGXYLAtCUO4xd17mCCZQxUOItiBwZvJScWo8NIvQMQ71rdpg==",
"requires": {
"fast-deep-equal": "^3.1.1",
"json-schema-traverse": "^1.0.0",
"require-from-string": "^2.0.2",
"uri-js": "^4.2.2"
}
},
"algoliasearch": {
"version": "4.10.3",
"resolved": "https://registry.npmjs.org/algoliasearch/-/algoliasearch-4.10.3.tgz",
@ -44096,6 +44149,11 @@
"resolved": "https://registry.npmjs.org/extract-files/-/extract-files-9.0.0.tgz",
"integrity": "sha512-CvdFfHkC95B4bBBk36hcEmvdR2awOdhhVUYH6S/zrVj3477zven/fJMYg7121h4T1xHZC+tetUpubpAhxwI7hQ=="
},
"fast-deep-equal": {
"version": "3.1.3",
"resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz",
"integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="
},
"fast-equals": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/fast-equals/-/fast-equals-2.0.4.tgz",
@ -45304,6 +45362,11 @@
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
"integrity": "sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w=="
},
"json-schema-traverse": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz",
"integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="
},
"json-stable-stringify": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/json-stable-stringify/-/json-stable-stringify-1.0.1.tgz",
@ -67926,6 +67989,11 @@
"integrity": "sha1-jGStX9MNqxyXbiNE/+f3kqam30I=",
"dev": true
},
"require-from-string": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz",
"integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw=="
},
"require-main-filename": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/require-main-filename/-/require-main-filename-2.0.0.tgz",
@ -68920,6 +68988,14 @@
"tslib": "^2.0.3"
}
},
"uri-js": {
"version": "4.4.1",
"resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz",
"integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==",
"requires": {
"punycode": "^2.1.0"
}
},
"url-parse-lax": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/url-parse-lax/-/url-parse-lax-3.0.0.tgz",

View File

@ -46,6 +46,7 @@
"@types/textversionjs": "^1.1.1",
"@types/tunnel": "^0.0.3",
"airtable": "^0.11.1",
"ajv": "^8.11.0",
"algoliasearch": "^4.10.3",
"autoprefixer": "^10.1.0",
"axios": "^0.25.0",

View File

@ -1,205 +0,0 @@
/* Imports */
import axios from "axios";
import { average } from "../../utils";
import { sleep } from "../utils/sleep";
import toMarkdown from "../utils/toMarkdown";
import { FetchedQuestion, Platform } from "./";
/* Definitions */
// Platform identifier: used as the platform `name` and as the prefix of every question id.
const platformName = "metaculus";
// ISO-8601 timestamp taken once at module load; compared as a string with publish/resolve times.
let now = new Date().toISOString();
// When set to "on", every accepted question is logged in full by the fetcher.
let DEBUG_MODE = "off";
// Base delay in milliseconds used for throttling between requests and before retries.
let SLEEP_TIME = 5000;
/* Support functions */
/**
 * Downloads one page of the Metaculus questions listing, retrying once on failure.
 *
 * The first attempt's result is returned directly when it succeeds. (Previously the
 * retry lived in a `finally` block and therefore ran even after a successful request,
 * so every page was downloaded twice.)
 *
 * @param next Absolute URL of the listing page to fetch.
 * @returns The response body, or `{ results: [] }` when both attempts fail. The
 *   fallback has no `next` field, so a caller that follows `next` stops paginating.
 */
async function fetchMetaculusQuestions(next: string) {
  const requestPage = async () => {
    const response = await axios({
      url: next,
      method: "GET",
      headers: { "Content-Type": "application/json" },
    });
    return response.data;
  };

  try {
    return await requestPage();
  } catch (error) {
    console.log(`Error in async function fetchMetaculusQuestions(next)`);
    console.log(error);
    if (axios.isAxiosError(error)) {
      const timeout = error.response?.headers["retry-after"];
      if (timeout) {
        // Honour the server-provided delay, plus our own safety margin.
        console.log(`Timeout: ${timeout}`);
        await sleep(Number(timeout) * 1000 + SLEEP_TIME);
      } else {
        await sleep(SLEEP_TIME);
      }
    }
  }

  // Second and last attempt; only reached when the first one failed.
  try {
    return await requestPage();
  } catch (error) {
    console.log(error);
    return { results: [] };
  }
}
/**
 * Downloads the HTML of a Metaculus question page, with a single retry.
 *
 * @param slug Path of the question page, appended to `https://www.metaculus.com`.
 * @returns The response body of the page request.
 * @throws The string "Giving up" when the retry fails as well.
 */
async function fetchMetaculusQuestionDescription(slug: string) {
  const loadPage = async () => {
    const pageResponse = await axios({
      method: "get",
      url: "https://www.metaculus.com" + slug,
    });
    return pageResponse.data;
  };

  try {
    return await loadPage();
  } catch (firstError) {
    console.log(`Error in: fetchMetaculusQuestionDescription`);
    console.log(
      `We encountered some error when attempting to fetch a metaculus page. Trying again`
    );

    // Use the server-provided delay when there is one, otherwise the default pause.
    const timeout = axios.isAxiosError(firstError)
      ? firstError.response?.headers?.["retry-after"]
      : undefined;
    if (typeof timeout === "undefined") {
      await sleep(SLEEP_TIME);
    } else {
      console.log(`Timeout: ${timeout}`);
      await sleep(Number(timeout) * 1000 + SLEEP_TIME);
    }

    try {
      return await loadPage();
    } catch (secondError) {
      console.log(
        `We encountered some error when attempting to fetch a metaculus page.`
      );
      console.log("Error", secondError);
      throw "Giving up";
    }
  }
}
/**
 * Metaculus platform definition (fetcher version "v1").
 *
 * Crawls the paginated questions listing and scrapes each open question's HTML page
 * for its description.
 */
export const metaculus: Platform = {
  name: platformName,
  label: "Metaculus",
  color: "#006669",
  version: "v1",

  /**
   * Follows the `next` links of `/api2/questions/` until exhausted and returns every
   * currently open question with at least 10 predictions.
   *
   * Only binary questions receive Yes/No options; other types are returned with an
   * empty `options` list.
   */
  async fetcher() {
    // Marker that immediately precedes the description HTML on a question page.
    const descriptionMarker = `<div class="content" ng-bind-html-compile="qctrl.question.description_html">`;

    const all_questions: FetchedQuestion[] = [];
    let next = "https://www.metaculus.com/api2/questions/";
    let i = 1;
    while (next) {
      if (i % 20 === 0) {
        // Report the real pause length (the message used to say 500ms regardless).
        console.log(`Sleeping for ${SLEEP_TIME}ms`);
        await sleep(SLEEP_TIME);
      }
      console.log(`\nQuery #${i}`);
      const metaculusQuestions = await fetchMetaculusQuestions(next);
      const results = metaculusQuestions.results;
      // Set once a sample question has been dumped in full for this page.
      let j = false;
      for (const result of results) {
        // Keep only questions that are published and not yet resolved.
        if (!(result.publish_time < now && now < result.resolve_time)) {
          continue;
        }
        await sleep(SLEEP_TIME / 2);
        const questionPage = await fetchMetaculusQuestionDescription(
          result.page_url
        );
        if (questionPage.includes("A public prediction by")) {
          console.log("- [Skipping public prediction]");
          continue;
        }

        // The description is the HTML between the marker and the next closing </div>.
        // When the marker is absent, fall back to an empty description instead of
        // throwing and aborting the whole crawl.
        const descriptionraw = questionPage.split(descriptionMarker)[1];
        let description = "";
        if (descriptionraw === undefined) {
          console.log(`- [Description not found for ${result.page_url}]`);
        } else {
          description = toMarkdown(descriptionraw.split("</div>")[0]);
        }

        const isbinary = result.possibilities.type === "binary";
        let options: FetchedQuestion["options"] = [];
        if (isbinary) {
          const probability = Number(result.community_prediction.full.q2);
          options = [
            {
              name: "Yes",
              probability: probability,
              type: "PROBABILITY",
            },
            {
              name: "No",
              probability: 1 - probability,
              type: "PROBABILITY",
            },
          ];
        }

        const id = `${platformName}-${result.id}`;
        const interestingInfo: FetchedQuestion = {
          id,
          title: result.title,
          url: "https://www.metaculus.com" + result.page_url,
          description,
          options,
          qualityindicators: {
            numforecasts: Number(result.number_of_predictions),
          },
          extra: {
            resolution_data: {
              publish_time: result.publish_time,
              resolution: result.resolution,
              close_time: result.close_time,
              resolve_time: result.resolve_time,
            },
          },
        };

        if (Number(result.number_of_predictions) >= 10) {
          console.log(`- ${interestingInfo.title}`);
          all_questions.push(interestingInfo);
          if ((!j && i % 20 === 0) || DEBUG_MODE == "on") {
            console.log(interestingInfo);
            j = true;
          }
        }
      }
      next = metaculusQuestions.next;
      i = i + 1;
    }
    return all_questions;
  },

  /**
   * Star rating: the rounded average of three per-person scores, of which only the
   * first depends on the number of forecasts.
   */
  calculateStars(data) {
    const { numforecasts } = data.qualityindicators;
    let nuno = () =>
      (numforecasts || 0) > 300 ? 4 : (numforecasts || 0) > 100 ? 3 : 2;
    let eli = () => 3;
    let misha = () => 3;
    let starsDecimal = average([nuno(), eli(), misha()]);
    let starsInteger = Math.round(starsDecimal);
    return starsInteger;
  },
};

View File

@ -0,0 +1,213 @@
import Ajv, { JTDDataType, ValidateFunction } from "ajv/dist/jtd";
import axios from "axios";
import { sleep } from "../../utils/sleep";
// Type examples:
// - group: https://www.metaculus.com/api2/questions/9866/
// - claim: https://www.metaculus.com/api2/questions/9668/
// - subquestion forecast: https://www.metaculus.com/api2/questions/10069/
// - basic forecast: https://www.metaculus.com/api2/questions/11005/
// Delay in milliseconds before the single retry when a failed response has no `retry-after` header.
const RETRY_SLEEP_TIME = 5000;
// JTD property definitions shared by every question shape below
// (top-level forecasts, groups, claims, and group sub-questions).
const commonProps = {
id: {
type: "uint32",
},
title: {
type: "string",
},
} as const;
// JTD property definitions for anything that can be forecast: spread into the
// `forecast` mapping and into the elements of a group's `sub_questions`.
const predictableProps = {
publish_time: {
type: "string",
},
close_time: {
type: "string",
},
resolve_time: {
type: "string",
},
resolution: {
type: "float64",
nullable: true,
},
possibilities: {
properties: {
type: {
// Enum["binary", "continuous"], via https://github.com/quantified-uncertainty/metaforecast/pull/84#discussion_r878240875
// but metaculus might add new values in the future and we don't want the fetcher to break
type: "string",
},
},
additionalProperties: true,
},
number_of_predictions: {
type: "uint32",
},
community_prediction: {
properties: {
full: {
properties: {
// NOTE(review): q1/q2/q3 look like quartiles of the community prediction — confirm
// against the Metaculus API; the fetcher reads q2 as the probability of binary questions.
q1: {
type: "float64",
},
q2: {
type: "float64",
},
q3: {
type: "float64",
},
},
additionalProperties: true,
},
},
additionalProperties: true,
},
} as const;
// JTD property definitions required on every top-level item (forecast, group, claim),
// but not on group sub-questions.
const pageProps = {
page_url: {
type: "string",
},
// Nullable id; the fetcher treats a truthy value on a forecast as "this is a sub-question of a group".
group: {
type: "uint32",
nullable: true,
},
} as const;
// these are missing in /api2/questions/ requests, and building two schemas is too much pain
// (so they are declared under `optionalProperties` and one schema validates both
// listing items and single-question responses)
const optionalPageProps = {
description: {
type: "string",
},
description_html: {
type: "string",
},
} as const;
// JTD schema for a single question object. It is a discriminated union on the `type`
// field with three known variants; each variant tolerates extra properties.
const apiQuestionSchema = {
discriminator: "type",
mapping: {
// A plain forecastable question.
forecast: {
properties: {
...commonProps,
...pageProps,
...predictableProps,
},
optionalProperties: {
...optionalPageProps,
},
additionalProperties: true,
},
// A container whose forecastable parts live in `sub_questions`.
group: {
properties: {
...commonProps,
...pageProps,
sub_questions: {
elements: {
properties: {
...commonProps,
...predictableProps,
},
additionalProperties: true,
},
},
},
optionalProperties: {
...optionalPageProps,
},
additionalProperties: true,
},
// we're not interested in claims currently (but we should be?)
claim: {
properties: {
...commonProps,
...pageProps,
},
optionalProperties: {
...optionalPageProps,
},
additionalProperties: true,
},
},
} as const;
// JTD schema for one page of the questions listing: an array of questions plus a
// nullable `next` string, which the fetcher uses as the URL of the following page.
const apiMultipleQuestionsSchema = {
properties: {
results: {
elements: apiQuestionSchema,
},
next: {
type: "string",
nullable: true,
},
},
additionalProperties: true,
} as const;
// Static type of an object with the `commonProps` fields, derived from the JTD definition.
export type ApiCommon = JTDDataType<{
properties: typeof commonProps;
}>;
// Static type of an object with the `predictableProps` fields.
export type ApiPredictable = JTDDataType<{
properties: typeof predictableProps;
}>;
// Static type of a single question: a union discriminated on `type`.
export type ApiQuestion = JTDDataType<typeof apiQuestionSchema>;
// Static type of one listing page.
export type ApiMultipleQuestions = JTDDataType<
typeof apiMultipleQuestionsSchema
>;
// Compiled JTD validator for a single question; used by fetchSingleApiQuestion.
const validateApiQuestion = new Ajv().compile<ApiQuestion>(apiQuestionSchema);
// Compiled JTD validator for a listing page; used by fetchApiQuestions.
const validateApiMultipleQuestions = new Ajv().compile<ApiMultipleQuestions>(
apiMultipleQuestionsSchema
);
/**
 * Converts a `Retry-After` header value into a delay in milliseconds.
 *
 * The header may hold either a number of seconds or an HTTP-date; both forms are
 * handled. Returns `undefined` when the value is missing or unparseable.
 */
function parseRetryAfterMs(header: unknown): number | undefined {
  if (typeof header !== "string" && typeof header !== "number") {
    return undefined;
  }
  const raw = String(header).trim();
  if (raw === "") {
    return undefined;
  }
  const seconds = Number(raw);
  if (Number.isFinite(seconds)) {
    return Math.max(0, seconds * 1000);
  }
  const retryAt = Date.parse(raw);
  if (Number.isNaN(retryAt)) {
    return undefined;
  }
  // A date in the past means "retry now".
  return Math.max(0, retryAt - Date.now());
}

/**
 * GETs `url` and returns the response body, retrying exactly once on failure.
 *
 * After a failed axios request the function waits before retrying: the server's
 * `Retry-After` delay plus one second when the header is present and parseable,
 * otherwise `RETRY_SLEEP_TIME`. Previously an HTTP-date `Retry-After` produced a
 * `NaN` sleep duration.
 *
 * @param url Absolute URL to fetch.
 * @returns The response body, typed as `T` without runtime validation.
 * @throws Whatever the second `axios.get` call throws when the retry fails too.
 */
async function fetchWithRetries<T = unknown>(url: string): Promise<T> {
  try {
    const response = await axios.get<T>(url);
    return response.data;
  } catch (error) {
    console.log(`Error while fetching ${url}`);
    console.log(error);
    if (axios.isAxiosError(error)) {
      const retryAfter = error.response?.headers["retry-after"];
      const retryAfterMs = parseRetryAfterMs(retryAfter);
      if (retryAfterMs !== undefined) {
        console.log(`Timeout: ${retryAfter}`);
        await sleep(retryAfterMs + 1000);
      } else {
        await sleep(RETRY_SLEEP_TIME);
      }
    }
  }
  // Single retry; a failure here propagates to the caller.
  const response = await axios.get<T>(url);
  return response.data;
}
/**
 * Fetches `url` (logging it first) and checks the body against a compiled validator.
 *
 * @param url Absolute URL to request.
 * @param validator Compiled validator that also acts as a type guard for `T`.
 * @returns The response body, narrowed to `T`.
 * @throws Error carrying the serialized validator errors when the body does not match.
 */
const fetchAndValidate = async <T = unknown>(
  url: string,
  validator: ValidateFunction<T>
): Promise<T> => {
  console.log(url);
  const payload = await fetchWithRetries<object>(url);
  if (!validator(payload)) {
    const details = JSON.stringify(validator.errors);
    throw new Error(`Response validation for url ${url} failed: ` + details);
  }
  return payload;
};
/**
 * Fetches and validates one page of the questions listing.
 *
 * @param next Absolute URL of the page to load.
 * @returns The validated page.
 */
export async function fetchApiQuestions(
  next: string
): Promise<ApiMultipleQuestions> {
  const page = await fetchAndValidate(next, validateApiMultipleQuestions);
  return page;
}
/**
 * Fetches and validates a single question by its numeric id.
 *
 * @param id Metaculus question id, interpolated into the API URL.
 * @returns The validated question.
 */
export async function fetchSingleApiQuestion(id: number): Promise<ApiQuestion> {
  const questionUrl = `https://www.metaculus.com/api2/questions/${id}/`;
  const question = await fetchAndValidate(questionUrl, validateApiQuestion);
  return question;
}

View File

@ -0,0 +1,184 @@
import { FetchedQuestion, Platform } from "..";
import { average } from "../../../utils";
import { sleep } from "../../utils/sleep";
import {
ApiCommon,
ApiMultipleQuestions,
ApiPredictable,
ApiQuestion,
fetchApiQuestions,
fetchSingleApiQuestion,
} from "./api";
// Platform identifier: used as the platform `name` and as the prefix of every question id.
const platformName = "metaculus";
// ISO-8601 timestamp taken once at module load; compared as a string with publish/resolve times.
const now = new Date().toISOString();
// Pause in milliseconds applied before converting each API item and on every 20th listing query.
const SLEEP_TIME = 2500;
/**
 * Converts one Metaculus API item into zero or more fetched questions.
 *
 * One item can expand:
 * - to 0 questions if we don't want it (a claim, a sub-question listed at top level,
 *   or a question that is not open or has fewer than 10 predictions);
 * - to 1 question if it's a simple forecast;
 * - to multiple questions if it's a group
 *   (see https://github.com/quantified-uncertainty/metaforecast/pull/84 for details).
 *
 * The detail endpoint, which supplies the description, is requested only for items
 * that survive filtering.
 *
 * @param apiQuestion A validated question from the listing or detail endpoint.
 * @returns The questions to store; possibly empty.
 */
async function apiQuestionToFetchedQuestions(
  apiQuestion: ApiQuestion
): Promise<FetchedQuestion[]> {
  await sleep(SLEEP_TIME);

  // True for questions that are not currently open or have too few predictions.
  const skip = (q: ApiPredictable): boolean => {
    if (q.publish_time > now || now > q.resolve_time) {
      return true;
    }
    if (q.number_of_predictions < 10) {
      return true;
    }
    return false;
  };

  // Builds the fields that come from the forecastable question itself; the caller
  // adds title, description and url.
  const buildFetchedQuestion = (
    q: ApiPredictable & ApiCommon
  ): Omit<FetchedQuestion, "url" | "description" | "title"> => {
    const isBinary = q.possibilities.type === "binary";
    let options: FetchedQuestion["options"] = [];
    if (isBinary) {
      const probability = Number(q.community_prediction.full.q2);
      options = [
        {
          name: "Yes",
          probability: probability,
          type: "PROBABILITY",
        },
        {
          name: "No",
          probability: 1 - probability,
          type: "PROBABILITY",
        },
      ];
    }
    return {
      id: `${platformName}-${q.id}`,
      options,
      qualityindicators: {
        numforecasts: q.number_of_predictions,
      },
      extra: {
        // Read from `q`, not the outer item: for groups the outer item has no
        // times or resolution of its own, each sub-question carries them.
        resolution_data: {
          publish_time: q.publish_time,
          resolution: q.resolution,
          close_time: q.close_time,
          resolve_time: q.resolve_time,
        },
      },
    };
  };

  if (apiQuestion.type === "group") {
    const wantedSubQuestions = apiQuestion.sub_questions.filter(
      (q) => !skip(q)
    );
    if (wantedSubQuestions.length === 0) {
      return []; // nothing to keep, so don't spend a request on the details
    }
    const apiQuestionDetails = await fetchSingleApiQuestion(apiQuestion.id);
    return wantedSubQuestions.map((sq) => {
      const tmp = buildFetchedQuestion(sq);
      return {
        ...tmp,
        title: `${apiQuestion.title} (${sq.title})`,
        description: apiQuestionDetails.description || "",
        url: `https://www.metaculus.com${apiQuestion.page_url}?sub-question=${sq.id}`,
      };
    });
  } else if (apiQuestion.type === "forecast") {
    if (apiQuestion.group) {
      return []; // sub-question, should be handled on the group level
    }
    if (skip(apiQuestion)) {
      return [];
    }
    // Fetch details only after filtering, so discarded questions cost no request.
    const apiQuestionDetails = await fetchSingleApiQuestion(apiQuestion.id);
    const tmp = buildFetchedQuestion(apiQuestion);
    return [
      {
        ...tmp,
        title: apiQuestion.title,
        description: apiQuestionDetails.description || "",
        url: "https://www.metaculus.com" + apiQuestion.page_url,
      },
    ];
  } else {
    if (apiQuestion.type !== "claim") {
      // should never happen, since `discriminator` in JTD schema causes a strict runtime check
      console.log(
        `Unknown metaculus question type: ${
          (apiQuestion as any).type
        }, skipping`
      );
    }
    return [];
  }
}
/**
 * Metaculus platform definition (fetcher version "v2"), driven by the JSON API.
 *
 * Accepts two fetcher arguments: `id` (process a single question) and `debug`
 * (log every fetched question in full).
 */
export const metaculus: Platform<"id" | "debug"> = {
  name: platformName,
  label: "Metaculus",
  color: "#006669",
  version: "v2",
  fetcherArgs: ["id", "debug"],

  /**
   * With `id`, fetches that one question and returns a partial result. Otherwise
   * walks the whole paginated listing and returns a complete result.
   */
  async fetcher(opts) {
    const collected: FetchedQuestion[] = [];

    // Single-question mode.
    if (opts.args?.id) {
      const questionId = Number(opts.args.id);
      const single = await fetchSingleApiQuestion(questionId);
      const questions = await apiQuestionToFetchedQuestions(single);
      console.log(questions);
      return { questions, partial: true };
    }

    // Full crawl: follow `next` links until the API stops returning one.
    let pageUrl: string | null = "https://www.metaculus.com/api2/questions/";
    let queryNumber = 1;
    while (pageUrl) {
      const isTwentiethQuery = queryNumber % 20 === 0;
      if (isTwentiethQuery) {
        console.log(`Sleeping for ${SLEEP_TIME}ms`);
        await sleep(SLEEP_TIME);
      }
      console.log(`\nQuery #${queryNumber} - ${pageUrl}`);
      const page: ApiMultipleQuestions = await fetchApiQuestions(pageUrl);

      // On every 20th query one sample question is dumped in full.
      let sampleLogged = false;
      for (const item of page.results) {
        const converted = await apiQuestionToFetchedQuestions(item);
        for (const question of converted) {
          console.log(`- ${question.title}`);
          const wantsSample = !sampleLogged && isTwentiethQuery;
          if (wantsSample || opts.args?.debug) {
            console.log(question);
            sampleLogged = true;
          }
          collected.push(question);
        }
      }

      pageUrl = page.next;
      queryNumber += 1;
    }

    return { questions: collected, partial: false };
  },

  /**
   * Star rating: the rounded average of three per-person scores, of which only the
   * first depends on the number of forecasts.
   */
  calculateStars(data) {
    const forecastCount = data.qualityindicators.numforecasts || 0;

    let nunoStars = 2;
    if (forecastCount > 300) {
      nunoStars = 4;
    } else if (forecastCount > 100) {
      nunoStars = 3;
    }
    const eliStars = 3;
    const mishaStars = 3;

    return Math.round(average([nunoStars, eliStars, mishaStars]));
  },
};