feat: firstSeen, new questions in graphql

This commit is contained in:
Vyacheslav Matyukhin 2022-05-21 00:59:33 +04:00
parent 159f9c2b45
commit 5fdea80f8b
No known key found for this signature in database
GPG Key ID: 3D2A774C5489F96C
19 changed files with 240 additions and 100 deletions

View File

@ -0,0 +1,28 @@
-- Migration: introduce "fetched" on "questions" and "history", and "first_seen" on "questions".
-- Each new column is added as nullable, backfilled from the existing "timestamp" column,
-- and only afterwards marked NOT NULL, so rows that already exist satisfy the constraint
-- (assuming "timestamp" itself holds no NULLs).

-- questions: add both columns and seed them with the current "timestamp" value
ALTER TABLE "questions"
ADD COLUMN "fetched" TIMESTAMP(6),
ADD COLUMN "first_seen" TIMESTAMP(6);
UPDATE "questions"
SET "fetched" = "timestamp", "first_seen" = "timestamp";
ALTER TABLE "questions"
ALTER COLUMN "fetched" SET NOT NULL,
ALTER COLUMN "first_seen" SET NOT NULL;
-- history: add "fetched" and seed it with the current "timestamp" value
ALTER TABLE "history"
ADD COLUMN "fetched" TIMESTAMP(6);
UPDATE "history" SET "fetched" = "timestamp";
ALTER TABLE "history"
ALTER COLUMN "fetched" SET NOT NULL;
-- populate first_seen: replace the seeded value with the earliest "fetched" found in
-- "history" for the same id. This must run after history."fetched" has been backfilled above.
-- Questions without any matching history row keep the value seeded from "timestamp".
UPDATE questions
SET "first_seen" = h.fs
FROM (
SELECT id, MIN(fetched) AS fs FROM history GROUP BY id
) as h
WHERE questions.id = h.id;

View File

@ -0,0 +1,14 @@
-- Migration: add single-column indexes on "platform" and "fetched" for both "history" and
-- "questions", plus an index on "first_seen" for "questions".

-- CreateIndex (history by platform)
CREATE INDEX "history_platform_idx" ON "history"("platform");
-- CreateIndex (history by fetch time)
CREATE INDEX "history_fetched_idx" ON "history"("fetched");
-- CreateIndex (questions by platform)
CREATE INDEX "questions_platform_idx" ON "questions"("platform");
-- CreateIndex (questions by fetch time)
CREATE INDEX "questions_fetched_idx" ON "questions"("fetched");
-- CreateIndex (questions by first-seen time)
CREATE INDEX "questions_first_seen_idx" ON "questions"("first_seen");

View File

@ -24,24 +24,6 @@ model Dashboard {
@@map("dashboards")
}
// Historical copy of a question's fields, stored in the "history" table.
model History {
// Not unique in this table (only indexed); `pk` below is the primary key.
id String
// Optional link to the live Question row; set to NULL when that question is deleted.
idref String?
question Question? @relation(fields: [idref], references: [id], onDelete: SetNull, onUpdate: Restrict)
title String
url String
platform String
description String
options Json
timestamp DateTime @db.Timestamp(6)
qualityindicators Json
extra Json
// Surrogate autoincrementing primary key.
pk Int @id @default(autoincrement())
@@index([id])
@@map("history")
}
model Question {
/// E.g. "fantasyscotus-580"
id String @id
@ -68,7 +50,9 @@ model Question {
// }
// ]
options Json
timestamp DateTime @db.Timestamp(6)
timestamp DateTime @db.Timestamp(6) // deprecated
fetched DateTime @db.Timestamp(6)
firstSeen DateTime @map("first_seen") @db.Timestamp(6)
// {
// "numforecasts": 120,
@ -80,9 +64,33 @@ model Question {
onFrontpage FrontpageId?
history History[]
@@index([platform])
@@index([fetched])
@@index([firstSeen])
@@map("questions")
}
// Historical copy of a question's fields, stored in the "history" table.
model History {
// Not unique in this table (only indexed); `pk` below is the primary key.
id String
// Optional link to the live Question row; set to NULL when that question is deleted.
idref String?
question Question? @relation(fields: [idref], references: [id], onDelete: SetNull, onUpdate: Restrict)
title String
url String
platform String
description String
options Json
timestamp DateTime @db.Timestamp(6) // deprecated
// Non-nullable timestamp column that accompanies the deprecated `timestamp` above.
fetched DateTime @db.Timestamp(6)
qualityindicators Json
extra Json
// Surrogate autoincrementing primary key.
pk Int @id @default(autoincrement())
@@index([id])
@@index([platform])
@@index([fetched])
@@map("history")
}
model FrontpageId {
question Question @relation(fields: [id], references: [id])
id String @unique

View File

@ -1,9 +1,12 @@
import { platforms } from "../platforms/registry";
import { getPlatforms } from "../platforms/registry";
import { executeJobByName } from "./jobs";
/* Do everything */
export async function doEverything() {
let jobNames = [...platforms.map((platform) => platform.name), "algolia"];
let jobNames = [
...getPlatforms().map((platform) => platform.name),
"algolia",
];
console.log("");
console.log("");

View File

@ -1,7 +1,7 @@
import { doEverything } from "../flow/doEverything";
import { rebuildFrontpage } from "../frontpage";
import { processPlatform } from "../platforms";
import { platforms } from "../platforms/registry";
import { getPlatforms } from "../platforms/registry";
import { rebuildAlgoliaDatabase } from "../utils/algolia";
import { sleep } from "../utils/sleep";
@ -14,7 +14,7 @@ interface Job<ArgNames extends string = ""> {
}
export const jobs: Job<string>[] = [
...platforms.map((platform) => ({
...getPlatforms().map((platform) => ({
name: platform.name,
message: `Download predictions from ${platform.name}`,
...(platform.version === "v2" ? { args: platform.fetcherArgs } : {}),

View File

@ -26,7 +26,7 @@ export async function rebuildFrontpage() {
AND questions.description != ''
AND JSONB_ARRAY_LENGTH(questions.options) > 0
GROUP BY questions.id
HAVING COUNT(DISTINCT history.timestamp) >= 7
HAVING COUNT(DISTINCT history.fetched) >= 7
ORDER BY RANDOM() LIMIT 50
`;

View File

@ -79,7 +79,7 @@ export const givewellopenphil: Platform = {
const dataWithDate = data.map((datum: any) => ({
...datum,
platform: platformName,
timestamp: new Date("2021-02-23"),
// timestamp: new Date("2021-02-23"),
}));
return dataWithDate;
},

View File

@ -2,9 +2,8 @@ import axios from "axios";
import { Question } from "@prisma/client";
import { prisma } from "../database/prisma";
import { AlgoliaQuestion } from "../utils/algolia";
import { FetchedQuestion, Platform, prepareQuestion } from "./";
import { AlgoliaQuestion, questionToAlgoliaQuestion } from "../utils/algolia";
import { FetchedQuestion, Platform, prepareQuestion, upsertSingleQuestion } from "./";
/* Definitions */
const searchEndpoint =
@ -55,10 +54,12 @@ async function search(query: string): Promise<AlgoliaQuestion[]> {
const models: any[] = response.data.hits;
const mappedModels: AlgoliaQuestion[] = models.map((model) => {
const q = modelToQuestion(model);
return {
return questionToAlgoliaQuestion({
...q,
timestamp: String(q.timestamp),
};
fetched: new Date(),
timestamp: new Date(),
firstSeen: new Date(),
});
});
// filter for duplicates. Surprisingly common.
@ -76,12 +77,8 @@ async function search(query: string): Promise<AlgoliaQuestion[]> {
const fetchQuestion = async (id: number): Promise<Question> => {
const response = await axios({ url: `${apiEndpoint}/spaces/${id}` });
let q = modelToQuestion(response.data);
return await prisma.question.upsert({
where: { id: q.id },
create: q,
update: q,
});
const q = modelToQuestion(response.data);
return await upsertSingleQuestion(q);
};
export const guesstimate: Platform & {

View File

@ -28,9 +28,14 @@ export interface QualityIndicators {
export type FetchedQuestion = Omit<
Question,
"extra" | "qualityindicators" | "timestamp" | "platform" | "options"
| "extra"
| "qualityindicators"
| "fetched"
| "firstSeen"
| "timestamp"
| "platform"
| "options"
> & {
timestamp?: Date;
extra?: object; // required in DB but annoying to return empty; also this is slightly stricter than Prisma's JsonValue
options: QuestionOption[]; // stronger type than Prisma's JsonValue
qualityindicators: Omit<QualityIndicators, "stars">; // slightly stronger type than Prisma's JsonValue
@ -78,8 +83,14 @@ export type Platform<ArgNames extends string = ""> = {
// So here we build a new type which should be ok to use both in place of prisma's Question type and as an input to its update or create methods.
type PreparedQuestion = Omit<
Question,
"extra" | "qualityindicators" | "options"
| "extra"
| "qualityindicators"
| "options"
| "fetched"
| "firstSeen"
| "timestamp"
> & {
fetched: Date;
extra: NonNullable<Question["extra"]>;
qualityindicators: NonNullable<Question["qualityindicators"]>;
options: NonNullable<Question["options"]>;
@ -91,8 +102,8 @@ export const prepareQuestion = (
): PreparedQuestion => {
return {
extra: {},
timestamp: new Date(),
...q,
fetched: new Date(),
platform: platform.name,
qualityindicators: {
...q.qualityindicators,
@ -101,6 +112,21 @@ export const prepareQuestion = (
};
};
/**
 * Writes a single prepared question to the `question` table.
 *
 * When no row with `q.id` exists, the row is created with `firstSeen` set to
 * the current time and the deprecated `timestamp` mirroring `q.fetched`.
 * When the row exists, it is updated with the fields of `q` only, so the
 * stored `firstSeen` and `timestamp` are left as they were.
 *
 * @param q - question already passed through `prepareQuestion`
 * @returns the row as returned by Prisma's `upsert`
 */
export const upsertSingleQuestion = async (
  q: PreparedQuestion
): Promise<Question> => {
  // Extra fields that are only written on first insert.
  const createPayload = {
    ...q,
    firstSeen: new Date(),
    timestamp: q.fetched, // deprecated
  };

  // TODO - update history?
  const storedQuestion = await prisma.question.upsert({
    where: { id: q.id },
    create: createPayload,
    update: q,
  });
  return storedQuestion;
};
export const processPlatform = async <T extends string = "">(
platform: Platform<T>,
args?: { [k in T]: string }
@ -144,9 +170,9 @@ export const processPlatform = async <T extends string = "">(
for (const q of fetchedQuestions.map((q) => prepareQuestion(q, platform))) {
if (oldIdsSet.has(q.id)) {
// TODO - check if question has changed for better performance
updatedQuestions.push(q);
} else {
// TODO - check if question has changed for better performance
createdQuestions.push(q);
}
}
@ -154,7 +180,11 @@ export const processPlatform = async <T extends string = "">(
const stats: { created?: number; updated?: number; deleted?: number } = {};
await prisma.question.createMany({
data: createdQuestions,
data: createdQuestions.map((q) => ({
...q,
firstSeen: new Date(),
timestamp: q.fetched, // deprecated
})),
});
stats.created = createdQuestions.length;
@ -181,6 +211,7 @@ export const processPlatform = async <T extends string = "">(
await prisma.history.createMany({
data: [...createdQuestions, ...updatedQuestions].map((q) => ({
...q,
timestamp: q.fetched, // deprecated
idref: q.id,
})),
});

View File

@ -17,7 +17,9 @@ import { smarkets } from "./smarkets";
import { wildeford } from "./wildeford";
import { xrisk } from "./xrisk";
export const platforms: Platform<string>[] = [
// function instead of const array, this helps to fight circular dependencies
export const getPlatforms = (): Platform<string>[] => {
return [
betfair,
fantasyscotus,
foretold,
@ -35,12 +37,23 @@ export const platforms: Platform<string>[] = [
smarkets,
wildeford,
xrisk,
];
];
};
// Lazily built lookup from platform name to its human-readable label.
// A Map is used instead of a plain object so that names which collide with
// Object.prototype members ("constructor", "toString", ...) are not resolved
// to inherited properties.
let _nameToLabelCache: Map<string, string> | undefined;

/**
 * Returns the display label for a platform name.
 *
 * The lookup table is built from `getPlatforms()` on first use and reused on
 * later calls.
 *
 * @param name - platform name, e.g. the `name` field of a registered platform
 * @returns the platform's label, or `name` itself when the platform is
 *   unknown or its label is empty
 */
export function platformNameToLabel(name: string): string {
  if (!_nameToLabelCache) {
    const cache = new Map<string, string>();
    for (const platform of getPlatforms()) {
      cache.set(platform.name, platform.label);
    }
    _nameToLabelCache = cache;
  }
  // `||` (not `??`) on purpose: an empty label also falls back to the name.
  return _nameToLabelCache.get(name) || name;
}
// get frontend-safe version of platforms data
export const getPlatformsConfig = (): PlatformConfig[] => {
const platformsConfig = platforms.map((platform) => ({
const platformsConfig = getPlatforms().map((platform) => ({
name: platform.name,
label: platform.label,
color: platform.color,

View File

@ -162,7 +162,6 @@ async function processEventMarkets(event: any, ctx: Context) {
url: "https://smarkets.com/event/" + market.event_id + market.slug,
description: market.description,
options,
timestamp: new Date(),
qualityindicators: {},
extra: {
contracts,

View File

@ -96,7 +96,8 @@ async function processPredictions(
url: prediction["url"],
description: prediction["Notes"] || "",
options,
timestamp: new Date(Date.parse(prediction["Prediction Date"] + "Z")),
//// TODO - use `created` field for this
// timestamp: new Date(Date.parse(prediction["Prediction Date"] + "Z")),
qualityindicators: {},
};
return result;

View File

@ -3,16 +3,23 @@ import algoliasearch from "algoliasearch";
import { Question } from "@prisma/client";
import { prisma } from "../database/prisma";
import { platforms } from "../platforms/registry";
import { platformNameToLabel } from "../platforms/registry";
let cookie = process.env.ALGOLIA_MASTER_API_KEY || "";
const cookie = process.env.ALGOLIA_MASTER_API_KEY || "";
const algoliaAppId = process.env.NEXT_PUBLIC_ALGOLIA_APP_ID || "";
const client = algoliasearch(algoliaAppId, cookie);
const index = client.initIndex("metaforecast");
export type AlgoliaQuestion = Omit<Question, "timestamp"> & {
timestamp: string;
export type AlgoliaQuestion = Omit<
Question,
"fetched" | "firstSeen" | "timestamp"
> & {
timestamp?: string; // deprecated
fetched?: string;
firstSeen?: string;
optionsstringforsearch?: string;
platformLabel: string;
objectID: string;
};
const getoptionsstringforsearch = (record: Question): string => {
@ -26,23 +33,24 @@ const getoptionsstringforsearch = (record: Question): string => {
return result;
};
/**
 * Converts a database question into the record shape stored in Algolia.
 *
 * All question fields are copied as-is, except that the three date fields
 * (`fetched`, `timestamp`, `firstSeen`) are replaced by their ISO-8601 string
 * form. The record additionally gets `platformLabel`, an `objectID` equal to
 * the question id, and `optionsstringforsearch`.
 *
 * @param question - question row to convert
 * @returns the Algolia record for that question
 */
export const questionToAlgoliaQuestion = (
  question: Question
): AlgoliaQuestion => {
  const { id, platform, fetched, timestamp, firstSeen } = question;

  const record: AlgoliaQuestion = {
    ...question,
    fetched: fetched.toISOString(),
    timestamp: timestamp.toISOString(), // deprecated
    firstSeen: firstSeen.toISOString(),
    platformLabel: platformNameToLabel(platform),
    objectID: id,
    optionsstringforsearch: getoptionsstringforsearch(question),
  };
  return record;
};
export async function rebuildAlgoliaDatabase() {
const questions = await prisma.question.findMany();
const platformNameToLabel = Object.fromEntries(
platforms.map((platform) => [platform.name, platform.label])
);
const records: AlgoliaQuestion[] = questions.map(
(question, index: number) => ({
...question,
timestamp: `${question.timestamp}`,
platformLabel:
platformNameToLabel[question.platform] || question.platform,
objectID: index,
optionsstringforsearch: getoptionsstringforsearch(question),
})
);
const records: AlgoliaQuestion[] = questions.map(questionToAlgoliaQuestion);
if (await index.exists()) {
console.log("Index exists");

View File

@ -1,5 +1,5 @@
import { prisma } from "../../backend/database/prisma";
import { platforms } from "../../backend/platforms/registry";
import { getPlatforms } from "../../backend/platforms/registry";
import { builder } from "../builder";
export const PlatformObj = builder.objectRef<string>("Platform").implement({
@ -20,7 +20,7 @@ export const PlatformObj = builder.objectRef<string>("Platform").implement({
return "Guesstimate";
}
// kinda slow and repetitive, TODO - store a map {name => platform} somewhere and `getPlatform` util function?
const platform = platforms.find((p) => p.name === platformName);
const platform = getPlatforms().find((p) => p.name === platformName);
if (!platform) {
throw new Error(`Unknown platform ${platformName}`);
}
@ -36,10 +36,10 @@ export const PlatformObj = builder.objectRef<string>("Platform").implement({
platform: platformName,
},
_max: {
timestamp: true,
fetched: true,
},
});
return res._max.timestamp;
return res._max.fetched;
},
}),
}),
@ -49,7 +49,7 @@ builder.queryField("platforms", (t) =>
t.field({
type: [PlatformObj],
resolve: async (parent, args) => {
return platforms.map((platform) => platform.name);
return getPlatforms().map((platform) => platform.name);
},
})
);

View File

@ -70,8 +70,16 @@ const QuestionShapeInterface = builder
}),
timestamp: t.field({
type: "Date",
description: "Timestamp at which metaforecast fetched the question",
resolve: (parent) => parent.timestamp,
description:
"Last timestamp at which metaforecast fetched the question",
deprecationReason: "Renamed to `fetched`",
resolve: (parent) => parent.fetched,
}),
fetched: t.field({
type: "Date",
description:
"Last timestamp at which metaforecast fetched the question",
resolve: (parent) => parent.fetched,
}),
qualityIndicators: t.field({
type: QualityIndicatorsObj,
@ -114,10 +122,15 @@ export const QuestionObj = builder.prismaObject("Question", {
resolve: (parent) => (parent.extra as any)?.visualization, // used for guesstimate only, see searchGuesstimate.ts
nullable: true,
}),
firstSeen: t.field({
type: "Date",
description: "First timestamp at which metaforecast fetched the question",
resolve: (parent) => parent.firstSeen,
}),
history: t.relation("history", {
query: () => ({
orderBy: {
timestamp: "asc",
fetched: "asc",
},
}),
}),
@ -130,7 +143,21 @@ builder.queryField("questions", (t) =>
type: "Question",
cursor: "id",
maxSize: 1000,
resolve: (query) => prisma.question.findMany({ ...query }),
args: {
orderBy: t.arg({
type: builder.enumType("QuestionsOrderBy", {
values: ["FIRST_SEEN_DESC"] as const,
}),
}),
},
resolve: (query, parent, args) => {
return prisma.question.findMany({
...query,
...(args.orderBy === "FIRST_SEEN_DESC"
? { orderBy: [{ firstSeen: "desc" }, { id: "asc" }] }
: {}), // TODO - explicit default order?
});
},
},
{},
{}

View File

@ -63,7 +63,15 @@ builder.queryField("searchQuestions", (t) =>
return results.map((q) => ({
...q,
timestamp: new Date(q.timestamp),
fetched: new Date(
q.fetched || q.timestamp || new Date().toISOString() // q.timestamp is deprecated, TODO - just use `q.fetched`
),
timestamp: new Date(
q.fetched || q.timestamp || new Date().toISOString()
),
firstSeen: new Date(
q.firstSeen || new Date().toISOString() // TODO - q.firstSeen is not yet populated in algolia
),
}));
},
})

View File

@ -1,7 +1,7 @@
import { GetServerSideProps, NextPage } from "next";
import React from "react";
import { getPlatformsConfig, platforms } from "../backend/platforms/registry";
import { getPlatforms, getPlatformsConfig } from "../backend/platforms/registry";
import { Layout } from "../web/common/Layout";
import { Props, QueryParameters, SearchScreen } from "../web/search/components/SearchScreen";
import { FrontpageDocument, SearchDocument } from "../web/search/queries.generated";
@ -19,7 +19,7 @@ export const getServerSideProps: GetServerSideProps<Props> = async (
query: "",
starsThreshold: 2,
forecastsThreshold: 0,
forecastingPlatforms: platforms.map((platform) => platform.name),
forecastingPlatforms: getPlatforms().map((platform) => platform.name),
};
const initialQueryParameters: QueryParameters = {

View File

@ -3,7 +3,7 @@
import { GetServerSideProps, NextPage } from "next";
import React from "react";
import { platforms } from "../backend/platforms/registry";
import { getPlatforms } from "../backend/platforms/registry";
import { QuestionFragment } from "../web/fragments.generated";
import { QuestionCard } from "../web/questions/components/QuestionCard";
import { SearchDocument } from "../web/search/queries.generated";
@ -23,7 +23,7 @@ export const getServerSideProps: GetServerSideProps<Props> = async (
query: "",
starsThreshold: 2,
forecastsThreshold: 0,
forecastingPlatforms: platforms.map((platform) => platform.name),
forecastingPlatforms: getPlatforms().map((platform) => platform.name),
...urlQuery,
};

View File

@ -96,6 +96,7 @@ export default async function searchWithAlgolia({
title: "No search results match your query",
url: "https://metaforecast.org",
platform: "metaforecast",
platformLabel: "metaforecast",
description: "Maybe try a broader query?",
options: [
{
@ -109,7 +110,7 @@ export default async function searchWithAlgolia({
type: "PROBABILITY",
},
],
timestamp: `${new Date().toISOString().slice(0, 10)}`,
fetched: new Date().toISOString(),
qualityindicators: {
numforecasts: 1,
numforecasters: 1,
@ -126,8 +127,10 @@ export default async function searchWithAlgolia({
title: `Did you mean: ${queryString}?`,
url: "https://metaforecast.org/recursion?bypassEasterEgg=true",
platform: "metaforecast",
platformLabel: "metaforecast",
description:
"Fatal error: Too much recursion. Click to proceed anyways",
fetched: new Date().toISOString(),
options: [
{
name: "Yes",
@ -140,7 +143,6 @@ export default async function searchWithAlgolia({
type: "PROBABILITY",
},
],
timestamp: `${new Date().toISOString().slice(0, 10)}`,
qualityindicators: {
numforecasts: 1,
numforecasters: 1,
@ -161,6 +163,7 @@ export default async function searchWithAlgolia({
title: "No search results appear to match your query",
url: "https://metaforecast.org",
platform: "metaforecast",
platformLabel: "metaforecast",
description: "Maybe try a broader query? That said, we could be wrong.",
options: [
{
@ -174,7 +177,7 @@ export default async function searchWithAlgolia({
type: "PROBABILITY",
},
],
timestamp: `${new Date().toISOString().slice(0, 10)}`,
fetched: new Date().toISOString(),
qualityindicators: {
numforecasts: 1,
numforecasters: 1,