From 57fb9baed60924f3cc1806fd51f238cdd45623c3 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Wed, 27 Apr 2022 21:27:31 -0400 Subject: [PATCH] Schedule updating user recommendations. Compute using tf-idf. --- common/recommended-contracts.ts | 84 +++++++++++++++++++++++++ functions/src/index.ts | 1 + functions/src/update-recommendations.ts | 57 +++++++++++++++++ functions/src/utils.ts | 14 +++-- 4 files changed, 152 insertions(+), 4 deletions(-) create mode 100644 functions/src/update-recommendations.ts diff --git a/common/recommended-contracts.ts b/common/recommended-contracts.ts index 7de2e501..ad85a4de 100644 --- a/common/recommended-contracts.ts +++ b/common/recommended-contracts.ts @@ -1,5 +1,7 @@ import * as _ from 'lodash' +import { Bet } from './bet' import { Contract } from './contract' +import { ClickEvent } from './tracking' import { filterDefined } from './util/array' import { addObjects } from './util/object' @@ -92,3 +94,85 @@ const contractsToWordFrequency = (contracts: Contract[]) => { return toFrequency(frequencySum) } + +export const getWordScores = ( + contracts: Contract[], + contractViewCounts: { [contractId: string]: number }, + clicks: ClickEvent[], + bets: Bet[] +) => { + const contractClicks = _.groupBy(clicks, (click) => click.contractId) + const contractBets = _.groupBy(bets, (bet) => bet.contractId) + + const yourContracts = contracts.filter( + (c) => + contractViewCounts[c.id] || contractClicks[c.id] || contractBets[c.id] + ) + const yourTfIdf = calculateContractTfIdf(yourContracts) + + const contractWordScores = _.mapValues( + yourTfIdf, + (wordsTfIdf, contractId) => { + const viewCount = contractViewCounts[contractId] ?? 0 + const clickCount = contractClicks[contractId]?.length ?? 0 + const betCount = contractBets[contractId]?.length ?? 
0 + + const factor = + -1 * Math.log(viewCount + 1) + + 3 * Math.log(clickCount + 1) + + 10 * Math.log(betCount + 1) + + return _.mapValues(wordsTfIdf, (tfIdf) => tfIdf * factor) + } + ) + + const wordScores = Object.values(contractWordScores).reduce(addObjects, {}) + + console.log( + 'your word scores', + _.sortBy(_.toPairs(wordScores), ([, score]) => -score).slice(0, 10) + ) + + return wordScores +} + +export function getContractScores( + contracts: Contract[], + wordScores: { [word: string]: number } +) { + const scorePairs = contracts.map((contract) => { + const wordFrequency = contractToWordFrequency(contract) + + const score = _.sumBy(Object.keys(wordFrequency), (word) => { + const wordFreq = wordFrequency[word] ?? 0 + const weight = wordScores[word] ?? 0 + return wordFreq * weight + }) + + return [contract.id, score] as [string, number] + }) + + return _.fromPairs(scorePairs) +} + +// Calculate Term Frequency-Inverse Document Frequency (TF-IDF): +// https://medium.datadriveninvestor.com/tf-idf-in-natural-language-processing-8db8ef4a7736 +function calculateContractTfIdf(contracts: Contract[]) { + const contractFreq = contracts.map((c) => contractToWordFrequency(c)) + const contractWords = contractFreq.map((freq) => Object.keys(freq)) + + const wordsCount: { [word: string]: number } = {} + for (const words of contractWords) { + for (const word of words) { + wordsCount[word] = (wordsCount[word] ?? 
0) + 1 + } + } + + const wordIdf = _.mapValues(wordsCount, (count) => + Math.log(contracts.length / count) + ) + const contractWordsTfIdf = _.map(contractFreq, (wordFreq) => + _.mapValues(wordFreq, (freq, word) => freq * wordIdf[word]) + ) + return _.fromPairs(contracts.map((c, i) => [c.id, contractWordsTfIdf[i]])) +} diff --git a/functions/src/index.ts b/functions/src/index.ts index 541d53ad..d87e88f9 100644 --- a/functions/src/index.ts +++ b/functions/src/index.ts @@ -19,6 +19,7 @@ export * from './on-view' export * from './unsubscribe' export * from './update-contract-metrics' export * from './update-user-metrics' +export * from './update-recommendations' export * from './backup-db' export * from './change-user-info' export * from './market-close-emails' diff --git a/functions/src/update-recommendations.ts b/functions/src/update-recommendations.ts new file mode 100644 index 00000000..2bbba46f --- /dev/null +++ b/functions/src/update-recommendations.ts @@ -0,0 +1,57 @@ +import * as functions from 'firebase-functions' +import * as admin from 'firebase-admin' +import * as _ from 'lodash' + +import { getValue, getValues } from './utils' +import { Contract } from '../../common/contract' +import { Bet } from '../../common/bet' +import { User } from '../../common/user' +import { ClickEvent } from '../../common/tracking' +import { + getContractScores, + getWordScores, +} from '../../common/recommended-contracts' + +const firestore = admin.firestore() + +export const updateRecommendations = functions.pubsub + .schedule('every 24 hours') + .onRun(async () => { + const contracts = await getValues( + firestore.collection('contracts') + ) + + const users = await getValues(firestore.collection('users')) + + for (const user of users) await updateUserRecommendations(user, contracts) + }) + +export const updateUserRecommendations = async ( + user: User, + contracts: Contract[] +) => { + const [bets, viewCounts, clicks] = await Promise.all([ + getValues( + 
firestore.collectionGroup('bets').where('userId', '==', user.id) + ), + + getValue<{ [contractId: string]: number }>( + firestore.doc(`private-users/${user.id}/cached/viewCounts`) + ), + + getValues( + firestore + .collection(`private-users/${user.id}/events`) + .where('type', '==', 'click') + ), + ]) + + const wordScores = getWordScores(contracts, viewCounts ?? {}, clicks, bets) + const contractScores = getContractScores(contracts, wordScores) + + const cachedCollection = firestore.collection( + `private-users/${user.id}/cached` + ) + await cachedCollection.doc('wordScores').set(wordScores) + await cachedCollection.doc('contractScores').set(contractScores) +} diff --git a/functions/src/utils.ts b/functions/src/utils.ts index 88c25570..28ef5445 100644 --- a/functions/src/utils.ts +++ b/functions/src/utils.ts @@ -6,27 +6,33 @@ import { PrivateUser, User } from '../../common/user' export const isProd = admin.instanceId().app.options.projectId === 'mantic-markets' -export const getValue = async (collection: string, doc: string) => { +export const getDoc = async (collection: string, doc: string) => { const snap = await admin.firestore().collection(collection).doc(doc).get() return snap.exists ? (snap.data() as T) : undefined } +export const getValue = async (ref: admin.firestore.DocumentReference) => { + const snap = await ref.get() + + return snap.exists ? 
(snap.data() as T) : undefined +} + export const getValues = async (query: admin.firestore.Query) => { const snap = await query.get() return snap.docs.map((doc) => doc.data() as T) } export const getContract = (contractId: string) => { - return getValue('contracts', contractId) + return getDoc('contracts', contractId) } export const getUser = (userId: string) => { - return getValue('users', userId) + return getDoc('users', userId) } export const getPrivateUser = (userId: string) => { - return getValue('private-users', userId) + return getDoc('private-users', userId) } export const getUserByUsername = async (username: string) => {