Schedule updating user recommendations. Compute using tf-idf.

This commit is contained in:
James Grugett 2022-04-27 21:27:31 -04:00
parent d639e73284
commit 57fb9baed6
4 changed files with 152 additions and 4 deletions

View File

@ -1,5 +1,7 @@
import * as _ from 'lodash'
import { Bet } from './bet'
import { Contract } from './contract'
import { ClickEvent } from './tracking'
import { filterDefined } from './util/array'
import { addObjects } from './util/object'
@ -92,3 +94,85 @@ const contractsToWordFrequency = (contracts: Contract[]) => {
return toFrequency(frequencySum)
}
/**
 * Derives a score for every word appearing in contracts a user interacted with.
 *
 * Only contracts that have a non-zero view count, at least one click, or at
 * least one bet are considered. The TF-IDF weight of each word in such a
 * contract is multiplied by an engagement factor built from log-scaled counts:
 * views weigh in at -1, clicks at +3 and bets at +10. The per-contract results
 * are then merged into a single object with `addObjects`.
 *
 * @param contracts Contracts to consider.
 * @param contractViewCounts View count per contract id.
 * @param clicks Click events, grouped internally by `contractId`.
 * @param bets Bets, grouped internally by `contractId`.
 * @returns The merged word-to-score object.
 */
export const getWordScores = (
  contracts: Contract[],
  contractViewCounts: { [contractId: string]: number },
  clicks: ClickEvent[],
  bets: Bet[]
) => {
  const clicksByContract = _.groupBy(clicks, (click) => click.contractId)
  const betsByContract = _.groupBy(bets, (bet) => bet.contractId)

  // Keep only the contracts showing any sign of activity.
  const engagedContracts = contracts.filter((contract) => {
    const { id } = contract
    return contractViewCounts[id] || clicksByContract[id] || betsByContract[id]
  })
  const tfIdfByContract = calculateContractTfIdf(engagedContracts)

  // Log-scaled engagement weight: views count against, clicks and bets count for.
  const engagementFactor = (contractId: string) => {
    const views = contractViewCounts[contractId] ?? 0
    const clickTotal = clicksByContract[contractId]?.length ?? 0
    const betTotal = betsByContract[contractId]?.length ?? 0
    return (
      -1 * Math.log(views + 1) +
      3 * Math.log(clickTotal + 1) +
      10 * Math.log(betTotal + 1)
    )
  }

  const scaledByContract = _.mapValues(
    tfIdfByContract,
    (wordWeights, contractId) => {
      const factor = engagementFactor(contractId)
      return _.mapValues(wordWeights, (weight) => weight * factor)
    }
  )

  const wordScores = Object.values(scaledByContract).reduce(addObjects, {})

  // Log the ten highest-scoring words.
  const rankedWords = _.sortBy(_.toPairs(wordScores), ([, score]) => -score)
  console.log('your word scores', rankedWords.slice(0, 10))

  return wordScores
}
/**
 * Scores each contract against a set of word scores.
 *
 * A contract's score is the sum, over every word in its word-frequency table,
 * of that word's frequency multiplied by its entry in `wordScores`. Words with
 * no entry in `wordScores` contribute 0.
 *
 * @param contracts Contracts to score.
 * @param wordScores Score per word.
 * @returns An object mapping contract id to its score.
 */
export function getContractScores(
  contracts: Contract[],
  wordScores: { [word: string]: number }
) {
  const scoreByContractId: { [contractId: string]: number } = {}

  for (const contract of contracts) {
    const frequencies = contractToWordFrequency(contract)
    const words = Object.keys(frequencies)

    scoreByContractId[contract.id] = _.sumBy(words, (word) => {
      const frequency = frequencies[word] ?? 0
      const wordScore = wordScores[word] ?? 0
      return frequency * wordScore
    })
  }

  return scoreByContractId
}
// Calculate Term Frequency-Inverse Document Frequency (TF-IDF):
// https://medium.datadriveninvestor.com/tf-idf-in-natural-language-processing-8db8ef4a7736
//
// Returns an object keyed by contract id. Each value maps a word of that
// contract to its frequency multiplied by log(total contracts / number of
// contracts containing the word).
function calculateContractTfIdf(contracts: Contract[]) {
  const frequencies = contracts.map((contract) =>
    contractToWordFrequency(contract)
  )

  // Document frequency: in how many contracts does each word occur?
  const documentCount: { [word: string]: number } = {}
  frequencies.forEach((frequency) => {
    Object.keys(frequency).forEach((word) => {
      documentCount[word] = (documentCount[word] ?? 0) + 1
    })
  })

  const totalContracts = contracts.length
  const idfByWord = _.mapValues(documentCount, (occurrences) =>
    Math.log(totalContracts / occurrences)
  )

  const tfIdfPerContract = frequencies.map((frequency) =>
    _.mapValues(frequency, (termFreq, word) => termFreq * idfByWord[word])
  )

  return _.fromPairs(
    contracts.map((contract, index) => [contract.id, tfIdfPerContract[index]])
  )
}

View File

@ -19,6 +19,7 @@ export * from './on-view'
export * from './unsubscribe'
export * from './update-contract-metrics'
export * from './update-user-metrics'
export * from './update-recommendations'
export * from './backup-db'
export * from './change-user-info'
export * from './market-close-emails'

View File

@ -0,0 +1,57 @@
import * as functions from 'firebase-functions'
import * as admin from 'firebase-admin'
import * as _ from 'lodash'
import { getValue, getValues } from './utils'
import { Contract } from '../../common/contract'
import { Bet } from '../../common/bet'
import { User } from '../../common/user'
import { ClickEvent } from '../../common/tracking'
import {
getContractScores,
getWordScores,
} from '../../common/recommended-contracts'
// Firestore client shared by the functions in this module.
const firestore = admin.firestore()
/**
 * Pub/sub function scheduled 'every 24 hours'. Loads every document from the
 * `contracts` and `users` collections, then refreshes the cached
 * recommendations of each user in turn.
 */
export const updateRecommendations = functions.pubsub
  .schedule('every 24 hours')
  .onRun(async () => {
    const contractsQuery = firestore.collection('contracts')
    const allContracts = await getValues<Contract>(contractsQuery)

    const usersQuery = firestore.collection('users')
    const allUsers = await getValues<User>(usersQuery)

    // One user at a time: each update is awaited before the next one starts.
    for (const user of allUsers) {
      await updateUserRecommendations(user, allContracts)
    }
  })
/**
 * Recomputes and stores the recommendation data of a single user.
 *
 * Reads the user's bets, cached view counts and click events concurrently,
 * derives word scores and contract scores from them, and writes both results
 * to the `wordScores` and `contractScores` documents under
 * `private-users/{userId}/cached`.
 *
 * @param user The user whose recommendations are refreshed.
 * @param contracts The contracts to score for this user.
 */
export const updateUserRecommendations = async (
  user: User,
  contracts: Contract[]
) => {
  const betsQuery = firestore
    .collectionGroup('bets')
    .where('userId', '==', user.id)
  const viewCountsRef = firestore.doc(
    `private-users/${user.id}/cached/viewCounts`
  )
  const clicksQuery = firestore
    .collection(`private-users/${user.id}/events`)
    .where('type', '==', 'click')

  // The three reads are independent, so they are issued together.
  const [bets, viewCounts, clicks] = await Promise.all([
    getValues<Bet>(betsQuery),
    getValue<{ [contractId: string]: number }>(viewCountsRef),
    getValues<ClickEvent>(clicksQuery),
  ])

  // A missing view-count document is treated as "no views".
  const wordScores = getWordScores(contracts, viewCounts ?? {}, clicks, bets)
  const contractScores = getContractScores(contracts, wordScores)

  const cache = firestore.collection(`private-users/${user.id}/cached`)
  await cache.doc('wordScores').set(wordScores)
  await cache.doc('contractScores').set(contractScores)
}

View File

@ -6,27 +6,33 @@ import { PrivateUser, User } from '../../common/user'
export const isProd =
admin.instanceId().app.options.projectId === 'mantic-markets'
/**
 * Reads a single document addressed by collection name and document id.
 *
 * @param collection Name of the Firestore collection.
 * @param doc Id of the document inside that collection.
 * @returns The document data cast to `T`, or `undefined` when the snapshot
 *   reports that the document does not exist.
 */
export const getDoc = async <T>(collection: string, doc: string) => {
  const snap = await admin.firestore().collection(collection).doc(doc).get()
  return snap.exists ? (snap.data() as T) : undefined
}
/**
 * Reads the document behind a reference.
 *
 * @param ref Reference of the document to fetch.
 * @returns The document data cast to `T`, or `undefined` when the snapshot
 *   reports that the document does not exist.
 */
export const getValue = async <T>(ref: admin.firestore.DocumentReference) => {
  const snapshot = await ref.get()
  if (!snapshot.exists) {
    return undefined
  }
  return snapshot.data() as T
}
/**
 * Runs a query and collects the data of every returned document.
 *
 * @param query The query to execute.
 * @returns The data of each document in the snapshot, cast to `T`, in
 *   snapshot order.
 */
export const getValues = async <T>(query: admin.firestore.Query) => {
  const snapshot = await query.get()
  const values: T[] = []
  for (const doc of snapshot.docs) {
    values.push(doc.data() as T)
  }
  return values
}
/**
 * Looks up a contract by id in the `contracts` collection.
 *
 * @param contractId Id of the contract document.
 * @returns A promise of the contract, or of `undefined` when no such document
 *   exists.
 */
export const getContract = (contractId: string) => {
  // Uses getDoc: getValue takes a document reference, not a collection/id pair.
  return getDoc<Contract>('contracts', contractId)
}
/**
 * Looks up a user by id in the `users` collection.
 *
 * @param userId Id of the user document.
 * @returns A promise of the user, or of `undefined` when no such document
 *   exists.
 */
export const getUser = (userId: string) => {
  // Uses getDoc: getValue takes a document reference, not a collection/id pair.
  return getDoc<User>('users', userId)
}
/**
 * Looks up a private user record by id in the `private-users` collection.
 *
 * @param userId Id of the private-user document.
 * @returns A promise of the private user, or of `undefined` when no such
 *   document exists.
 */
export const getPrivateUser = (userId: string) => {
  // Uses getDoc: getValue takes a document reference, not a collection/id pair.
  return getDoc<PrivateUser>('private-users', userId)
}
export const getUserByUsername = async (username: string) => {