Make bet denormalization script fast enough to run it on prod

This commit is contained in:
Marshall Polaris 2022-09-14 00:59:42 -07:00
parent 34bf8d771f
commit 298bc04517
2 changed files with 38 additions and 54 deletions

View File

@ -1,45 +1,23 @@
// Filling in the contract-based fields on comments.
// Filling in the user-based fields on bets.
import * as admin from 'firebase-admin'
import { initAdmin } from './script-init'
import { findDiffs, describeDiff, applyDiff } from './denormalize'
import { DocumentSnapshot, Transaction } from 'firebase-admin/firestore'
import { log } from '../utils'
import { findDiffs, describeDiff, getDiffUpdate } from './denormalize'
import { log, writeAsync } from '../utils'
initAdmin()
const firestore = admin.firestore()
/**
 * Reads every document in the `users` collection through the given
 * transaction and indexes the resulting snapshots by document id.
 *
 * @param transaction Firestore transaction used to perform the read.
 * @returns An object whose keys are user document ids and whose values are
 *   the matching document snapshots.
 */
async function getUsersById(transaction: Transaction) {
  const userSnap = await transaction.get(firestore.collection('users'))
  // Build [id, snapshot] pairs first, then fold them into a lookup object.
  const idDocPairs = userSnap.docs.map((userDoc) => [userDoc.id, userDoc])
  const usersById = Object.fromEntries(idDocPairs)
  log(`Found ${userSnap.size} users.`)
  return usersById
}
/**
 * Reads every document in the `bets` collection group through the given
 * transaction and groups the snapshots by the value of their `userId` field.
 *
 * @param transaction Firestore transaction used to perform the read.
 * @returns A map from `userId` to the bet snapshots carrying that id, in the
 *   order the query returned them.
 */
async function getBetsByUserId(transaction: Transaction) {
  const betSnap = await transaction.get(firestore.collectionGroup('bets'))
  const betsByUser = new Map<string, DocumentSnapshot[]>()
  for (const betDoc of betSnap.docs) {
    const ownerId = betDoc.get('userId')
    const bucket = betsByUser.get(ownerId)
    if (bucket) {
      // Re-setting keeps the same array under the same key, as before.
      bucket.push(betDoc)
      betsByUser.set(ownerId, bucket)
    } else {
      betsByUser.set(ownerId, [betDoc])
    }
  }
  log(`Found ${betSnap.size} bets from ${betsByUser.size} users.`)
  return betsByUser
}
// not in a transaction for speed -- may need to be run more than once
async function denormalize() {
let hasMore = true
while (hasMore) {
hasMore = await admin.firestore().runTransaction(async (transaction) => {
const [usersById, betsByUserId] = await Promise.all([
getUsersById(transaction),
getBetsByUserId(transaction),
])
const mapping = Object.entries(usersById).map(([id, doc]) => {
return [doc, betsByUserId.get(id) || []] as const
})
const users = await firestore.collection('users').get()
log(`Found ${users.size} users.`)
for (const userDoc of users.docs) {
const userBets = await firestore
.collectionGroup('bets')
.where('userId', '==', userDoc.id)
.get()
const mapping = [[userDoc, userBets.docs] as const] as const
const diffs = findDiffs(
mapping,
['avatarUrl', 'userAvatarUrl'],
@ -47,15 +25,11 @@ async function denormalize() {
['username', 'userUsername']
)
log(`Found ${diffs.length} bets with mismatched user data.`)
diffs.slice(0, 500).forEach((d) => {
const updates = diffs.map((d) => {
log(describeDiff(d))
applyDiff(transaction, d)
})
if (diffs.length > 500) {
log(`Applying first 500 because of Firestore limit...`)
}
return diffs.length > 500
return getDiffUpdate(d)
})
await writeAsync(firestore, updates)
}
}

View File

@ -3,6 +3,7 @@
import { DocumentSnapshot, Transaction } from 'firebase-admin/firestore'
import { isEqual, zip } from 'lodash'
import { UpdateSpec } from '../utils'
export type DocumentValue = {
doc: DocumentSnapshot
@ -20,7 +21,10 @@ export type DocumentDiff = {
type PathPair = readonly [string, string]
export function findDiffs(docs: DocumentMapping[], ...paths: PathPair[]) {
export function findDiffs(
docs: readonly DocumentMapping[],
...paths: PathPair[]
) {
const diffs: DocumentDiff[] = []
const srcPaths = paths.map((p) => p[0])
const destPaths = paths.map((p) => p[1])
@ -46,8 +50,14 @@ export function describeDiff(diff: DocumentDiff) {
return `${describeDocVal(diff.src)} -> ${describeDocVal(diff.dest)}`
}
export function applyDiff(transaction: Transaction, diff: DocumentDiff) {
const { src, dest } = diff
const updateSpec = Object.fromEntries(zip(dest.fields, src.vals))
transaction.update(dest.doc.ref, updateSpec)
/**
 * Converts a diff into an update spec targeting the diff's destination
 * document.
 *
 * Each destination field name is paired positionally with the source value
 * at the same index, and the pairs become the `fields` of the update.
 *
 * @param diff The diff whose source values should be written to its
 *   destination document.
 * @returns An update spec with the destination document reference as `doc`
 *   and the field/value object as `fields`.
 */
export function getDiffUpdate(diff: DocumentDiff) {
  const { src, dest } = diff
  const targetRef = dest.doc.ref
  // zip pairs dest.fields[i] with src.vals[i].
  const fieldValuePairs = zip(dest.fields, src.vals)
  return {
    doc: targetRef,
    fields: Object.fromEntries(fieldValuePairs),
  } as UpdateSpec
}
/**
 * Queues, on the given transaction, the update that brings a diff's
 * destination document in line with its source values.
 *
 * @param transaction Firestore transaction the update is added to.
 * @param diff The diff to apply.
 */
export function applyDiff(transaction: Transaction, diff: DocumentDiff) {
  const { doc, fields } = getDiffUpdate(diff)
  transaction.update(doc, fields)
}