manifold/functions/src/scripts/contest/scrape-ea.ts

56 lines
1.5 KiB
TypeScript

// Run with `npx ts-node src/scripts/contest/scrape-ea.ts`
import * as fs from 'fs'
import * as puppeteer from 'puppeteer'
export function scrapeEA(contestLink: string, fileName: string) {
;(async () => {
const browser = await puppeteer.launch({ headless: true })
const page = await browser.newPage()
await page.goto(contestLink)
let loadMoreButton = await page.$('.LoadMore-root')
while (loadMoreButton) {
await loadMoreButton.click()
await page.waitForNetworkIdle()
loadMoreButton = await page.$('.LoadMore-root')
}
/* Run javascript inside the page */
const data = await page.evaluate(() => {
const list = []
const items = document.querySelectorAll('.PostsItem2-root')
for (const item of items) {
const link =
'https://forum.effectivealtruism.org' +
item?.querySelector('a')?.getAttribute('href')
// Replace '&' with '&'
const clean = (str: string | undefined) => str?.replace(/&/g, '&')
list.push({
title: clean(item?.querySelector('a>span>span')?.innerHTML),
author: item?.querySelector('a.UsersNameDisplay-userName')?.innerHTML,
link: link,
})
}
return list
})
fs.writeFileSync(
`./src/scripts/contest/${fileName}.ts`,
`export const data = ${JSON.stringify(data, null, 2)}`
)
console.log(data)
await browser.close()
})()
}
scrapeEA(
'https://forum.effectivealtruism.org/topics/criticism-and-red-teaming-contest',
'criticism-and-red-teaming'
)