// metaforecast/src/platforms/givewellopenphil-fetch.js
//
// 62 lines
// 1.7 KiB
// JavaScript
// Raw Normal View History
//
// 2021-02-16 14:18:23 +00:00
/* Imports */
import fs from "fs"
import axios from "axios"
import toMarkdown from "../utils/toMarkdown.js"
import { calculateStars } from "../utils/stars.js"
import { upsert } from "../utils/mongo-wrapper.js"
// 2021-02-16 14:18:23 +00:00
/* Definitions */
// Relative path of the data directory.
// NOTE(review): nothing in the visible part of this file reads this value — confirm it is still needed.
const locationData = "./data/"
/* Support functions */
// 2021-03-02 13:29:27 +00:00
/**
 * Requests a URL with an HTTP GET and resolves with the response body.
 *
 * @param {string} url - Address of the page to download.
 * @returns {Promise<*>} The `data` field of the axios response. The caller in
 *   this file treats it as an HTML string.
 * @throws Any rejection produced by axios propagates to the caller unchanged.
 */
async function fetchPage(url) {
  // Await the response directly instead of mixing `await` with a `.then()` chain.
  const { data } = await axios({
    url,
    method: 'GET',
    headers: {
      'Content-Type': 'text/html',
    },
  })
  return data
}
/* Body */
// 2021-03-02 13:29:27 +00:00
/**
 * Reads the page title from the `twitter:title` meta tag.
 *
 * @param {string} page - Raw HTML of a downloaded page.
 * @returns {string|undefined} The tag's content, or `undefined` when the tag is absent.
 */
function extractTitle(page) {
  const afterMarker = page.split('<meta name="twitter:title" content="')[1]
  return afterMarker === undefined ? undefined : afterMarker.split('" />')[0]
}

/**
 * Returns the HTML of the internal-forecasts section of a page.
 *
 * The page is cut at every "<h2" and only the chunks mentioning
 * "Internal forecast" / "internal forecast" are kept; the SECOND such chunk is used.
 * NOTE(review): presumably the first match is a table of contents or intro
 * that links to the section — confirm against a live page.
 *
 * @param {string} page - Raw HTML of a downloaded page.
 * @returns {string|undefined} The section prefixed with "<h2 ", or `undefined`
 *   when the page has fewer than two matching chunks.
 */
function extractInternalForecasts(page) {
  const matchingChunks = page
    .split("<h2")
    .filter(chunk => chunk.includes("Internal forecast") || chunk.includes("internal forecast"))
  const section = matchingChunks[1]
  return section === undefined ? undefined : "<h2 " + section
}

/**
 * Scrapes every URL listed in ./src/input/givewellopenphil-urls.txt and hands
 * the collected records to `upsert` under "givewell-questions-unprocessed".
 *
 * Each record holds the page title, its URL, the raw HTML of the internal
 * forecasts section, a timestamp and a star rating. The description is raw
 * HTML and still requires processing afterwards.
 *
 * @returns {Promise<void>}
 * @throws {Error} When a page is not a string or has no `twitter:title` meta
 *   tag; the message names the offending URL. Errors from `fetchPage` and
 *   `upsert` propagate unchanged.
 */
async function main() {
  const rawUrls = fs.readFileSync("./src/input/givewellopenphil-urls.txt")
  // Accept both LF and CRLF files and ignore blank or whitespace-only lines.
  const urls = rawUrls
    .toString()
    .split(/\r?\n/)
    .map(line => line.trim())
    .filter(line => line !== "")

  const results = []
  // Pages are fetched one at a time, in file order.
  for (const url of urls) {
    const page = await fetchPage(url)
    if (typeof page !== "string") {
      throw new Error(`Expected an HTML string from ${url}, got ${typeof page}`)
    }

    const title = extractTitle(page)
    if (title === undefined) {
      throw new Error(`No twitter:title meta tag found in ${url}`)
    }

    const description = extractInternalForecasts(page)
    if (description === undefined) {
      // Nothing to record for this page; storing "<h2 undefined" would only pollute the data.
      console.warn(`No internal forecasts section found in ${url}; skipping`)
      continue
    }

    results.push({
      "title": title,
      "url": url,
      "platform": "GiveWell",
      "description": description,
      "timestamp": new Date().toISOString(),
      "qualityindicators": {
        "stars": calculateStars("GiveWell/OpenPhilanthropy", ({})),
      }
    }) // Note: This requires some processing afterwards
  }

  await upsert(results, "givewell-questions-unprocessed")
}
// Script entry point: run the scraper. The returned promise is handled here so
// that a failure is reported and the process exits with a non-zero status.
main().catch((error) => {
  console.error(error)
  process.exitCode = 1
})