Fixed annoying cset bug

This commit is contained in:
NunoSempere 2021-06-25 18:00:00 +02:00
parent d66d4b43c9
commit d733804a72

View File

@ -9,15 +9,18 @@ import {upsert} from "../utils/mongo-wrapper.js"
/* Definitions */
let htmlEndPoint = 'https://www.cset-foretell.com/questions?page='
String.prototype.replaceAll = function replaceAll(search, replace) { return this.split(search).join(replace); }
const DEBUG_MODE = "off" // "on"
/* Support functions */
async function fetchPage(page, cookie){
console.log(page)
if(page==1){
cookie=cookie.split(";")[0] // Interesting that it otherwise doesn't work :(
}
let urlEndpoint = htmlEndPoint+page
console.log(urlEndpoint)
let response = await axios({
url: htmlEndPoint+page,
url: urlEndpoint,
method: 'GET',
headers: ({
'Content-Type': 'text/html',
@ -123,7 +126,12 @@ async function fetchStats(questionUrl, cookie){
}
function isEnd(html){
return html.includes("No questions match your filter")
let isEndBool = html.includes("No questions match your filter")
if(isEndBool){
//console.log(html)
}
console.log(`IsEnd? ${isEndBool}`)
return isEndBool
}
function sleep(ms) {
@ -133,7 +141,7 @@ function sleep(ms) {
/* Body */
async function csetforetell_inner(cookie){
let i=0
let i=1
let response = await fetchPage(i, cookie)
let results = []
let init = Date.now()
@ -143,8 +151,9 @@ async function csetforetell_inner(cookie){
let htmlLines = response.split("\n")
let h4elements = htmlLines.filter(str => str.includes("<h5><a href=") || str.includes("<h4><a href="))
if(process.env.DEBUG_MODE == "on"){
console.log(response)
if(process.env.DEBUG_MODE == "on" || DEBUG_MODE == "on"){
//console.log(response)
console.log(h4elements)
}
//console.log("")
@ -152,6 +161,7 @@ async function csetforetell_inner(cookie){
//console.log(h4elements)
for(let h4element of h4elements){
//console.log(h4element)
let h4elementSplit = h4element.split('"><span>')
let url = h4elementSplit[0].split('<a href="')[1]
@ -166,12 +176,13 @@ async function csetforetell_inner(cookie){
"platform": "CSET-foretell",
...moreinfo
})
if(i % 30 == 0){
console.log(`Page #${i}`)
if(i % 30 == 0 && !(process.env.DEBUG_MODE == "on" || DEBUG_MODE == "on")){
console.log(`Page #${i}` && !(process.env.DEBUG_MODE == "on" || DEBUG_MODE == "on"))
console.log(question)
}
results.push(question)
if(process.env.DEBUG_MODE == "on"){
if(process.env.DEBUG_MODE == "on" || DEBUG_MODE == "on"){
console.log(url)
console.log(question)
}
@ -179,11 +190,12 @@ async function csetforetell_inner(cookie){
console.log(error)
console.log(`We encountered some error when fetching the URL: ${url}, so it won't appear on the final json`)
}
i=i+1
}
// console.log("Sleeping for ~5secs so as to not be as noticeable to the cset-foretell servers")
i++
//i=Number(i)+1
console.log("Sleeping for ~5secs so as to not be as noticeable to the cset-foretell servers")
await sleep(5000 + Math.random()*1000) // don't be as noticeable
try{
@ -198,7 +210,6 @@ async function csetforetell_inner(cookie){
// console.log(results)
await upsert(results, "csetforetell-questions")
let end = Date.now()
let difference = end-init
console.log(`Took ${difference/1000} seconds, or ${difference/(1000*60)} minutes.`)