Fixed annoying cset bug

This commit is contained in:
NunoSempere 2021-06-25 18:00:00 +02:00
parent d66d4b43c9
commit d733804a72

View File

@@ -9,15 +9,18 @@ import {upsert} from "../utils/mongo-wrapper.js"
/* Definitions */ /* Definitions */
let htmlEndPoint = 'https://www.cset-foretell.com/questions?page=' let htmlEndPoint = 'https://www.cset-foretell.com/questions?page='
String.prototype.replaceAll = function replaceAll(search, replace) { return this.split(search).join(replace); } String.prototype.replaceAll = function replaceAll(search, replace) { return this.split(search).join(replace); }
const DEBUG_MODE = "off" // "on"
/* Support functions */ /* Support functions */
async function fetchPage(page, cookie){ async function fetchPage(page, cookie){
console.log(page)
if(page==1){ if(page==1){
cookie=cookie.split(";")[0] // Interesting that it otherwise doesn't work :( cookie=cookie.split(";")[0] // Interesting that it otherwise doesn't work :(
} }
let urlEndpoint = htmlEndPoint+page
console.log(urlEndpoint)
let response = await axios({ let response = await axios({
url: htmlEndPoint+page, url: urlEndpoint,
method: 'GET', method: 'GET',
headers: ({ headers: ({
'Content-Type': 'text/html', 'Content-Type': 'text/html',
@@ -123,7 +126,12 @@ async function fetchStats(questionUrl, cookie){
} }
function isEnd(html){ function isEnd(html){
return html.includes("No questions match your filter") let isEndBool = html.includes("No questions match your filter")
if(isEndBool){
//console.log(html)
}
console.log(`IsEnd? ${isEndBool}`)
return isEndBool
} }
function sleep(ms) { function sleep(ms) {
@@ -133,7 +141,7 @@ function sleep(ms) {
/* Body */ /* Body */
async function csetforetell_inner(cookie){ async function csetforetell_inner(cookie){
let i=0 let i=1
let response = await fetchPage(i, cookie) let response = await fetchPage(i, cookie)
let results = [] let results = []
let init = Date.now() let init = Date.now()
@@ -143,8 +151,9 @@ async function csetforetell_inner(cookie){
let htmlLines = response.split("\n") let htmlLines = response.split("\n")
let h4elements = htmlLines.filter(str => str.includes("<h5><a href=") || str.includes("<h4><a href=")) let h4elements = htmlLines.filter(str => str.includes("<h5><a href=") || str.includes("<h4><a href="))
if(process.env.DEBUG_MODE == "on"){ if(process.env.DEBUG_MODE == "on" || DEBUG_MODE == "on"){
console.log(response) //console.log(response)
console.log(h4elements)
} }
//console.log("") //console.log("")
@@ -152,6 +161,7 @@ async function csetforetell_inner(cookie){
//console.log(h4elements) //console.log(h4elements)
for(let h4element of h4elements){ for(let h4element of h4elements){
//console.log(h4element)
let h4elementSplit = h4element.split('"><span>') let h4elementSplit = h4element.split('"><span>')
let url = h4elementSplit[0].split('<a href="')[1] let url = h4elementSplit[0].split('<a href="')[1]
@@ -166,12 +176,13 @@ async function csetforetell_inner(cookie){
"platform": "CSET-foretell", "platform": "CSET-foretell",
...moreinfo ...moreinfo
}) })
if(i % 30 == 0){ if(i % 30 == 0 && !(process.env.DEBUG_MODE == "on" || DEBUG_MODE == "on")){
console.log(`Page #${i}`) console.log(`Page #${i}` && !(process.env.DEBUG_MODE == "on" || DEBUG_MODE == "on"))
console.log(question) console.log(question)
} }
results.push(question) results.push(question)
if(process.env.DEBUG_MODE == "on"){ if(process.env.DEBUG_MODE == "on" || DEBUG_MODE == "on"){
console.log(url)
console.log(question) console.log(question)
} }
@@ -179,11 +190,12 @@ async function csetforetell_inner(cookie){
console.log(error) console.log(error)
console.log(`We encountered some error when fetching the URL: ${url}, so it won't appear on the final json`) console.log(`We encountered some error when fetching the URL: ${url}, so it won't appear on the final json`)
} }
i=i+1
} }
// console.log("Sleeping for ~5secs so as to not be as noticeable to the cset-foretell servers") i++
//i=Number(i)+1
console.log("Sleeping for ~5secs so as to not be as noticeable to the cset-foretell servers")
await sleep(5000 + Math.random()*1000) // don't be as noticeable await sleep(5000 + Math.random()*1000) // don't be as noticeable
try{ try{
@@ -198,7 +210,6 @@ async function csetforetell_inner(cookie){
// console.log(results) // console.log(results)
await upsert(results, "csetforetell-questions") await upsert(results, "csetforetell-questions")
let end = Date.now() let end = Date.now()
let difference = end-init let difference = end-init
console.log(`Took ${difference/1000} seconds, or ${difference/(1000*60)} minutes.`) console.log(`Took ${difference/1000} seconds, or ${difference/(1000*60)} minutes.`)