Fixed annoying cset bug
This commit is contained in:
		
							parent
							
								
									d66d4b43c9
								
							
						
					
					
						commit
						d733804a72
					
				| 
						 | 
					@ -9,15 +9,18 @@ import {upsert} from "../utils/mongo-wrapper.js"
 | 
				
			||||||
/* Definitions */
 | 
					/* Definitions */
 | 
				
			||||||
let htmlEndPoint = 'https://www.cset-foretell.com/questions?page='
 | 
					let htmlEndPoint = 'https://www.cset-foretell.com/questions?page='
 | 
				
			||||||
String.prototype.replaceAll = function replaceAll(search, replace) { return this.split(search).join(replace); }
 | 
					String.prototype.replaceAll = function replaceAll(search, replace) { return this.split(search).join(replace); }
 | 
				
			||||||
 | 
					const DEBUG_MODE = "off" // "on"
 | 
				
			||||||
/* Support functions */
 | 
					/* Support functions */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
async function fetchPage(page, cookie){
 | 
					async function fetchPage(page, cookie){
 | 
				
			||||||
 | 
					  console.log(page)
 | 
				
			||||||
  if(page==1){
 | 
					  if(page==1){
 | 
				
			||||||
    cookie=cookie.split(";")[0] // Interesting that it otherwise doesn't work :(
 | 
					    cookie=cookie.split(";")[0] // Interesting that it otherwise doesn't work :(
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					  let urlEndpoint = htmlEndPoint+page
 | 
				
			||||||
 | 
					  console.log(urlEndpoint)
 | 
				
			||||||
  let response  = await axios({
 | 
					  let response  = await axios({
 | 
				
			||||||
    url: htmlEndPoint+page,
 | 
					    url: urlEndpoint,
 | 
				
			||||||
    method: 'GET',
 | 
					    method: 'GET',
 | 
				
			||||||
    headers: ({ 
 | 
					    headers: ({ 
 | 
				
			||||||
    'Content-Type': 'text/html',
 | 
					    'Content-Type': 'text/html',
 | 
				
			||||||
| 
						 | 
					@ -123,7 +126,12 @@ async function fetchStats(questionUrl, cookie){
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
function isEnd(html){
 | 
					function isEnd(html){
 | 
				
			||||||
  return html.includes("No questions match your filter")
 | 
					  let isEndBool = html.includes("No questions match your filter")
 | 
				
			||||||
 | 
					  if(isEndBool){
 | 
				
			||||||
 | 
					    //console.log(html)
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  console.log(`IsEnd? ${isEndBool}`)
 | 
				
			||||||
 | 
					  return isEndBool
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
function sleep(ms) {
 | 
					function sleep(ms) {
 | 
				
			||||||
| 
						 | 
					@ -133,7 +141,7 @@ function sleep(ms) {
 | 
				
			||||||
/* Body */
 | 
					/* Body */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
async function csetforetell_inner(cookie){
 | 
					async function csetforetell_inner(cookie){
 | 
				
			||||||
  let i=0
 | 
					  let i=1
 | 
				
			||||||
  let response = await fetchPage(i, cookie)
 | 
					  let response = await fetchPage(i, cookie)
 | 
				
			||||||
  let results = []
 | 
					  let results = []
 | 
				
			||||||
  let init = Date.now()
 | 
					  let init = Date.now()
 | 
				
			||||||
| 
						 | 
					@ -143,8 +151,9 @@ async function csetforetell_inner(cookie){
 | 
				
			||||||
    let htmlLines = response.split("\n")
 | 
					    let htmlLines = response.split("\n")
 | 
				
			||||||
    let h4elements = htmlLines.filter(str => str.includes("<h5><a href=") || str.includes("<h4><a href=")) 
 | 
					    let h4elements = htmlLines.filter(str => str.includes("<h5><a href=") || str.includes("<h4><a href=")) 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if(process.env.DEBUG_MODE == "on"){
 | 
					    if(process.env.DEBUG_MODE == "on" || DEBUG_MODE == "on"){
 | 
				
			||||||
      console.log(response)
 | 
					      //console.log(response)
 | 
				
			||||||
 | 
					      console.log(h4elements)
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    //console.log("")
 | 
					    //console.log("")
 | 
				
			||||||
| 
						 | 
					@ -152,6 +161,7 @@ async function csetforetell_inner(cookie){
 | 
				
			||||||
    //console.log(h4elements)
 | 
					    //console.log(h4elements)
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    for(let h4element of h4elements){
 | 
					    for(let h4element of h4elements){
 | 
				
			||||||
 | 
					      //console.log(h4element)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      let h4elementSplit = h4element.split('"><span>')
 | 
					      let h4elementSplit = h4element.split('"><span>')
 | 
				
			||||||
      let url = h4elementSplit[0].split('<a href="')[1]
 | 
					      let url = h4elementSplit[0].split('<a href="')[1]
 | 
				
			||||||
| 
						 | 
					@ -166,12 +176,13 @@ async function csetforetell_inner(cookie){
 | 
				
			||||||
            "platform": "CSET-foretell",
 | 
					            "platform": "CSET-foretell",
 | 
				
			||||||
            ...moreinfo
 | 
					            ...moreinfo
 | 
				
			||||||
          })
 | 
					          })
 | 
				
			||||||
          if(i % 30 == 0){
 | 
					          if(i % 30 == 0 && !(process.env.DEBUG_MODE == "on" || DEBUG_MODE == "on")){
 | 
				
			||||||
            console.log(`Page #${i}`)
 | 
					            console.log(`Page #${i}` && !(process.env.DEBUG_MODE == "on" || DEBUG_MODE == "on"))
 | 
				
			||||||
            console.log(question)
 | 
					            console.log(question)
 | 
				
			||||||
          }
 | 
					          }
 | 
				
			||||||
          results.push(question)
 | 
					          results.push(question)
 | 
				
			||||||
          if(process.env.DEBUG_MODE == "on"){
 | 
					          if(process.env.DEBUG_MODE == "on" || DEBUG_MODE == "on"){
 | 
				
			||||||
 | 
					            console.log(url)
 | 
				
			||||||
            console.log(question)
 | 
					            console.log(question)
 | 
				
			||||||
          }
 | 
					          }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -179,11 +190,12 @@ async function csetforetell_inner(cookie){
 | 
				
			||||||
        console.log(error)
 | 
					        console.log(error)
 | 
				
			||||||
        console.log(`We encountered some error when fetching the URL: ${url}, so it won't appear on the final json`)
 | 
					        console.log(`We encountered some error when fetching the URL: ${url}, so it won't appear on the final json`)
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					 | 
				
			||||||
      i=i+1
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    // console.log("Sleeping for ~5secs so as to not be as noticeable to the cset-foretell servers")
 | 
					    i++
 | 
				
			||||||
 | 
					    //i=Number(i)+1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    console.log("Sleeping for ~5secs so as to not be as noticeable to the cset-foretell servers")
 | 
				
			||||||
    await sleep(5000 + Math.random()*1000) // don't be as noticeable
 | 
					    await sleep(5000 + Math.random()*1000) // don't be as noticeable
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    try{
 | 
					    try{
 | 
				
			||||||
| 
						 | 
					@ -198,7 +210,6 @@ async function csetforetell_inner(cookie){
 | 
				
			||||||
  // console.log(results)
 | 
					  // console.log(results)
 | 
				
			||||||
  await upsert(results, "csetforetell-questions")
 | 
					  await upsert(results, "csetforetell-questions")
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  let end = Date.now()
 | 
					  let end = Date.now()
 | 
				
			||||||
  let difference = end-init
 | 
					  let difference = end-init
 | 
				
			||||||
  console.log(`Took ${difference/1000} seconds, or ${difference/(1000*60)} minutes.`)
 | 
					  console.log(`Took ${difference/1000} seconds, or ${difference/(1000*60)} minutes.`)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user