Update Analysis.md

2018-11-15 23:51:45 +01:00 · 2018-11-15 23:51:45 +01:00 · bc51391b7b
commit bc51391b7b
parent 9a746b451d
1 changed files with 73 additions and 73 deletions
--- a/rat/EA-predictions/Analysis.md
+++ b/rat/EA-predictions/Analysis.md
@ -124,78 +124,78 @@ I expect to answer those questions in the near future.

 You can find [Predictions.csv](https://nunosempere.github.io/rat/EA-predictions/Predictions.csv) and [answers.csv](https://nunosempere.github.io/rat/EA-predictions/answers.csv) by following the links.

-```
-> ### We first read the data
-> DataFrame <- read.csv(file="Predictions.csv", header=TRUE, sep=",")
-> View(D)
-> 
-> ### We then create a different object for storing the cleaned up data
-> DataFrameProcessed=data.frame(matrix(nrow=35,ncol=52))
-> LowerBoundsPersoni=NULL
-> 
-> ### And clean up the data.
-> for(i in c(1:35)){
-+    as.numeric(strsplit(as.character(DataFrame[i,5]),", ")[[1]]) -> LowerBoundsPersoni
-+ as.numeric(strsplit(as.character(DataFrame[i,6]),", ")[[1]]) -> UpperBoundsPersoni
-> 
-+    for(j in c(1:26)){
-+       DataFrameProcessed[i,(j*2)-1] <- LowerBoundsPersoni[j]
-+       DataFrameProcessed[i,(j*2)] <- UpperBoundsPersoni[j]
-+   }
-+ }
-> ### It shows that I've been programming in C.
->
-> c(paste("Person-",c(1:35),sep=""))->rownames(DataFrameProcessed)
-> c(rbind(paste("Q",c(1:26),"-lower",sep=""),paste("Q",c(1:26),"-upper",sep="")))->colnames(DataFrameProcessed)
-> View(DataFrameProcessed)
->
-> answers <- read.csv(file="answers.csv", header=TRUE, sep=",")[,2]
->
-> ### Although every person answered every question, 2 anwers are not available.
-> replaceNA <-function(x,y){
-+ return( ifelse(is.na(x), y, x) )
-+ }
->
-> sum2<-function(x){ return(sum(replaceNA(x))) }
->
-> ### Because some of the answers are not available, the comparison will give a NA. So we need sum2.
-> total <- function(x){
-+ y=c(1:26)*2
-+ return(sum2(as.vector((answers>=DataFrameProcessed[x,y-1])) & as.vector(answers<=DataFrameProcessed[x,y])) ) 
-+ }
->
-> ### We can get the Brier scores:
-> Brierscore <- function(x){ return( (x*(1-0.8)^2 + (24-x)*(.8)^2)/24) }
-> 
-> ### vapply applies a function to every member of a vector. 
-> vapply(c(1:35),total,numeric(1))->DataFrameProcessed$totalcorrect
-> vapply(DataFrameProcessed$totalcorrect,Brierscore,numeric(1))->DataFrameProcessed$Brierscores
->
-> ### We can also aggregate stuff by question:
-> totalperquestion <-function(x){
-+ z=c(1:35)
-+ return(sum2(as.vector((answers[x]>=DataFrameProcessed[z,2*x-1])) & as.vector(answers[x]<=DataFrameProcessed[z,2*x])) ) 
-+ }
-> vapply(c(1:26), totalperquestion, numeric(1)) -> TotalCorrect
-> percentageperquestion <-function(x){return( totalperquestion(x)*100/35)}
-> png("Scatterplot-questions.png", units="px", width=3200, height=3200, res=500)
-> 
-> ### And you can get graphics using
-> png("Scatterplot3.png", units="px", width=3200, height=3200, res=500)
-> plot(DataFrameProcessed$totalcorrect*100/24, xlab= "Persons, from 1 to 35", ylab="% of questions they got right", main="Scatterplot!")
-> abline(h=mean(DataFrameProcessed$totalcorrect)*100/24, col="red")
-> abline(h=80, col="blue")
-> text(x=20, y=56, col="red", "Actual average")
-> text(x=20, y=81, col="blue", "Target average")
-> dev.off()
-
-> ### As well as with the function hist(), whose parameter break = number allows you to control the granularity of the histogram.
-> png("Scatterplot-questions.png", units="px", width=3200, height=3200, res=500)
-> plot(vapply(p, percentageperquestion, numeric(1)), ylim=c(0,100), main="Results aggregated per question", xlab="Questions, from 1 to 24", ylab= "% of participants who got that question right")
-> abline(h=80, col="blue")
-> abline(h=55.14286, col="red")
-> text(x=12.5, y=80+4, "Target average % of right-guessers per question", col="blue")
-> text(x=12.5, y=55.14286-4, "Actual average % of right-guessers per question", col="red")
-> dev.off()
+```  
+> ### We first read the data  
+> DataFrame <- read.csv(file="Predictions.csv", header=TRUE, sep=",")  
+> View(D)  
+>   
+> ### We then create a different object for storing the cleaned up data  
+> DataFrameProcessed=data.frame(matrix(nrow=35,ncol=52))  
+> LowerBoundsPersoni=NULL  
+>   
+> ### And clean up the data.  
+> for(i in c(1:35)){  
+    as.numeric(strsplit(as.character(DataFrame[i,5]),", ")[[1]]) -> LowerBoundsPersoni  
+ as.numeric(strsplit(as.character(DataFrame[i,6]),", ")[[1]]) -> UpperBoundsPersoni  
+>   
+    for(j in c(1:26)){  
+       DataFrameProcessed[i,(j*2)-1] <- LowerBoundsPersoni[j]  
+       DataFrameProcessed[i,(j*2)] <- UpperBoundsPersoni[j]  
+   }  
+ }  
+> ### It shows that I've been programming in C.  
+>  
+> c(paste("Person-",c(1:35),sep=""))->rownames(DataFrameProcessed)  
+> c(rbind(paste("Q",c(1:26),"-lower",sep=""),paste("Q",c(1:26),"-upper",sep="")))->colnames(DataFrameProcessed)  
+> View(DataFrameProcessed)  
+>  
+> answers <- read.csv(file="answers.csv", header=TRUE, sep=",")[,2]  
+>  
+> ### Although every person answered every question, 2 anwers are not available.  
+> replaceNA <-function(x,y){  
+ return( ifelse(is.na(x), y, x) )  
+ }  
+>  
+> sum2<-function(x){ return(sum(replaceNA(x))) }  
+>  
+> ### Because some of the answers are not available, the comparison will give a NA. So we need sum2.  
+> total <- function(x){  
+ y=c(1:26)*2  
+ return(sum2(as.vector((answers>=DataFrameProcessed[x,y-1])) & as.vector(answers<=DataFrameProcessed[x,y])) )   
+ }  
+>  
+> ### We can get the Brier scores:  
+> Brierscore <- function(x){ return( (x*(1-0.8)^2 + (24-x)*(.8)^2)/24) }  
+>   
+> ### vapply applies a function to every member of a vector.   
+> vapply(c(1:35),total,numeric(1))->DataFrameProcessed$totalcorrect  
+> vapply(DataFrameProcessed$totalcorrect,Brierscore,numeric(1))->DataFrameProcessed$Brierscores  
+>  
+> ### We can also aggregate stuff by question:  
+> totalperquestion <-function(x){  
+ z=c(1:35)  
+ return(sum2(as.vector((answers[x]>=DataFrameProcessed[z,2*x-1])) & as.vector(answers[x]<=DataFrameProcessed[z,2*x])) )   
+ }  
+> vapply(c(1:26), totalperquestion, numeric(1)) -> TotalCorrect  
+> percentageperquestion <-function(x){return( totalperquestion(x)*100/35)}  
+> png("Scatterplot-questions.png", units="px", width=3200, height=3200, res=500)  
+>   
+> ### And you can get graphics using  
+> png("Scatterplot3.png", units="px", width=3200, height=3200, res=500)  
+> plot(DataFrameProcessed$totalcorrect*100/24, xlab= "Persons, from 1 to 35", ylab="% of questions they got right", main="Scatterplot!")  
+> abline(h=mean(DataFrameProcessed$totalcorrect)*100/24, col="red")  
+> abline(h=80, col="blue")  
+> text(x=20, y=56, col="red", "Actual average")  
+> text(x=20, y=81, col="blue", "Target average")  
+> dev.off()  
+  
+> ### As well as with the function hist(), whose parameter break = number allows you to control the granularity of the histogram.  
+> png("Scatterplot-questions.png", units="px", width=3200, height=3200, res=500)  
+> plot(vapply(p, percentageperquestion, numeric(1)), ylim=c(0,100), main="Results aggregated per question", xlab="Questions, from 1 to 24", ylab= "% of participants who got that question right")  
+> abline(h=80, col="blue")  
+> abline(h=55.14286, col="red")  
+> text(x=12.5, y=80+4, "Target average % of right-guessers per question", col="blue")  
+> text(x=12.5, y=55.14286-4, "Actual average % of right-guessers per question", col="red")  
+> dev.off()  

 ´´´