diff --git a/rat/EA-predictions/Analysis.md b/rat/EA-predictions/Analysis.md index 3f9b509..676f429 100644 --- a/rat/EA-predictions/Analysis.md +++ b/rat/EA-predictions/Analysis.md @@ -124,78 +124,78 @@ I expect to answer those questions in the near future. You can find [Predictions.csv](https://nunosempere.github.io/rat/EA-predictions/Predictions.csv) and [answers.csv](https://nunosempere.github.io/rat/EA-predictions/answers.csv) by following the links. -``` -> ### We first read the data -> DataFrame <- read.csv(file="Predictions.csv", header=TRUE, sep=",") -> View(D) -> -> ### We then create a different object for storing the cleaned up data -> DataFrameProcessed=data.frame(matrix(nrow=35,ncol=52)) -> LowerBoundsPersoni=NULL -> -> ### And clean up the data. -> for(i in c(1:35)){ -+ as.numeric(strsplit(as.character(DataFrame[i,5]),", ")[[1]]) -> LowerBoundsPersoni -+ as.numeric(strsplit(as.character(DataFrame[i,6]),", ")[[1]]) -> UpperBoundsPersoni -> -+ for(j in c(1:26)){ -+ DataFrameProcessed[i,(j*2)-1] <- LowerBoundsPersoni[j] -+ DataFrameProcessed[i,(j*2)] <- UpperBoundsPersoni[j] -+ } -+ } -> ### It shows that I've been programming in C. -> -> c(paste("Person-",c(1:35),sep=""))->rownames(DataFrameProcessed) -> c(rbind(paste("Q",c(1:26),"-lower",sep=""),paste("Q",c(1:26),"-upper",sep="")))->colnames(DataFrameProcessed) -> View(DataFrameProcessed) -> -> answers <- read.csv(file="answers.csv", header=TRUE, sep=",")[,2] -> -> ### Although every person answered every question, 2 anwers are not available. -> replaceNA <-function(x,y){ -+ return( ifelse(is.na(x), y, x) ) -+ } -> -> sum2<-function(x){ return(sum(replaceNA(x))) } -> -> ### Because some of the answers are not available, the comparison will give a NA. So we need sum2. -> total <- function(x){ -+ y=c(1:26)*2 -+ return(sum2(as.vector((answers>=DataFrameProcessed[x,y-1])) & as.vector(answers<=DataFrameProcessed[x,y])) ) -+ } -> -> ### We can get the Brier scores: -> Brierscore <- function(x){ return( (x*(1-0.8)^2 + (24-x)*(.8)^2)/24) } -> -> ### vapply applies a function to every member of a vector. -> vapply(c(1:35),total,numeric(1))->DataFrameProcessed$totalcorrect -> vapply(DataFrameProcessed$totalcorrect,Brierscore,numeric(1))->DataFrameProcessed$Brierscores -> -> ### We can also aggregate stuff by question: -> totalperquestion <-function(x){ -+ z=c(1:35) -+ return(sum2(as.vector((answers[x]>=DataFrameProcessed[z,2*x-1])) & as.vector(answers[x]<=DataFrameProcessed[z,2*x])) ) -+ } -> vapply(c(1:26), totalperquestion, numeric(1)) -> TotalCorrect -> percentageperquestion <-function(x){return( totalperquestion(x)*100/35)} -> png("Scatterplot-questions.png", units="px", width=3200, height=3200, res=500) -> -> ### And you can get graphics using -> png("Scatterplot3.png", units="px", width=3200, height=3200, res=500) -> plot(DataFrameProcessed$totalcorrect*100/24, xlab= "Persons, from 1 to 35", ylab="% of questions they got right", main="Scatterplot!") -> abline(h=mean(DataFrameProcessed$totalcorrect)*100/24, col="red") -> abline(h=80, col="blue") -> text(x=20, y=56, col="red", "Actual average") -> text(x=20, y=81, col="blue", "Target average") -> dev.off() - -> ### As well as with the function hist(), whose parameter break = number allows you to control the granularity of the histogram. -> png("Scatterplot-questions.png", units="px", width=3200, height=3200, res=500) -> plot(vapply(p, percentageperquestion, numeric(1)), ylim=c(0,100), main="Results aggregated per question", xlab="Questions, from 1 to 24", ylab= "% of participants who got that question right") -> abline(h=80, col="blue") -> abline(h=55.14286, col="red") -> text(x=12.5, y=80+4, "Target average % of right-guessers per question", col="blue") -> text(x=12.5, y=55.14286-4, "Actual average % of right-guessers per question", col="red") -> dev.off() +``` +> ### We first read the data +> DataFrame <- read.csv(file="Predictions.csv", header=TRUE, sep=",") +> View(D) +> +> ### We then create a different object for storing the cleaned up data +> DataFrameProcessed=data.frame(matrix(nrow=35,ncol=52)) +> LowerBoundsPersoni=NULL +> +> ### And clean up the data. +> for(i in c(1:35)){ ++ as.numeric(strsplit(as.character(DataFrame[i,5]),", ")[[1]]) -> LowerBoundsPersoni ++ as.numeric(strsplit(as.character(DataFrame[i,6]),", ")[[1]]) -> UpperBoundsPersoni +> ++ for(j in c(1:26)){ ++ DataFrameProcessed[i,(j*2)-1] <- LowerBoundsPersoni[j] ++ DataFrameProcessed[i,(j*2)] <- UpperBoundsPersoni[j] ++ } ++ } +> ### It shows that I've been programming in C. +> +> c(paste("Person-",c(1:35),sep=""))->rownames(DataFrameProcessed) +> c(rbind(paste("Q",c(1:26),"-lower",sep=""),paste("Q",c(1:26),"-upper",sep="")))->colnames(DataFrameProcessed) +> View(DataFrameProcessed) +> +> answers <- read.csv(file="answers.csv", header=TRUE, sep=",")[,2] +> +> ### Although every person answered every question, 2 anwers are not available. +> replaceNA <-function(x,y){ ++ return( ifelse(is.na(x), y, x) ) ++ } +> +> sum2<-function(x){ return(sum(replaceNA(x))) } +> +> ### Because some of the answers are not available, the comparison will give a NA. So we need sum2. +> total <- function(x){ ++ y=c(1:26)*2 ++ return(sum2(as.vector((answers>=DataFrameProcessed[x,y-1])) & as.vector(answers<=DataFrameProcessed[x,y])) ) ++ } +> +> ### We can get the Brier scores: +> Brierscore <- function(x){ return( (x*(1-0.8)^2 + (24-x)*(.8)^2)/24) } +> +> ### vapply applies a function to every member of a vector. +> vapply(c(1:35),total,numeric(1))->DataFrameProcessed$totalcorrect +> vapply(DataFrameProcessed$totalcorrect,Brierscore,numeric(1))->DataFrameProcessed$Brierscores +> +> ### We can also aggregate stuff by question: +> totalperquestion <-function(x){ ++ z=c(1:35) ++ return(sum2(as.vector((answers[x]>=DataFrameProcessed[z,2*x-1])) & as.vector(answers[x]<=DataFrameProcessed[z,2*x])) ) ++ } +> vapply(c(1:26), totalperquestion, numeric(1)) -> TotalCorrect +> percentageperquestion <-function(x){return( totalperquestion(x)*100/35)} +> png("Scatterplot-questions.png", units="px", width=3200, height=3200, res=500) +> +> ### And you can get graphics using +> png("Scatterplot3.png", units="px", width=3200, height=3200, res=500) +> plot(DataFrameProcessed$totalcorrect*100/24, xlab= "Persons, from 1 to 35", ylab="% of questions they got right", main="Scatterplot!") +> abline(h=mean(DataFrameProcessed$totalcorrect)*100/24, col="red") +> abline(h=80, col="blue") +> text(x=20, y=56, col="red", "Actual average") +> text(x=20, y=81, col="blue", "Target average") +> dev.off() + +> ### As well as with the function hist(), whose parameter break = number allows you to control the granularity of the histogram. +> png("Scatterplot-questions.png", units="px", width=3200, height=3200, res=500) +> plot(vapply(p, percentageperquestion, numeric(1)), ylim=c(0,100), main="Results aggregated per question", xlab="Questions, from 1 to 24", ylab= "% of participants who got that question right") +> abline(h=80, col="blue") +> abline(h=55.14286, col="red") +> text(x=12.5, y=80+4, "Target average % of right-guessers per question", col="blue") +> text(x=12.5, y=55.14286-4, "Actual average % of right-guessers per question", col="red") +> dev.off() ยดยดยด