Update Analysis.md
This commit is contained in:
parent
9a746b451d
commit
bc51391b7b
|
@ -124,78 +124,78 @@ I expect to answer those questions in the near future.
|
||||||
|
|
||||||
You can find [Predictions.csv](https://nunosempere.github.io/rat/EA-predictions/Predictions.csv) and [answers.csv](https://nunosempere.github.io/rat/EA-predictions/answers.csv) by following the links.
|
You can find [Predictions.csv](https://nunosempere.github.io/rat/EA-predictions/Predictions.csv) and [answers.csv](https://nunosempere.github.io/rat/EA-predictions/answers.csv) by following the links.
|
||||||
|
|
||||||
```
|
```
|
||||||
> ### We first read the data
|
> ### We first read the data
|
||||||
> DataFrame <- read.csv(file="Predictions.csv", header=TRUE, sep=",")
|
> DataFrame <- read.csv(file="Predictions.csv", header=TRUE, sep=",")
|
||||||
> View(D)
|
> View(D)
|
||||||
>
|
>
|
||||||
> ### We then create a different object for storing the cleaned up data
|
> ### We then create a different object for storing the cleaned up data
|
||||||
> DataFrameProcessed=data.frame(matrix(nrow=35,ncol=52))
|
> DataFrameProcessed=data.frame(matrix(nrow=35,ncol=52))
|
||||||
> LowerBoundsPersoni=NULL
|
> LowerBoundsPersoni=NULL
|
||||||
>
|
>
|
||||||
> ### And clean up the data.
|
> ### And clean up the data.
|
||||||
> for(i in c(1:35)){
|
> for(i in c(1:35)){
|
||||||
+ as.numeric(strsplit(as.character(DataFrame[i,5]),", ")[[1]]) -> LowerBoundsPersoni
|
+ as.numeric(strsplit(as.character(DataFrame[i,5]),", ")[[1]]) -> LowerBoundsPersoni
|
||||||
+ as.numeric(strsplit(as.character(DataFrame[i,6]),", ")[[1]]) -> UpperBoundsPersoni
|
+ as.numeric(strsplit(as.character(DataFrame[i,6]),", ")[[1]]) -> UpperBoundsPersoni
|
||||||
>
|
>
|
||||||
+ for(j in c(1:26)){
|
+ for(j in c(1:26)){
|
||||||
+ DataFrameProcessed[i,(j*2)-1] <- LowerBoundsPersoni[j]
|
+ DataFrameProcessed[i,(j*2)-1] <- LowerBoundsPersoni[j]
|
||||||
+ DataFrameProcessed[i,(j*2)] <- UpperBoundsPersoni[j]
|
+ DataFrameProcessed[i,(j*2)] <- UpperBoundsPersoni[j]
|
||||||
+ }
|
+ }
|
||||||
+ }
|
+ }
|
||||||
> ### It shows that I've been programming in C.
|
> ### It shows that I've been programming in C.
|
||||||
>
|
>
|
||||||
> c(paste("Person-",c(1:35),sep=""))->rownames(DataFrameProcessed)
|
> c(paste("Person-",c(1:35),sep=""))->rownames(DataFrameProcessed)
|
||||||
> c(rbind(paste("Q",c(1:26),"-lower",sep=""),paste("Q",c(1:26),"-upper",sep="")))->colnames(DataFrameProcessed)
|
> c(rbind(paste("Q",c(1:26),"-lower",sep=""),paste("Q",c(1:26),"-upper",sep="")))->colnames(DataFrameProcessed)
|
||||||
> View(DataFrameProcessed)
|
> View(DataFrameProcessed)
|
||||||
>
|
>
|
||||||
> answers <- read.csv(file="answers.csv", header=TRUE, sep=",")[,2]
|
> answers <- read.csv(file="answers.csv", header=TRUE, sep=",")[,2]
|
||||||
>
|
>
|
||||||
> ### Although every person answered every question, 2 anwers are not available.
|
> ### Although every person answered every question, 2 anwers are not available.
|
||||||
> replaceNA <-function(x,y){
|
> replaceNA <-function(x,y){
|
||||||
+ return( ifelse(is.na(x), y, x) )
|
+ return( ifelse(is.na(x), y, x) )
|
||||||
+ }
|
+ }
|
||||||
>
|
>
|
||||||
> sum2<-function(x){ return(sum(replaceNA(x))) }
|
> sum2<-function(x){ return(sum(replaceNA(x))) }
|
||||||
>
|
>
|
||||||
> ### Because some of the answers are not available, the comparison will give a NA. So we need sum2.
|
> ### Because some of the answers are not available, the comparison will give a NA. So we need sum2.
|
||||||
> total <- function(x){
|
> total <- function(x){
|
||||||
+ y=c(1:26)*2
|
+ y=c(1:26)*2
|
||||||
+ return(sum2(as.vector((answers>=DataFrameProcessed[x,y-1])) & as.vector(answers<=DataFrameProcessed[x,y])) )
|
+ return(sum2(as.vector((answers>=DataFrameProcessed[x,y-1])) & as.vector(answers<=DataFrameProcessed[x,y])) )
|
||||||
+ }
|
+ }
|
||||||
>
|
>
|
||||||
> ### We can get the Brier scores:
|
> ### We can get the Brier scores:
|
||||||
> Brierscore <- function(x){ return( (x*(1-0.8)^2 + (24-x)*(.8)^2)/24) }
|
> Brierscore <- function(x){ return( (x*(1-0.8)^2 + (24-x)*(.8)^2)/24) }
|
||||||
>
|
>
|
||||||
> ### vapply applies a function to every member of a vector.
|
> ### vapply applies a function to every member of a vector.
|
||||||
> vapply(c(1:35),total,numeric(1))->DataFrameProcessed$totalcorrect
|
> vapply(c(1:35),total,numeric(1))->DataFrameProcessed$totalcorrect
|
||||||
> vapply(DataFrameProcessed$totalcorrect,Brierscore,numeric(1))->DataFrameProcessed$Brierscores
|
> vapply(DataFrameProcessed$totalcorrect,Brierscore,numeric(1))->DataFrameProcessed$Brierscores
|
||||||
>
|
>
|
||||||
> ### We can also aggregate stuff by question:
|
> ### We can also aggregate stuff by question:
|
||||||
> totalperquestion <-function(x){
|
> totalperquestion <-function(x){
|
||||||
+ z=c(1:35)
|
+ z=c(1:35)
|
||||||
+ return(sum2(as.vector((answers[x]>=DataFrameProcessed[z,2*x-1])) & as.vector(answers[x]<=DataFrameProcessed[z,2*x])) )
|
+ return(sum2(as.vector((answers[x]>=DataFrameProcessed[z,2*x-1])) & as.vector(answers[x]<=DataFrameProcessed[z,2*x])) )
|
||||||
+ }
|
+ }
|
||||||
> vapply(c(1:26), totalperquestion, numeric(1)) -> TotalCorrect
|
> vapply(c(1:26), totalperquestion, numeric(1)) -> TotalCorrect
|
||||||
> percentageperquestion <-function(x){return( totalperquestion(x)*100/35)}
|
> percentageperquestion <-function(x){return( totalperquestion(x)*100/35)}
|
||||||
> png("Scatterplot-questions.png", units="px", width=3200, height=3200, res=500)
|
> png("Scatterplot-questions.png", units="px", width=3200, height=3200, res=500)
|
||||||
>
|
>
|
||||||
> ### And you can get graphics using
|
> ### And you can get graphics using
|
||||||
> png("Scatterplot3.png", units="px", width=3200, height=3200, res=500)
|
> png("Scatterplot3.png", units="px", width=3200, height=3200, res=500)
|
||||||
> plot(DataFrameProcessed$totalcorrect*100/24, xlab= "Persons, from 1 to 35", ylab="% of questions they got right", main="Scatterplot!")
|
> plot(DataFrameProcessed$totalcorrect*100/24, xlab= "Persons, from 1 to 35", ylab="% of questions they got right", main="Scatterplot!")
|
||||||
> abline(h=mean(DataFrameProcessed$totalcorrect)*100/24, col="red")
|
> abline(h=mean(DataFrameProcessed$totalcorrect)*100/24, col="red")
|
||||||
> abline(h=80, col="blue")
|
> abline(h=80, col="blue")
|
||||||
> text(x=20, y=56, col="red", "Actual average")
|
> text(x=20, y=56, col="red", "Actual average")
|
||||||
> text(x=20, y=81, col="blue", "Target average")
|
> text(x=20, y=81, col="blue", "Target average")
|
||||||
> dev.off()
|
> dev.off()
|
||||||
|
|
||||||
> ### As well as with the function hist(), whose parameter break = number allows you to control the granularity of the histogram.
|
> ### As well as with the function hist(), whose parameter break = number allows you to control the granularity of the histogram.
|
||||||
> png("Scatterplot-questions.png", units="px", width=3200, height=3200, res=500)
|
> png("Scatterplot-questions.png", units="px", width=3200, height=3200, res=500)
|
||||||
> plot(vapply(p, percentageperquestion, numeric(1)), ylim=c(0,100), main="Results aggregated per question", xlab="Questions, from 1 to 24", ylab= "% of participants who got that question right")
|
> plot(vapply(p, percentageperquestion, numeric(1)), ylim=c(0,100), main="Results aggregated per question", xlab="Questions, from 1 to 24", ylab= "% of participants who got that question right")
|
||||||
> abline(h=80, col="blue")
|
> abline(h=80, col="blue")
|
||||||
> abline(h=55.14286, col="red")
|
> abline(h=55.14286, col="red")
|
||||||
> text(x=12.5, y=80+4, "Target average % of right-guessers per question", col="blue")
|
> text(x=12.5, y=80+4, "Target average % of right-guessers per question", col="blue")
|
||||||
> text(x=12.5, y=55.14286-4, "Actual average % of right-guessers per question", col="red")
|
> text(x=12.5, y=55.14286-4, "Actual average % of right-guessers per question", col="red")
|
||||||
> dev.off()
|
> dev.off()
|
||||||
|
|
||||||
´´´
|
´´´
|
||||||
|
|
Loading…
Reference in New Issue
Block a user