sampleSize.knit

Sample Size

Import the workspace established at the top levels script

load(file = "C:/Users/DELL/Documents/integration-simulations/index.RData")
library(pracma)
library(ggplot2)

Set default arguments

nTrials = 60

Estimate parameters for these simulations

df = n1000
df = as.data.frame(df)
df$NLL = 0
df$NLL_alt = 0
for (i in 1:nrow(df)){
  row = which(round(freeParameters$theta, 2) == round(df$theta[i], 2) & 
                round(freeParameters$phi, 2) == round(df$phi[i], 2))
  preds = as.numeric(predictions[row, ])
  preds = rep(preds, times = nTrials/length(preds))
  data = rbind(trialSet, trialSet)
  data$Transferred = noise(preds, data$Endowment)
  
  result = tryCatch({fmincon(obj_function,x0 = initial_params,
                     lb = lower_bounds, ub = upper_bounds,
                     df = data)}, error = function(e){data.frame(par = NA)})
  resultAlt = tryCatch({optimizePerCondition(df = data)}, 
                       error = function(e){data.frame(par = NA)})
  if (is.na(resultAlt$par[1]) | is.na(result$par[1])){
    df$NLL[i] = NA; df$NLL_alt[i] = NA; next
  }
  
  data$Preds = generatePredictions(result$par, data)
  data$PredsAlt = generateAltPerCondition(resultAlt$par, data)
    
  if (sum(data$Preds == data$Transferred)==nrow(data)){
    data$Preds[sample(nrow(data), 1)] =+ 1
  }
  if (sum(data$PredsAlt == data$Transferred)==nrow(data)){
    data$PredsAlt[sample(nrow(data), 1)] =+ 1
  }
  
  df$NLL[i] = -sum(dnorm(data$Transferred/data$Endowment, 
                         mean = data$Preds/data$Endowment, 
                         sd = sd((data$Transferred - data$Preds)/data$Endowment),
                         log = T))
  df$NLL_alt[i] = -sum(dnorm(data$Transferred/data$Endowment, 
                             mean = data$PredsAlt/data$Endowment, 
                         sd = sd((data$Transferred - data$PredsAlt)/data$Endowment), log = T))
  df$thetaR[i] = result$par[1]; df$phiR[i] = result$par[2]
  df$theta0[i] = resultAlt$par[1]; df$theta5[i] = resultAlt$par[2]
  df$theta1[i] = resultAlt$par[3];
}

Count number of missing instances

which(is.na(df$NLL) | is.na(df$NLL_alt))
## integer(0)

Compute BIC

if (sum(is.na(df$NLL) | is.na(df$NLL_alt)) > 0){
  df = df[-which(is.na(df$NLL) | is.na(df$NLL_alt)), ]}
df$BIC = 2 * df$NLL + 2 * log(nTrials)
df$BIC_alt = 2 * df$NLL_alt + 3 * log(nTrials)

Now paired t-test over all values

t.test(df$BIC, df$BIC_alt, paired = T)
## 
##  Paired t-test
## 
## data:  df$BIC and df$BIC_alt
## t = -6.1442, df = 999, p-value = 1.159e-09
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -2.78249 -1.43538
## sample estimates:
## mean difference 
##       -2.108935

Now we assess model goodness for each sample size

plottingInfoAll = data.frame(Outcome = rep(c("Incorrect & N.S.", 
                                             "Correct & N.S.", 
                                             "Incorrect & p < 0.05",
                                             "Correct & p < 0.05"), times = 7),
                             Count = 0,
                             N = rep(c(20, 40, 60, 80, 100, 150, 200), each = 4))
sampleSizes = c(20, 40, 60, 80, 100, 150, 200)
for (sampleSize in sampleSizes){
  test = vector('logical', 100000)
  better = vector('logical', 100000)
  for (i in 1:length(test)){
    subsample = df[sample(1:nrow(df), sampleSize, replace = T), ]
    better[i] = sum(subsample$BIC) < sum(subsample$BIC_alt)
    test[i] = t.test(subsample$BIC, subsample$BIC_alt, paired = T)$p.value < 0.05
  }
  plottingInfoAll$Count[which(plottingInfoAll$N == sampleSize & 
                              plottingInfoAll$Outcome == "Incorrect & N.S.")] = sum(better==F & test==F)
  plottingInfoAll$Count[which(plottingInfoAll$N == sampleSize & 
                              plottingInfoAll$Outcome == "Correct & N.S.")] = sum(better==T & test==F)
  plottingInfoAll$Count[which(plottingInfoAll$N == sampleSize & 
                              plottingInfoAll$Outcome == "Incorrect & p < 0.05")] = sum(better==F & test==T)
  plottingInfoAll$Count[which(plottingInfoAll$N == sampleSize & 
                              plottingInfoAll$Outcome == "Correct & p < 0.05")] = sum(better==T & test==T)
}

Now visualize all results

plottingInfoAll$N = factor(plottingInfoAll$N, 
                           levels = c(200, 150, 100, 80, 60, 40, 20))
plottingInfoAll$Outcome = factor(plottingInfoAll$Outcome,
                                 levels = c("Correct & p < 0.05",
                                            "Correct & N.S.", 
                                            "Incorrect & N.S.",
                                            "Incorrect & p < 0.05"))
plottingInfoAll$Proportion = (plottingInfoAll$Count/length(test))

these = which(plottingInfoAll$Outcome == "Correct & p < 0.05")
plottingInfoAll$Position[these] = 1.175

those = which(plottingInfoAll$Outcome == "Correct & N.S.")
plottingInfoAll$Position[those] = 1 - plottingInfoAll$Proportion[these] -
  plottingInfoAll$Proportion[those]/2

plottingInfoAll$Position[those[which(plottingInfoAll$Proportion[these] == 0)]] = 1.175

these = which(plottingInfoAll$Outcome == "Incorrect & p < 0.05")
plottingInfoAll$Position[these] = plottingInfoAll$Proportion[these]/2
those = which(plottingInfoAll$Outcome == "Incorrect & N.S.")
plottingInfoAll$Position[those] = plottingInfoAll$Proportion[these] +
  plottingInfoAll$Proportion[those]/2

final = plottingInfoAll[-which(plottingInfoAll$Proportion == 0),]

ggplot(data = final, aes(x = Proportion, y = N, group = Outcome,
                                   fill = Outcome)) + 
  geom_col() + theme_minimal() + 
  scale_fill_manual(values = c("green", "forestgreen", "firebrick4", "red")) +
  labs(x = "Proportion", y = "Sample Size", 
       fill = "Result",
       label = NULL) + 
  geom_label(aes(label = Proportion, x = Position)) + lims(x = c(-0.025, 1.25))

And save for other uses and verification

save.image(file = "C:/Users/DELL/Documents/integration-simulations/sampleSize.RData")