library("multcomp") library("sandwich") ####################################################### ### Source code for simulations presented in ### A Robust Procedure for Comparing Multiple Means ### under Heterscedasticity in Unbalanced Designs ### by E. Herberich, J. Sikorski, and T. Hothorn ### PLoS ONE 2010 ####################################################### ############################################### # Simulation setup for normally distributed data ############################################### ################################################################################### ### data generating process of group membership for n observations and four groups ### f: parameter controling the relation of group sizes ################################################################################### dgpX <- function(n, f){ n1 <- n + f n2 <- n + f * 2 * n n3 <- n + f * 3 * n n4 <- n + f * 4 * n x1 <- as.factor(sample(rep(1:4, c(n1, n2, n3, n4)), replace = FALSE)) X <- data.frame(X1 = x1) return(X) } ################################################################################# ### data generating process of normally distributed data in four groups ### f: parameter controling the relation of group sizes ### sigma: standard deviation of the errors ################################################################################# dgp_aov <- function(n, f, sigma, beta){ X <- dgpX(n, f) Xm <- model.matrix(~ - 1 + X1, data=X) stopifnot(ncol(Xm) == length(beta)) lp <- Xm %*% beta epsilon <- rnorm(length(X$X1), 0, 1) sigma_i <- sigma[as.numeric(X$X1)] Y <- lp + epsilon * sigma_i data.frame(Y=Y, X) } ################################################################################# ### Fit of an ANOVA model ################################################################################# fit_aov <- function(data) aov(Y ~ X1, data=data) ########################################################################################################################## ### Simulations in the unbalanced ANOVA model ### Comparison of OLS and HC3 covariance estimation for the global test ### Comparison of Tukey-Kramer test and max-t test with HC3 covariance estimation for all pairwise comparisons of groups ### r: variing group effect for calculation of power ########################################################################################################################## sim <- function(nsim, dgp, fit, beta0, r, n, f, sigma, K){ P <- matrix(0, ncol=2, nrow=nsim, byrow=TRUE) P_HC <- matrix(0, ncol=2, nrow=nsim, byrow=TRUE) Pow_Global <- numeric(nsim) Pow_Sim <- matrix(0, ncol=(length(r)), nrow=nsim, byrow=TRUE) Pow_Global_HC <- numeric(nsim) Pow_Sim_HC <- matrix(0, ncol=(length(r)), nrow=nsim, byrow=TRUE) for (i in 1:nsim){ print(i) x <- dgp(n, f, sigma, beta0) mod <- fit(x) mod_F <- aov(Y ~ -1 + X1, data = x) glht0_F <- glht(mod_F, linfct = K, rhs = rep(2,4)) glht1_F <- glht(mod_F, linfct = K, rhs = c((r[1] + 2),2,2,2)) glht0 <- glht(mod, linfct = mcp(X1="Tukey")) glht1 <- glht(mod, linfct = mcp(X1="Tukey"), rhs = r) P[i,1] <- summary(glht0_F, test=Ftest())$test$pvalue P[i,2] <- min(TukeyHSD(mod)$X1[,4]) Pow_Global[i] <- summary(glht1_F, test=Ftest())$test$pvalue Pow_Sim[i,] <- as.numeric(r > TukeyHSD(mod)$X1[,3] | r < TukeyHSD(mod)$X1[,2]) glht0_HC <- glht(mod, linfct = mcp(X1="Tukey"), vcov = vcovHC) glht1_HC <- glht(mod, linfct = mcp(X1="Tukey"), rhs = r, vcov = vcovHC) glht0_F_HC <- glht(mod_F, linfct = K, rhs = rep(2,4), vcov=vcovHC) glht1_F_HC <- glht(mod_F, linfct = K, rhs = c((r[1] + 2),2,2,2), vcov=vcovHC) P_HC[i,1] <- summary(glht0_F_HC, test=Ftest())$test$pvalue P_HC[i,2] <- min(summary(glht0_HC)$test$pvalues) Pow_Global_HC[i] <- summary(glht1_F_HC, test=Ftest())$test$pvalue Pow_Sim_HC[i,] <- summary(glht1_HC)$test$pvalue } Size_Global <- mean(P[,1] <= 0.05) FWER <- mean(P[,2] <= 0.05) Power_Global <- mean(Pow_Global <= 0.05) Power_Sim <- colMeans(Pow_Sim) Size_Global_HC <- mean(P_HC[,1] <= 0.05) FWER_HC <- mean(P_HC[,2] <= 0.05) Power_Global_HC <- mean(Pow_Global_HC <= 0.05) Power_Sim_HC <- colMeans(Pow_Sim_HC <= 0.05) ret <- c(Size_Global, FWER, Power_Global, Power_Sim, Size_Global_HC, FWER_HC, Power_Global_HC, Power_Sim_HC) return(ret) } beta0 <- c(2,2,2,2) b <- c(seq(-2,2,by=0.1)) n <- c(10, 20, 30, 40) design <- expand.grid(b, n) names(design) <- c("b", "N") nsim <- 1000 ############################################################# ### A: Normally distributed data, homogeneneous variances ### ############################################################# results <- matrix(0, nrow=nrow(design), ncol=20, byrow=T) results[,1] <- design[,1] results[,2] <- design[,2] set.seed(11803) for (j in 1:nrow(design)) { print(j) r <- c(design$b[j],design$b[j],design$b[j],0,0,0) n <- design$N[j] f <- 0.2 K <- diag(1,4) sigma <- c(2,2,2,2) results[j,3:20] <- sim(nsim, dgp_aov, fit_aov, beta0, r, n, f, sigma, K) save(results, file = "AOV_hom.Rda") } colnames(results) <- c("r", "n", "Size_Global", "FWER_Tukey", "Power_Global", "H1","H1","H1","H0","H0","H0","Size_Global_HC", "FWER_HC", "Power_Global_HC", "H1_HC","H1_HC","H1_HC","H0_HC","H0_HC","H0_HC") results save(results, file = "AOV_hom.Rda") #################################################################################################################### ### B: Normally distributed data, heterogeneous variances, smaller variances in groups with smaller sample sizes ### #################################################################################################################### results <- matrix(0, nrow=nrow(design), ncol=20, byrow=T) results[,1] <- design[,1] results[,2] <- design[,2] set.seed(21803) for (j in 1:nrow(design)) { print(j) r <- c(design$b[j],design$b[j],design$b[j],0,0,0) n <- design$N[j] f <- 0.2 K <- diag(1,4) sigma <- c(3,5,7,9) results[j,3:20] <- sim(nsim, dgp_aov, fit_aov, beta0, r, n, f, sigma, K) save(results, file = "AOV_het1.Rda") } colnames(results) <- c("r", "n", "Size_Global", "FWER_Tukey", "Power_Global", "H1","H1","H1","H0","H0","H0","Size_Global_HC", "FWER_HC", "Power_Global_HC", "H1_HC","H1_HC","H1_HC","H0_HC","H0_HC","H0_HC") save(results, file = "AOV_het1.Rda") ################################################################################################################### ### C: Normally distributed data, heterogeneous variances, smaller variances in groups with larger sample sizes ### ################################################################################################################### results <- matrix(0, nrow=nrow(design), ncol=20, byrow=T) results[,1] <- design[,1] results[,2] <- design[,2] set.seed(31803) for (j in 1:nrow(design)) { print(j) r <- c(design$b[j],design$b[j],design$b[j],0,0,0) n <- design$N[j] f <- 0.2 K <- diag(1,4) sigma <- c(9,7,5,3) results[j,3:20] <- sim(nsim, dgp_aov, fit_aov, beta0, r, n, f, sigma, K) save(results, file = "AOV_het2.Rda") } colnames(results) <- c("r", "n", "Size_Global", "FWER_Tukey", "Power_Global", "H1","H1","H1","H0","H0","H0","Size_Global_HC", "FWER_HC", "Power_Global_HC", "H1_HC","H1_HC","H1_HC","H0_HC","H0_HC","H0_HC") save(results, file = "AOV_het2.Rda") ############################################### ### Simulation setup for beta distributed data ############################################### ################################################################################### ### data generating process of group membership for n observations and four groups ### f: parameter controling the relation of group sizes ################################################################################### dgpX <- function(n, f){ n1 <- n + f * 1 * n n2 <- n + f * 2 * n n3 <- n + f * 3 * n n4 <- n + f * 4 * n x1 <- as.factor(rep(1:4, c(n1, n2, n3, n4), replace = FALSE)) X <- data.frame(X1 = x1) return(X) } ########################################################################################################################## ### Simulations in the unbalanced ANOVA model ### Comparison of OLS and HC3 covariance estimation for the global test ### Comparison of Tukey-Kramer test and max-t test with HC3 covariance estimation for all pairwise comparisons of groups ########################################################################################################################## sim <- function(nsim, dgp, fit, n, f, K){ P <- matrix(0, ncol=2, nrow=nsim, byrow=TRUE) P_HC <- matrix(0, ncol=2, nrow=nsim, byrow=TRUE) for (i in 1:nsim){ print(i) x <- dgp(n, f) mod <- fit(x) mod_F <- aov(Y ~ -1 + X1, data = x) glht0_F <- glht(mod_F, linfct = K, rhs = rep(1,4)) glht0 <- glht(mod, linfct = mcp(X1="Tukey")) P[i,1] <- summary(glht0_F, test=Ftest())$test$pvalue P[i,2] <- min(TukeyHSD(mod)$X1[,4]) # Tukey HSD minimaler adjustierter p-Wert glht0_HC <- glht(mod, linfct = mcp(X1="Tukey"), vcov = vcovHC) glht0_F_HC <- glht(mod_F, linfct = K, rhs = rep(1,4), vcov=vcovHC) P_HC[i,1] <- summary(glht0_F_HC, test=Ftest())$test$pvalue P_HC[i,2] <- min(summary(glht0_HC)$test$pvalues) } Size_Global <- mean(P[,1] <= 0.05) FWER <- mean(P[,2] <= 0.05) Size_Global_HC <- mean(P_HC[,1] <= 0.05) FWER_HC <- mean(P_HC[,2] <= 0.05) ret <- c(Size_Global, FWER, Size_Global_HC, FWER_HC) return(ret) } ################################################################################################################ ### D: Beta distributed data, heterogeneous variances, smaller variances in groups with smaller sample sizes ### ################################################################################################################ ################################################################################# ### data generating process of beta distributed data in four groups, ### smaller variances in groups with smaller sample sizes ### f: parameter controling the relation of group sizes ### sigma: standard deviation of the errors ################################################################################# dgp_aov1 <- function(n, f){ X <- dgpX(n, f) Xm <- model.matrix(~ - 1 + X1, data=X) n1 <- n + f * 1 * n n2 <- n + f * 2 * n n3 <- n + f * 3 * n n4 <- n + f * 4 * n lp1 <- rep(1.25,n1) + rbeta(n1,6,2) lp2 <- rep(5/3,n2) + rbeta(n2,2,4) lp3 <- rep(1.5,n3) + rbeta(n3,1,1) lp4 <- rep(1.5,n4) + rbeta(n4,0.5,0.5) Y <- c(lp1, lp2, lp3, lp4) data.frame(Y=Y, X) } N <- rep(n,rep(41,4)) results <- matrix(0, nrow=length(N), ncol=5, byrow=T) results[,1] <- N set.seed(11802) for (j in 1:nrow(results)) { print(j) n <- N[j] f <- 0.2 K <- diag(1,4) results[j,2:5] <- sim(nsim, dgp_aov1, fit_aov, n, f, K) save(results, file = "AOV_het1_beta.Rda") } colnames(results) <- c("n", "Size_Global", "FWER_Tukey","Size_Global_HC", "FWER_HC") save(results, file = "AOV_het1_beta.Rda") ############################################################################################################### ### E: Beta distributed data, heterogeneous variances, smaller variances in groups with larger sample sizes ### ############################################################################################################### ################################################################################# ### data generating process of beta distributed data in four groups, ### smaller variances in groups with larger sample sizes ### f: parameter controling the relation of group sizes ### sigma: standard deviation of the errors ################################################################################# dgp_aov2 <- function(n, f){ X <- dgpX(n, f) Xm <- model.matrix(~ - 1 + X1, data=X) n1 <- n + f n2 <- n + f * 2 * n n3 <- n + f * 3 * n n4 <- n + f * 4 * n lp1 <- rep(1.5,n1) + rbeta(n1,0.5,0.5) lp2 <- rep(1.5,n2) + rbeta(n2,1,1) lp3 <- rep(5/3,n3) + rbeta(n3,2,4) lp4 <- rep(1.25,n4) + rbeta(n4,6,2) Y <- c(lp1, lp2, lp3, lp4) data.frame(Y=Y, X) } results <- matrix(0, nrow=length(N), ncol=5, byrow=T) results[,1] <- N set.seed(11803) for (j in 1:length(N)) { print(j) n <- N[j] f <- 0.2 K <- diag(1,4) results[j,2:5] <- sim(nsim, dgp_aov2, fit_aov, n, f, K) save(results, file = "AOV_het2_beta.Rda") } colnames(results) <- c("n", "Size_Global", "FWER_Tukey","Size_Global_HC", "FWER_HC") save(results, file = "AOV_het2_beta.Rda")