for (mort in c(.1)) # 10% true mortality plot(x=seq(from=-.025,to=.975,by=.05), dbinom(x=0:20,20,mort), axes=F, type = "s", xlim=c(-.05,.35), ylim=c(0,.33), xlab=paste('Observed mortality, true mortality ',round(100*mort,0),"%",sep=""), ylab='probability density') axis(side=1,at=c(0,.1,.2,.3),labels=c("0%","10%","20%","30%")) axis(side=2,at=c(0,0.1,.2,.3,.4,.5,.6,.7),labels=c("0%","10%","20%","30%","40%","50%","60%","70%")) text(x=mort,y=.02+ dbinom(x=max(round(mort*20),0),20,mort),labels=paste("n=20")) for (i in c(50,200)) { # add 2 more sample sizes lines(x=seq(from=0-(0.5*1/i),to=1-(0.5*1/i),by=1/i), dbinom(x=0:i,i,mort), type = "s", lty=ifelse(i==50,2,3)) text(x=mort,y=.02+dbinom(x=max(round(mort*i),0),i,mort),labels=paste("n=",i, sep="")) } # end loop n=50,200 } # end loop mort
# Plot 4 situations: no heterogeneity to much heterogeneity # Fig 5.3 first par(mfrow = c(2,2), pty='s', mar=c(3,2,1,1)) # Make data set with 100 centers, each 20 patients, 10% mortality, variability sd 0 to 0.03 for (sdtau in c(0,.01,.02,.03)) { truemort <- rnorm(n=100,mean=0.1,sd=sdtau) mortmat <- as.matrix(cbind(1:100, sapply(truemort,FUN=function(x)rbinom(n=1,20,x))/20,truemort)) plot(x=0, y=0, pch='', xlim=c(-.2,1.2), ylim=c(-.03,.35),axes=F, xlab="", ylab='mortality') axis(side=2,at=c(0,.1,.2,.3),labels=c("0%","10%","20%","30%")) axis(side=1,at=c(0,1),labels=c("Observed","True mortality")) for (i in (1:100)) { lines(x=c(0,1), y=mortmat[i,c(2,3)]) points(x=c(0+runif(1,min=-.07,max=.07),1), y=mortmat[i,c(2,3)], pch="+") } } ## Same, for n=200 per center Fig 5.4 # Plot 4 situations: no heterogeneity to much heterogeneity par(mfrow = c(2,2), pty='s', mar=c(3,2,1,1)) # Make data set with 100 centers, each 200 patients, 10% mortality, variability sd 0 to 0.03 for (sdtau in c(0,.01,.02,.03)) { truemort <- rnorm(n=100,mean=0.1,sd=sdtau) mortmat <- as.matrix(cbind(1:100, sapply(truemort,FUN=function(x)rbinom(n=1,200,x))/200,truemort)) plot(x=0, y=0, pch='', xlim=c(-.2,1.2), ylim=c(-.03,.35),axes=F, xlab="", ylab='mortality') axis(side=2,at=c(0,.1,.2,.3),labels=c("0%","10%","20%","30%")) axis(side=1,at=c(0,1),labels=c("Observed","True mortality")) for (i in (1:100)) { lines(x=c(0,1), y=mortmat[i,c(2,3)]) points(x=c(0+runif(1,min=-.07,max=.07),1), y=mortmat[i,c(2,3)], pch="+") } }
## Under the NULL plot(function(x) dnorm(x, mean=0, sd = 1, log = F), -4, 4, xlab="Estimated effect", ylab = "Density") lines(x=c(0,0), y=c(0,.5), lty=1) x95 <- qnorm(p=(1-0.025), mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE) x995 <- qnorm(p=(1-0.0025), mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE) # Polygon for p<.05 xpol <- seq(x95,4,by=0.01) ypol <- dnorm(x=xpol, mean=0, sd = 1, log = F) xpol <- c(x95,xpol,4) ypol <- c(0,ypol,0) polygon(xpol,ypol, col = "gray") # lower tail xpol <- seq(-x95,-4,by=-0.01) ypol <- dnorm(x=xpol, mean=0, sd = 1, log = F) xpol <- c(-x95,xpol,-4) ypol <- c(0,ypol,0) polygon(xpol,ypol, col = "gray") ############## ## mean = 1 ## range -3, 5 plot(function(x) dnorm(x, mean=1, sd = 1, log = F), -3, 5, xlab="Estimated effect", ylab = "Density") lines(x=c(1,1), y=c(0,.5), lty=2) x95 <- qnorm(p=(1-0.025), mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE) x995 <- qnorm(p=(1-0.0025), mean = 0, sd = 1, lower.tail = TRUE, log.p = FALSE) # Polygon for p<.05 xpol <- seq(x95,5,by=0.01) ypol <- dnorm(x=xpol, mean=1, sd = 1, log = F) xpol <- c(x95,xpol,5) ypol <- c(0,ypol,0) polygon(xpol,ypol, col = "gray") # lower tail xpol <- seq(-x95,-3,by=-0.01) ypol <- dnorm(x=xpol, mean=1, sd = 1, log = F) xpol <- c(-x95,xpol,-3) ypol <- c(0,ypol,0) polygon(xpol,ypol, col = "gray")
# Illustrate model performance development / test / external: Fig 5.6 library(rms) # create x vars n <- 100000 x1 <- rnorm(n,sd=1) x2 <- rnorm(n,sd=.9) x3 <- rnorm(n,sd=.8) x4 <- rnorm(n,sd=.7) x5 <- rnorm(n,sd=.6) x6 <- rnorm(n,sd=.5) x7 <- rnorm(n,sd=.4) x8 <- rnorm(n,sd=.3) x9 <- rnorm(n,sd=.2) x10 <- rnorm(n,sd=.1) y <- x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 +rnorm(n,sd=3) # y in developement data y2 <- .5*x1 + 1.5*x2 + .5*x3 + 1.5*x4 + 0.5*x5 + 1.5*x6 + 0.5*x7 + 1.5*x8 + 0.5*x9 + 1.5*x10 +rnorm(n,sd=3) datax <- as.data.frame(cbind(x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,y,y2)) datax[1:5,] describe(y) describe(y2) colnames(datax) <- Cs(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,Y,Y2) # gold standard fits ols(y~x1) ols(y~x1 + x2 + x3 + x4 + x5 + x6 + x6 + x7 + x8 + x9 + x9 + x10) ols(y2~x1 + x2 + x3 + x4 + x5 + x6 + x6 + x7 + x8 + x9 + x9 + x10) # function for MSE MSE <- function(y, fit,newdata) {mean((y-predict(fit,newdata=newdata))^2)} ####################### ## Start simulations ## ####################### sim <- 1000 nstudy <- 50 resultsmat <- matrix(nrow=sim,ncol=45) for (i in 1:sim) { cat("\nSimulation #", i) # create x vars n <- 10000 x1 <- rnorm(n,sd=1) x2 <- rnorm(n,sd=.9) x3 <- rnorm(n,sd=.8) x4 <- rnorm(n,sd=.7) x5 <- rnorm(n,sd=.6) x6 <- rnorm(n,sd=.5) x7 <- rnorm(n,sd=.4) x8 <- rnorm(n,sd=.3) x9 <- rnorm(n,sd=.2) x10 <- rnorm(n,sd=.1) y <- x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 +rnorm(n,sd=3) y2 <- .5*x1 + 1.5*x2 + .5*x3 + 1.5*x4 + 0.5*x5 + 1.5*x6 + 0.5*x7 + 1.5*x8 + 0.5*x9 + 1.5*x10 +rnorm(n,sd=3) datax <- as.data.frame(cbind(x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,y,y2)) colnames(datax) <- Cs(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,Y,Y2) j <- sample(x=1:n, size=nstudy) dataj <- datax[j,] datamj <- datax[-j,] Yj <- datax$Y[j] Ymj <- datax$Y[-j] Y2mj <- datax$Y2[-j] fit1 <- ols(Y~X1, data=dataj) fit2 <- ols(Y~X1 + X2, data=dataj) fit3 <- ols(Y~X1 + X2 + X3, data=dataj) fit4 <- ols(Y~X1 + X2 + X3 + X4, data=dataj) fit5 <- ols(Y~X1 + X2 + X3 + X4 + X5, data=dataj) fit6 <- ols(Y~X1 + X2 + X3 + X4 + X5 + X6, data=dataj) fit7 <- ols(Y~X1 + X2 + X3 + X4 + X5 + X6 + X7, data=dataj) fit8 <- ols(Y~X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8, data=dataj) fit9 <- ols(Y~X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 , data=dataj) fit10 <- ols(Y~X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10, data=dataj) # coefs in matrix resultsmat[i,1:15] <- c(coef(fit5)[2:6],coef(fit10)[2:11]) # test MSE resultsmat[i,16:25] <- c(MSE(y=Ymj, fit=fit1, newdata=datamj), MSE(y=Ymj, fit=fit2, newdata=datamj), MSE(y=Ymj, fit=fit3, newdata=datamj), MSE(y=Ymj, fit=fit4, newdata=datamj), MSE(y=Ymj, fit=fit5, newdata=datamj), MSE(y=Ymj, fit=fit6, newdata=datamj), MSE(y=Ymj, fit=fit7, newdata=datamj), MSE(y=Ymj, fit=fit8, newdata=datamj), MSE(y=Ymj, fit=fit9, newdata=datamj), MSE(y=Ymj, fit=fit10, newdata=datamj)) # external validation resultsmat[i,26:35] <- c(MSE(y=Y2mj, fit=fit1, newdata=datamj), MSE(y=Y2mj, fit=fit2, newdata=datamj), MSE(y=Y2mj, fit=fit3, newdata=datamj), MSE(y=Y2mj, fit=fit4, newdata=datamj), MSE(y=Y2mj, fit=fit5, newdata=datamj), MSE(y=Y2mj, fit=fit6, newdata=datamj), MSE(y=Y2mj, fit=fit7, newdata=datamj), MSE(y=Y2mj, fit=fit8, newdata=datamj), MSE(y=Y2mj, fit=fit9, newdata=datamj), MSE(y=Y2mj, fit=fit10, newdata=datamj)) # apparent validation resultsmat[i,36:45] <- c(MSE(y=Yj, fit=fit1, newdata=dataj), MSE(y=Yj, fit=fit2, newdata=dataj), MSE(y=Yj, fit=fit3, newdata=dataj), MSE(y=Yj, fit=fit4, newdata=dataj), MSE(y=Yj, fit=fit5, newdata=dataj), MSE(y=Yj, fit=fit6, newdata=dataj), MSE(y=Yj, fit=fit7, newdata=dataj), MSE(y=Yj, fit=fit8, newdata=dataj), MSE(y=Yj, fit=fit9, newdata=dataj), MSE(y=Yj, fit=fit10, newdata=dataj)) cat(".. end") } # end simulations sim ## With n=50 resultsmatn50 <- resultsmat resultsn50 <- apply(resultsmatn50,2,mean) perfmat <- matrix(resultsn50[16:45],nrow=10,ncol=3) plot(spline(x=1:10,y=perfmat[,3]),lty=1,lwd=1, xlim=c(0.5,10.5),ylim=c(5,15), type="l", xlab="Number of predictors", ylab="Mean squared error",cex.lab=1.2) # Apparent lines(spline(x=1:10,y=perfmat[,1]),lty=2,lwd=2) # Internal lines(smooth.spline(x=1:10,y=perfmat[,2]),lty=3,lwd=3) # External text(x=c(7,7,7),y=c(8.4,10.4,12.5),label=Cs(Apparent, Internal, External)) \\