naplot2 function in Word; it has minor improvements over naplot in rms
################################### # Ewout Steyerberg, March 8, 2015 # # Missing values: illustrate MCAR, MAR, MNAR mechanism # Use simple linear models ################################### library(rms) # Harrell's library with many useful functions # library(mice) # source('C:/Program Files/R/R-2.3.1/library/Hmisc/R/new.s') ######################### set.seed(1) # For identical results at repetition n <- 1000 # arbitrary sample size x2 <- rnorm(n=n, mean=0, sd=1) # x2 standard normal x1 <- rnorm(n=n, mean=0, sd=1) # Uncorrelated x1 # x1 <- sqrt(.5) * x2 + rnorm(n=n, mean=0, sd=sqrt(1-.5)) # x2 correlated with x1 y1 <- 1 * x1 + 1 * x2 + rnorm(n=n, mean=0, sd=sqrt(1-0)) # generate y # var of y1 larger with correlated x1 - x2 plot(x=x1,y=x2,pch='.',xlim=c(-4,4),ylim=c(-4,4),ps=.1) abline(ols(x2~x1)) # Make approx half missing # MCAR, MAR and MNAR mechanisms x1MCAR <- ifelse(runif(n) < .5, x1, NA) # MCAR mechanism for 50% of x1 x1MARx <- ifelse(rnorm(n=n,sd=.8) < x2, x1, NA) # MAR on x2, R2 50%, 50% missing (since mean x2==0) x1MARy <- ifelse(rnorm(n=n,sd=(sqrt(3)*.8)) >y1, x1, NA) # MAR on y, R2 50%, 50% missing (since mean y1==0) x1MNAR <- ifelse(rnorm(n=n,sd=.8) < x1, x1, NA) # MNAR on x1, R2 50%, 50% missing (since mean x1==0) # MCAR plot(x=x1,y=x2,pch='.',xlim=c(-4,4),ylim=c(-4,4),ps=.1) # Orig abline(ols(x2~x1), lty=2) points(x=x1MCAR,y=x2,pch='o',ps=.1) # MCAR abline(ols(x2~x1MCAR), lty=1, lwd=2) title('MCAR') # x1MARx plot(x=x1,y=x2,pch='.',xlim=c(-4,4),ylim=c(-4,4),ps=.1) # Orig abline(ols(x2~x1), lty=2) points(x=x1MARx,y=x2,pch='o',xlim=c(-4,4),ylim=c(-4,4),ps=.1) # MCAR abline(ols(x2~x1MARx), lty=1, lwd=2) title('x1 MAR on x2') # x1MARy plot(x=x1,y=x2,pch='.',xlim=c(-4,4),ylim=c(-4,4),ps=.1) # Orig abline(ols(x2~x1), lty=2) points(x=x1MARy,y=x2,pch='o',xlim=c(-4,4),ylim=c(-4,4),ps=.1) # MCAR abline(ols(x2~x1MARy), lty=1, lwd=2) title('x1 MAR on y') # x1MNAR plot(x=x1,y=x2,pch='.',xlim=c(-4,4),ylim=c(-4,4),ps=.1) # Orig abline(ols(x2~x1), lty=2) points(x=x1MNAR,y=x2,pch='o',xlim=c(-4,4),ylim=c(-4,4),ps=.1) # MCAR abline(ols(x2~x1MNAR), lty=1, lwd=2) title('x1 MNAR') ################# ## CC analysis ## # MCAR plot(x=x1,y=y1,pch='.',xlim=c(-4,4),ylim=c(-6,6),ps=.1) # Orig abline(ols(y1~x1), lty=2) points(x=x1MCAR,y=y1,pch='o',ps=.1) # MCAR abline(ols(y1~x1MCAR), lty=1, lwd=2) title('MCAR') # x1MARx plot(x=x1,y=y1,pch='.',xlim=c(-4,4),ylim=c(-6,6),ps=.1) # Orig abline(ols(y1~x1), lty=2) points(x=x1MARx,y=y1,pch='o',ps=.1) # MCAR abline(ols(y1~x1MARx), lty=1, lwd=2) title('x1 MAR on x2') # x1MARy plot(x=x1,y=y1,pch='.',xlim=c(-4,4),ylim=c(-6,6),ps=.1) # Orig abline(ols(y1~x1), lty=2) points(x=x1MARy,y=y1,pch='o',ps=.1) # MCAR abline(ols(y1~x1MARy), lty=1, lwd=2) title('x1 MAR on y') # x1MNAR plot(x=x1,y=y1,pch='.',xlim=c(-4,4),ylim=c(-6,6),ps=.1) # Orig abline(ols(y1~x1), lty=2) points(x=x1MNAR,y=y1,pch='o',ps=.1) # MCAR abline(ols(y1~x1MNAR), lty=1, lwd=2) title('x1 MNAR') ############# # Compare fits in the various selections fx1.CC <- ols(y1~x1+x2) fx1MARx.CC <- ols(y1~x1MARx+x2) fx1MARy.CC <- ols(y1~x1MARy+x2) fx1MNAR.CC <- ols(y1~x1MNAR+x2) # Regression coefficients print(rbind(coef(fx1.CC), coef(fx1MARx.CC), coef(fx1MARy.CC), coef(fx1MNAR.CC)), digits=2) # Imputation; make data sets with different types of missings d <- as.data.frame(cbind(y1,x1,x2,x1MCAR, x1MARx,x1MARy,x1MNAR)) dMCAR <- d[,c(1,3,4)] # data set with x1 missings according to MCAR dMARx <- d[,c(1,3,5)] # x1 MAR on x2 dMARy <- d[,c(1,3,6)] # x1 MAR on y dMNAR <- d[,c(1,3,7)] # x1 MNAR at x1 ## MI using the aregImpute function from rms ## help(areImpute) for more info g <- aregImpute(~ y1+ x1MCAR + x2, n.impute=5, data=d, pr=F, type='pmm') ## fit models per imputed set and combine results using Rubin's rules ## Use fit.mult.impute() function from rms fx1MCAR.MI <- fit.mult.impute(y1 ~ x1MCAR + x2, ols, xtrans=g, data=d, pr=F) g <- aregImpute(~ y1+ x1MARx + x2, n.impute=5, data=d, pr=F, type='pmm') fx1MARx.MI <- fit.mult.impute(y1 ~ x1MARx + x2, ols, xtrans=g, data=d, pr=F) ## areg g <- aregImpute(~ y1+ x1MARy + x2, n.impute=5, data=d, pr=F, type='pmm') fx1MARy.MI <- fit.mult.impute(y1 ~ x1MARy + x2, ols, xtrans=g, data=d, pr=F) ## areg g <- aregImpute(~ y1+ x1MNAR + x2, n.impute=5, data=d, pr=F, type='pmm') fx1MNAR.MI <- fit.mult.impute(y1 ~ x1MNAR + x2, ols, xtrans=g, data=d, pr=F) ## areg # Regression coefficients after MI with aregImpute print(rbind(coef(fx1MCAR.MI), coef(fx1MARx.MI), coef(fx1MARy.MI), coef(fx1MNAR.MI)), digits=3) #### End illustration missing values and imputation ####