# 13. Modern estimation methods

## Shrinkage calculations: RStudio site

A nice example of the R code plus results with a GUSTO-I sample is presented here.

## Shrinkage calculations: R code with glmnet package

The R code to calculate shrunk regression coefficients for the GUSTO example is as follows:

```# Small program, illustrating shrinkage techniques in logistic models for gusto subsample
# Oct 2018: use glmnet

library(foreign)
library(rms)

# Import sample5
sample5 <- as.data.frame(read.spss('/Users/ewsteyerberg/Documents/Externe harde schijf/Book/SPSS/sample5.sav'))
sample4 <- as.data.frame(read.spss('/Users/ewsteyerberg/Documents/Externe harde schijf/Book/SPSS/sample4.sav'))

# gustos  <- sample5
gustos  <- sample4  # choose one of the samples for illustration
describe(gustos)

# Fit a full model in gustos; 8 / 17 predictors
full <- lrm(DAY30~SHO+A65+ANT+DIA+HYP+HRT+TTR+SEX+PMI+HEI+WEI+HTN+SMK+LIP+PAN+FAM+ST4,
data=gustos,x=T,y=T,linear.predictors=F)
full8 <- lrm(DAY30~SHO+A65+HIG+DIA+HYP+HRT+TTR+SEX, data=gustos,x=T,y=T,linear.predictors=T)
val.full <- validate(full, B=500)     # 500 replications for bootstrap
val.full8 <- validate(full8, B=500)   # 500 replications for bootstrap
full
full8

# Make shrunk models
full8.shrunk <- full8
full8.shrunk\$coef <- val.full8[4,5] * full8.shrunk\$coef  # use result from bootstrapping
shrinkage8 <- (full8\$stats[3]-full8\$stats[4])/full8\$stats[3]
full8.shrunk\$coef <- shrinkage8 * full8\$coef
# val.full[4,5] is shrinkage factor; heuristic estimate (LR - df) / LR = (62.6-8)/62.6=0.87

# Estimate new intercept, with shrunk lp as offset variable, i.e. coef fixed at unity
full8.shrunk\$coef[1] <- lrm.fit(y=full8\$y, offset= full8\$x %*% full8.shrunk\$coef[2:9])\$coef[1]
full8.shrunk\$coef /full8\$coef
signif(full8.shrunk\$coef,2)
full8.shrunk

# Make a penalized model
p8	<- pentrace(full8, c(0,1,2,3,4,5,6,7,8,10,12,14,20, 24, 32,40))
p	<- pentrace(full, c(0,2,4,6,8,10,12,14,16,18,20,22, 24, 28, 32,40))
## Fig 13.1 ##
plot(x=p8\$results.all[,1],y=p8\$results.all[,5],type="l",xlim=c(0,40), ylim=c(40,48),
xlab="Penalty", ylab="AIC", cex.lab=1.2, col = Cs(darkgreen) )
lines(x=p\$results.all[,1],y=p\$results.all[,5], lty=2, col = Cs(darkred))
text(x=20,y=47,"8 predictors", adj=0, col = Cs(darkgreen))
text(x=15,y=44,"17 predictors", adj=0, col = Cs(darkred))

###############################################################################
### Now use glmnet for all forms of penalization: L1, L2, L1+L2 (elastic net) #
library(glmnet)

# Ridge regression: L2 penalty (alpha = 0)
# Find the best lambda using cross-validation
set.seed(123)
cv <- cv.glmnet(x = full8$x, y = full8$y, alpha = 0, family = "binomial")
# Display the best lambda value
cv$lambda.min  # 0.01
# Refit on the training data at the CV-optimal lambda
model8.L2 <- glmnet(x = full8$x, y = full8$y, alpha = 0, lambda = cv$lambda.min, family = "binomial")
# Display regression coefficients
coef(model8.L2)  # this is identical to what rms produces with pentrace
# end ridge

# Lasso regression: L1 penalty (alpha = 1)
# Find the best lambda using cross-validation
set.seed(123)
cv <- cv.glmnet(x = full8$x, y = full8$y, alpha = 1, family = "binomial")
# Display the best lambda value
cv$lambda.min  # 0.0026
# Refit on the training data at the CV-optimal lambda
model8.L1 <- glmnet(x = full8$x, y = full8$y, alpha = 1, lambda = cv$lambda.min, family = "binomial")
# plot(x = glmnet(x = full8$x, y = full8$y, alpha = 1, family = "binomial"), xvar = "lambda")

# Display regression coefficients
coef(model8.L1)  # this is very similar to what glmpath produced
# end lasso
####

# Elastic net regression: joint optimization over alpha and lambda
library("glmnetUtils")
set.seed(10)
# NOTE(review): x is taken from the 17-predictor model (full) while y comes from full8;
# the outcome vector is the same either way, but confirm the predictor set is intended.
cvL1L2 <- cva.glmnet(x = full$x, y = full8$y, alpha = seq(0, 1, len = 11)^3, family = "binomial", nfolds = 10)
minlossplot(cvL1L2, cv.type = 'min')  # best alpha = 0, so L2 penalty
## end Elastic Net  ##

## Firth regression ##
library(brglm)
# Default: bias-reducing modified scores (pl = FALSE)
model8.Firth <- brglm(DAY30 ~ SHO + A65 + HIG + DIA + HYP + HRT + TTR + SEX, data = gustos, family = binomial, pl = FALSE)
# Set pl = TRUE for Jeffreys invariant prior (this overwrites the fit above)
model8.Firth <- brglm(DAY30 ~ SHO + A65 + HIG + DIA + HYP + HRT + TTR + SEX, data = gustos, family = binomial, pl = TRUE)
## Identical results

# Inspect coefs
coef(model8.Firth)
coef(model8.Firth) / coef(full8)  # effective shrinkage quite limited

###################################
## Plot densities of predictions ##
###################################
# ML fit: density of the standard maximum-likelihood linear predictor
# (use full name 'linear.predictors' to avoid relying on partial $-matching)
plot(density(full8$linear.predictors, bw = .4), main = "", ylim = c(0, .4), xlim = c(-6.1, 2), lwd = 2,
     xlab = "Linear predictor", cex.lab = 1.2)

# Uniform shrinkage: shrunk coefficients with re-estimated intercept
lpshrunk <- full8$x %*% full8.shrunk$coef[2:9] + full8.shrunk$coef[1]
mean(plogis(lpshrunk))  # shrinkage
lines(density(lpshrunk, bw = .4), lty = 2, lwd = 2)

# Ridge (L2)
lpL2 <- as.numeric(full8$x %*% model8.L2$beta + model8.L2$a0)
mean(plogis(lpL2))  # ridge
lines(density(lpL2, bw = .4), lty = 3, lwd = 2)

# Firth
lpFirth <- predict(model8.Firth)
lines(density(lpFirth, bw = .4), lty = 4, lwd = 2)

# Lasso (L1)
lpL1 <- as.numeric(full8$x %*% model8.L1$beta + model8.L1$a0)
mean(plogis(lpL1))  # Lasso
lines(density(lpL1, bw = .4), lty = 5, lwd = 2)

legend(-2.4, .4, c("Standard ML", "Uniform shrinkage", "Ridge", "Firth", "Lasso"),
       bty = "n", lty = c(1:5), lwd = 2)

## End plot densities of predictions ##
#######################################

## Effective shrinkage by each method: calibration slope of each lp in the sample
Cal.slope <- c(coef(lrm(full8$y ~ full8$linear.predictors))[2],
               coef(lrm(full8$y ~ lpshrunk))[2],
               coef(lrm(full8$y ~ lpL2))[2],
               coef(lrm(full8$y ~ lpFirth))[2],
               coef(lrm(full8$y ~ lpL1))[2])  # 1.00, 1.14, 1.14, 1.02, 1.10
round(1 / Cal.slope, 2)
# full8 lpshrunk[1]       lpL2     lpFirth        lpL1
# 1.00        0.87        0.87        0.98        0.91

#####################################
##### May repeat for 17 var model ###
#####################################
```

## Fig 13.1

### Make a penalized model

```r
# Repeat of the penalty trace used for Fig 13.1
p8 <- pentrace(full8, c(0,1,2,3,4,5,6,7,8,10,12,14,20, 24, 32,40))
p  <- pentrace(full, c(0,2,4,6,8,10,12,14,16,18,20,22, 24, 28, 32,40))
p8
p
plot(x = p8$results.all[, 1], y = p8$results.all[, 5], type = "l", xlim = c(0, 40), ylim = c(40, 48),
     xlab = "Penalty", ylab = "AIC", cex.lab = 1.2, col = "darkgreen")
lines(x = p$results.all[, 1], y = p$results.all[, 5], lty = 2, col = "darkred")
text(x = 20, y = 47, "8 predictors",  adj = 0, col = "darkgreen")
text(x = 15, y = 44, "17 predictors", adj = 0, col = "darkred")
```

## Fig 13.2

### Check penalty in bootstrap ###

```r
## Bootstrap procedure to find optimal penalty ##
# fit:     an lrm fit made with x=TRUE, y=TRUE (the original-sample model)
# dataset: the data frame the fit was estimated on
# pen:     vector of penalty values to evaluate
# B:       number of bootstrap replications
# Returns a matrix with one row per (bootstrap sample, penalty) combination:
# Penalty, plus R2 / C / calibration slope of the penalized bootstrap model
# evaluated in the original sample (i.e. test performance).
Pen.boot <- function(fit, dataset, pen = c(0,2,4,6,8,12,16,20,24,30,36,44), B = 40) {
  nrowB   <- nrow(fit$x)
  nrowmat <- B * length(pen)
  # Preallocate matrix for results
  matB <- matrix(NA, nrow = nrowmat, ncol = 4)
  dimnames(matB) <- list(1:nrowmat, Cs(Penalty, R2test, Ctest, Slopetest))

  # Loop over bootstrap samples
  for (i in seq_len(B)) {
    cat("Start Bootstrap sample nr", i, "\n")
    Brows    <- sample(nrowB, replace = TRUE)
    dataBoot <- dataset[Brows, ]
    j <- (i - 1) * length(pen)
    for (p in pen) {
      j <- j + 1
      fit.penB <- update(fit, data = dataBoot, penalty = p, maxit = 99)  # refit in bootstrap sample
      matB[j, 1] <- p
      # Linear predictor of the penalized bootstrap model on the ORIGINAL design matrix:
      # performance in the original sample serves as the test assessment
      lp <- fit$x %*% coef(fit.penB)[-1]  # [-1] drops the intercept
      matB[j, 2:4] <- val.prob(y = fit$y, logit = lp, pl = FALSE, smooth = FALSE, logistic.cal = FALSE)[c(3, 2, 13)]
    }  # end loop over penalties
  }  # End loop over B
  cat("\n\nEnd over bootstraps\n\n")
  matB
}  # end function Pen.boot

## Search penalty over bootstrap samples
set.seed(1)
sample4.pen17 <- Pen.boot(fit = full, dataset = gustos, B = 200, pen = seq(0, 60, by = 6))
# NOTE(review): mean() caps trim at 0.5, so trim = 0.95 effectively computes the
# median per penalty value — confirm this is the intended summary.
Pen17 <- aggregate(sample4.pen17[, 2:4], list(Pen = sample4.pen17[, 1]), function(x) mean(x, trim = 0.95))
Pen17

set.seed(1)
sample4.pen8 <- Pen.boot(fit = full8, dataset = gustos, B = 500, pen = c(0,4,6,8,10,12,16,20,36))
Pen8 <- aggregate(sample4.pen8[, 2:4], list(Pen = sample4.pen8[, 1]), function(x) mean(x, trim = 0.95))
Pen8

### Plot of bootstrapped penalties vs slope, C stat, R2
plot(x = Pen17[, 1], y = Pen17[, 4], type = "l", xlim = c(0, 37), ylim = c(.7, 1.42),
     xlab = "Penalty", ylab = "Slope of linear predictor", cex.lab = 1.2, col = "darkred")
lines(x = Pen8[, 1], y = Pen8[, 4], type = "l", lty = 2, col = "darkgreen")
abline(a = 1, b = 0)  # reference line: ideal calibration slope of 1
text(x = c(36, 36), y = c(Pen17[Pen17[, 1] == 36, 4], Pen8[nrow(Pen8), 4]) - .02,
     label = c("17", "8"), col = c("darkred", "darkgreen"), adj = 0)

# C stat
plot(x = Pen17[, 1], y = Pen17[, 3], type = "l", xlim = c(0, 37), ylim = c(.76, .8),
     xlab = "Penalty", ylab = "C statistic", cex.lab = 1.2, col = "darkred")
lines(x = Pen8[, 1], y = Pen8[, 3], type = "l", lty = 2, col = "darkgreen")
text(x = c(36, 36), y = c(Pen17[nrow(Pen17), 3], Pen8[nrow(Pen8), 3]) - .002,
     label = c("17", "8"), col = c("darkred", "darkgreen"), adj = 0)
```

## End ##

rcode_and_data/chapter13.txt · Last modified: 2018/11/04 10:37 by ewsteyerberg