Logistic Regression using optim() give "L-BFGS-B" error, please help

#1
Dear R Users/Experts,

I am using a function called logitreg() originally described in MASS (the book 4th Ed.) by Venebles & Ripley, p445. I used the code as provided but made couple of changes to run a 'constrained' logistic regression, I set the method = "L-BFGS-B", set lower/upper values for the variables.

Here is the function,

logitregVR <- function(x, y, wt = rep(1, length(y)), intercept = T, start = rep(0, p), ...)
{
#-------------------------------------------#
# A function to be minimized (or maximized) #
#-------------------------------------------#
fmin <- function(beta, X, y, w)
{
p <- plogis(X %*% beta)
#p <- ifelse(1-p < 1e-6, 1-1e-6, p)
cat("----------", "\n")
cat(paste("X = "), X, "\n")
cat(paste("length(X) ="), length(X), "\n")
cat(paste("beta = "), beta, "\n")
cat(paste("p = "), p, "\n")
cat(paste("1-p = "), 1-p, "\n")
cat(paste("y = "), y, "\n")
cat(paste("length(p) ="), length(p), "\n")
cat(paste("class(p) ="), class(p), "\n")
cat("----------", "\n")

-sum(2*w*ifelse(y, log(p), log(1-p))) # Residual Deviance: D = -2[Likelihood Ratio]
print(-sum(2*w*ifelse(y, log(p), log(1-p))))
}

#----------------------------------------------------------------------#
# A function to return the gradient for the "BFGS", "L-BFGS-B" methods #
#----------------------------------------------------------------------#
gmin <- function(beta, X, y, w)
{
eta <- X %*% beta; p <- plogis(eta)
#p <- ifelse(1-p < 1e-6, 1-1e-6, p)
-2*matrix(w*dlogis(eta)*ifelse(y, 1/p, -1/(1-p)), 1) %*% X # Gradient
print(-2*matrix(w*dlogis(eta)*ifelse(y, 1/p, -1/(1-p)), 1) %*% X)
}

if(is.null(dim(x))) dim(x) <- c(length(x), 1)
dn <- dimnames(x)[[2]]
if(!length(dn)) dn <- paste("Slope", 1:ncol(x), sep="")
p <- ncol(x) + intercept # 1 + 1
if(intercept) {x <- cbind(1, x); dn <- c("(Intercept)", dn)}
if(is.factor(y)) y <- (unclass(y) != 1)

#fit <- optim(start, fmin, gmin, X = x, y = y, w = wt, method = "L-BFGS-B", lower = c(-Inf, 0.05), upper = c(Inf, Inf), ...)
fit <- optim(start, fmin, gmin, X = x, y = y, w = wt, control = list(trace = 6), method = "L-BFGS-B", lower = c(-Inf, 0.05), upper = c(Inf, Inf), ...)

names(fit$par) <- dn
cat("\nCoefficients:\n"); print(fit$par)
cat("\nResidual Deviance:", format(fit$value), "\n")
if(fit$convergence > 0) cat("\nConvergence code:", fit$convergence, "\n")
return(list(fitpar = fit$par))
invisible(fit)
}


And here is my data,

# ----------------------
# Dose 0 1 emp.prop
# ----------------------
# 1 3 0 0.000
# 2 1 0 0.000
# 3.3 4 0 0.000
# 5.016 3 0 0.000
# 7.0224 4 0 0.000
# 9.3398 2 0 0.000
# 12.4219 2 0 0.000
# 16.5211 47 1 0.021
# 21.9731 1 1 0.500
# ----------------------

I have used the glm(family = binomial) and the lrm() function. But my interest is to constraint the slope parameter (beta) to some specified value (say, a non negative number, 0.05 for instance). If you notice in the above code by Venebles and Ripley, for the optim() part I chose method = "L-BFGS-B" and set lower and upper values for my two parameters. Here is the code and the error I am getting,

y <- c(rep(0,3), rep(0,1), rep(0,4), rep(0,3), rep(0,4), rep(0,2), rep(0,2), rep(0,47), rep(1,1), rep(0,1), rep(1,1))
x <- c(rep(1,3), rep(2,1), rep(3.3,4), rep(5.016,3), rep(7.0224,4), rep(9.3398,2), rep(12.4219,2), rep(16.5211,48), rep(21.9731,2))

length(x)
length(y)

#------------------#
# Method 1 - Works #
#------------------#
glm(y~x, family = binomial)$coefficients

# summary(glm(y~x, family = binomial))

#------------------#
# Method 2 - Works #
#------------------#
library(Design)
lrm(y ~ x)$coef

#-------------------#
# Method 3 - Works #
#-------------------#
lgtreg <- function(beta, x, y)
{
eta <- exp(beta[1] + beta[2]*x)
p <- eta/(1+eta)
return(-sum(ifelse(y,log(p),log(1-p)))) # This is the log-likelihood
}
optim(c(0,0), lgtreg, x = x, y = y, method = "BFGS", hessian = TRUE)$par

#--------------------------------#
# Method 4 - Veneble and Ripley's #
#--------------------------------#
logitregVR(x, y) # No error
logitregVR(x, y, start = rep(0.1, 2)) # Gives error
logitregVR(x, y, start = rep(0.5, 2)) # No error
logitregVR(x, y, start = rep(1, 2)) # Gives error


I am getting this error,

Error in optim(start, fmin, gmin, X = x, y = y, w = wt, method = "L-BFGS-B", : L-BFGS-B needs finite values of 'fn'
I am really not sure whats causing this error, can someone please help me. Any comments/replies highly appreciated,

Thank you very much,
Ashky
 
Last edited:
#2
The problem is caused by p = 1 and then log(1-p) reporting -Inf. A quick and effective hack to avoid this problem is the following code:

#source("logitreg.R")
#
# direct maximum likelihood of logistic regression as Venables & Ripley
# More Applied Statisitcs with S 2002 p 445
# See also ashokkrish on 09-29-2008 http://www.talkstats.com/showthread.php/
# 5717-Logistic-Regression-using-optim()-give-quot-L-BFGS-B-quot-error-please-help
#
# Version 1.0, 30 September 2014 by Tjeerd Dijkstra
# Tested with R 3.1 and JAGS 3.3 under OS X 10.9.5 on an i7-2635QM@2GHz

logitreg <- function(X, y, wt=rep(1, length(y)), intercept=TRUE,
init.par=rep(0, p), optim.method="BFGS", ...)
{ # residual deviance D = -2 [Likelihood Ratio]
fmin <- function(beta, X, y, w) {
p <- plogis(X %*% beta)
# improvement over Venables and Ripley by avoiding 1
# note that 1-plogis(37) = 0, whereas plogis(-710) = 0
p <- ifelse(p >= (1-.Machine$double.eps), (1-.Machine$double.eps), p)
return(-sum(2*w*ifelse(y, log(p), log(1-p))))
}
# gradient of residual deviance
gmin <- function(beta, X, y, w) {
eta <- X %*% beta; p <- plogis(eta)
p <- ifelse(p >= (1-.Machine$double.eps), (1-.Machine$double.eps), p)
return(-2*matrix(w*dlogis(eta)*ifelse(y, 1/p, -1/(1-p)), 1) %*% X)
}

if (is.null(dim(X))) dim(X) <- c(length(X), 1)
dn <- dimnames(X)[[2]]
if(!length(dn)) dn <- paste("Slope", 1:ncol(X), sep="")

p <- ncol(X) + intercept
if (intercept) {X <- cbind(1, X); dn <- c("(Intercept)", dn)}
if (is.factor(y)) y <- (unclass(y) != 1)

fit <- optim(init.par, fmin, gmin, X=as.matrix(X), y=y, w=wt, method=optim.method, ...)
names(fit$par) <- dn

cat("\nlogitreg coefficients:\n"); print(fit$par)
cat("residual deviance:", format(fit$value), "\n\n")
if (fit$convergence > 0) cat("\nconvergence code:", fit$convergence, "\n")

return(list(coefficients=fit$par, deviance=fit$value,
fitted=plogis(as.matrix(X) %*% fit$par)))
}

y <- c(rep(0,3), rep(0,1), rep(0,4), rep(0,3), rep(0,4), rep(0,2),
rep(0,2), rep(0,47), rep(1,1), rep(0,1), rep(1,1))
x <- c(rep(1,3), rep(2,1), rep(3.3,4), rep(5.016,3), rep(7.0224,4),
rep(9.3398,2), rep(12.4219,2), rep(16.5211,48), rep(21.9731,2))

# No error in original Venables and Ripley code and no error here
logitreg(x, y, init.par=rep(0.0, 2), optim.method="L-BFGS-B", lower=c(-Inf, 0.05), upper=c(Inf, Inf))
# Gives error in original Venables and Ripley code and no error here
logitreg(x, y, init.par=rep(0.1, 2), optim.method="L-BFGS-B", lower=c(-Inf, 0.05), upper=c(Inf, Inf))
# No error in original Venables and Ripley code and no error here
logitreg(x, y, init.par=rep(0.5, 2), optim.method="L-BFGS-B", lower=c(-Inf, 0.05), upper=c(Inf, Inf))
# Gives error in original Venables and Ripley code and no error here
logitreg(x, y, init.par=rep(1.0, 2), optim.method="L-BFGS-B", lower=c(-Inf, 0.05), upper=c(Inf, Inf))