csminwel <- function(fcn,x0,H0,...,grad=NULL,crit=1e-7,nit,Verbose=TRUE,Long=FALSE) { ### fcn: the objective function to be minimized ### x0: initial value of the parameter vector ### H0: initial value for the inverse Hessian. Must be positive definite. ### grad: A function that calculates the gradient, or the null matrix, or a numerical ### vector. If it's not a function, numerical gradients are used. If it's numerical, the ### first iteration uses the numerical vector as the initial gradient. This is useful when the algorithm ### is restarted and the gradient calculation is slow, since the program stores the final gradient when it quits. ### If it's a function, it must return a list, with elements named g and badg. g is the gradient and badg ### is logical, TRUE if the gradient routine ran into trouble so its return value should not be used. ### crit: Convergence criterion. Iteration will cease when it proves impossible to improve the ### function value by more than crit. ### nit: Maximum number of iterations. ### ...: A list of optional length of additional parameters that get handed off to fcn each ### time it is called. ### Note that if the program ends abnormally, it is possible to retrieve the current x, ### f, and H from the files g1.mat and H.mat that are written at each iteration and at each ### hessian update, respectively. (When the routine hits certain kinds of difficulty, it ### write g2.mat and g3.mat as well. If all were written at about the same time, any of them ### may be a decent starting point. One can also start from the one with best function value.) dots <- list(...) # (need this to save these arguments in case of cliffs) nx <- length(x0) NumGrad <- !is.function(grad) done <- 0 itct <- 0 fcount <- 0 snit <- 100 badg1 <- badg2 <- badg3 <- FALSE f0 <- fcn(x0,...) ## browser() if( f0 > 1e50 ) { stop("Bad initial parameter.") } if( NumGrad ) { if( !is.numeric(grad) ) { gbadg <- numgrad(fcn,x0,...) g <- gbadg$g badg <- gbadg$badg } else { badg <- any(grad==0) g <- grad } } else { gbadg <- grad(x0,...) badg <-gbadg$badg g <- gbadg$g } retcode3 <- 101 x <- x0 f <- f0 H <- H0 cliff <- 0 while( !done ) { g1 <- NULL; g2 <- NULL; g3 <- NULL ##addition fj. 7/6/94 for control cat('-----------------\n') cat('-----------------\n') cat(sprintf('f at the beginning of new iteration, %20.10f',f),"\n") if (!Long && Verbose) { # set Long=TRUE if parameter vector printouts too long cat("x =\n") print(x,digits=12) } ##------------------------- itct <- itct+1 itout <- csminit(fcn,x,f,g,badg,H,...) f1 <- itout$fhat x1 <- itout$xhat fc <- itout$fcount retcode1 <- itout$retcode fcount <- fcount+fc if( retcode1 != 1 ) { # Not gradient convergence ## if( retcode1==2 || retcode1==4) { ## wall1 <- TRUE; badg1 <- TRUE ## } else { # not a back-forth wall, so check gradient if( NumGrad ) { gbadg <- numgrad(fcn, x1,...) } else { gbadg <- grad(x1,...) } g1 <- gbadg$g badg1 <- gbadg$badg wall1 <- (badg1 || retcode1==2 || retcode1 == 4) # A wall is back-forth line search close or bad gradient ## g1 save(file="g1", g1, x1, f1, dots) ## } if( wall1 && dim(H)[1] > 1) { ## Bad gradient or back and forth on step length. Possibly at ## cliff edge. Try perturbing search direction, if problem is not unidimensional ## Hcliff <- H+diag(diag(H) * rnorm(nx)) cat("Cliff. Perturbing search direction. \n") itout <- csminit(fcn,x,f,g,badg,Hcliff,...) f2 <- itout$fhat x2 <- itout$xhat fc <- itout$fcount retcode2 <- itout$retcode fcount <- fcount+fc # put by Jinill ## if( f2 < f ) { ## if( retcode2 == 2 || retcode2==4 ){ ## wall2 <- 1 ## badg2 <- 1 ## } else { if( NumGrad ){ gbadg <- numgrad(fcn, x2,...) }else{ gbadg <- grad(x2,...) } g2 <- gbadg$g badg2 <- gbadg$badg wall2 <- (badg2 || retcode2==2 || retcode2==4) ## g2 ## badg2 save(file="g2", g2, x2, f2, dots) if( wall2 ) { cat('Cliff again. Try traversing\n') normdx <- sqrt(sum(x2-x1)^2) if( normdx < 1e-13 ) { # two trial x's too close, can't traverse f3 <- f; x3 <- x; badg3 <- 1;retcode3 <- 101 } else { ## as.numeric below for robustness against f's being 1x1 arrays gcliff <- (x2 - x1) * (as.numeric(f2 - f1)/(normdx^2)) dim(gcliff) <- c(nx,1) itout <- csminit(fcn,x,f,gcliff,0,diag(nx),...) f3 <- itout$fhat x3 <- itout$xhat fc <- itout$fc retcode3 <- itout$retcode fcount <- fcount+fc # put by Jinill ## if( retcode3==2 || retcode3==4 ) { ## wall3 <- 1 ## badg3 <- 1 ## } else { if( NumGrad ) { gbadg <- numgrad(fcn, x3,...) }else{ gbadg <- grad(x3,...) } g3 <- gbadg$g badg3 <- gbadg$badg wall3 <- (badg3 || retcode3==2 || retcode3==4) ## g3 ## badg3 save(file="g3", g3, x3, f3, dots) } } else { # wall1, but not wall2, so pack f3, etc with initial values f3 <- f x3 <- x badg3 <- 1 retcode3 <- 101 } } else { # no walls, or one-dimensional, so no perturbations f3 <- f x3 <- x badg3 <- TRUE retcode3 <- 101 f2 <- f x2 <- f retcode2 <- 101 } } else { # gradient convergence --- csminit just looked at gradient and quit f1 <- f2 <- f3 <- f; g1 <- g <- FALSE; badg2 <- badg3 <- TRUE; retcode1 <- 1; retcode2 <- 101; retcode3 <- 101 } ## how to pick gh and xh: ## pick first fj that improves by crit and has badg==FALSE, starting with j=3 ## may pick other than min fj to stay away from cliff if( f3 < f-crit & badg3==0 ) { ih <- 3 fh <- f3;xh <- x3;gh <- g3;badgh <- badg3;retcodeh <- retcode3 } else { if( f2 < f-crit && badg2==0 ) { ih <- 2 fh <- f2;xh <- x2;gh <- g2;badgh <- badg2;retcodeh <- retcode2 } else { if( f1 < f-crit && badg1==0 ) { ih <- 1 fh <- f1;xh <- x1;gh <- g1;badgh <- badg1;retcodeh <- retcode1 } else { ## none qualify, so pick the one that improves most. fh <- min(f1,f2,f3) ih <- which.min(c(f1,f2,f3)) cat("ih =",ih,"\n") xh <- switch(ih,x1,x2,x3) retcodeh <- switch(ih,retcode1,retcode2,retcode3) nogh <- (!exists("gh",inherits=FALSE) || is.null(gh)) if( nogh ) { if( NumGrad ) { gbadg <- numgrad(fcn,xh,...) } else { gbadg <- grad( xh,...) } gh <- gbadg$gh badgh <- gbadg$badg } badgh <- 1 } } } ## end of picking ##ih ##fh ##xh ##gh ##badgh stuck <- (abs(fh-f) < crit) if ( (!badg) && (!badgh) && (!stuck) ) { H <- bfgsi(H,gh-g,xh-x) } ## if( Verbose ) { cat("----\n") cat(sprintf('Improvement on iteration %8.0f = %18.9f\n',itct,f-fh)) ##} if( itct > nit ) { cat("iteration count termination\n") done <- 1 } else { if( stuck ) { cat("improvement < crit termination\n") done <- 1 } rc <- retcodeh switch( rc , cat("zero gradient\n"), #1 cat("back and forth on step length never finished\n"), #2 cat("smallest step still improving too slow\n"), #3 cat("back and forth on step length never finished\n"), #4 cat("largest step still improving too fast\n"), #5 cat("smallest step still improving too slow, reversed gradient\n"), #6 cat("warning: possible inaccuracy in H matrix\n"), #7 ) } f <- fh x <- xh g <- gh badg <- badgh } # while !done return(list(fh=fh,xh=xh,gh=gh,H=H,itct=itct,fcount=fcount,retcodeh=retcodeh,...)) }