Read the instructions carefully and think about how to develop R code to answer each question.
\[ f(\mathbf{x}) = (x_1 − 4)^4 +(x_2 − 3)^2+4 \cdot(x_3 + 5)^4 \]
### SOLUTION TO QUESTION 1A ###
f.expr <- expression( (x1 - 4)^4 +(x2 - 3)^2+4*(x3 + 5)^4 )
f.fix <- function(x1,x2,x3){}
body(f.fix) <- f.expr
x2Grid <- seq(0,8,0.1)
x3Grid <- seq(-9,-1,0.1)
objGrd <- outer(x2Grid,x3Grid,f.fix,x1=4)
contour(x2Grid,x3Grid,objGrd,levels=c(1,5,10,20,50,80,100,500),col="blue")
Why plot? Visualization is an important tool for gaining insight and confirming results. A practitioner should therefore plot the possible solutions and the structure of the problem before attempting to solve it.
The function value, gradient, and Hessian can be obtained together with deriv3().
### SOLUTION TO QUESTION 1B ###
f.fun <- function(x1,x2,x3){}
body(f.fun) <- deriv3( f.expr,c("x1","x2","x3"))
f.eval <- f.fun(4,2,-1)
f.grad <- as.vector(attr(f.eval,"gradient"))
f.hess <- matrix(attr(f.eval,"hessian"),ncol=3)
f.grad
## [1] 0 -2 1024
f.hess
## [,1] [,2] [,3]
## [1,] 0 0 0
## [2,] 0 2 0
## [3,] 0 0 768
##-- this code block is NOT executed
source("steepestDescent.R")
curPt <- c(4,2,-1)
varName <- c("x1","x2","x3")
gradMeth(f.expr,varName,init =curPt,tmax = 5 )
The Steepest Descent Method moves along the negative gradient (a.k.a. the steepest descent direction) with an optimal stepsize (i.e., it moves until the function would start to increase again). Given the current point \((\mathbf{x}_i)\) and the gradient at that point \((\nabla f(\mathbf{x}_i))\), the method reduces \(f(\mathbf{x})\) to a one-dimensional function of the stepsize, \(f(\gamma| \mathbf{x}_i, \nabla f(\mathbf{x}_i) )\).
\[ \mathbf{x}_{1} = \mathbf{x}_0 - \gamma_0 \nabla f(\mathbf{x}_0) \qquad \Rightarrow \qquad \begin{bmatrix} 4 - \gamma_0 (0) \\ 2- \gamma_0 (-2) \\ -1- \gamma_0 (1024) \\ \end{bmatrix} \]
\[ \begin{eqnarray*} f(\mathbf{x}_{i+1}) &=& f \Big(\mathbf{x}_i - \gamma_i \nabla f(\mathbf{x}_i) \Big)\\ &=& f \Big( \gamma \Big| \mathbf{x}_i, \nabla f(\mathbf{x}_i) \Big) \\ &=& f \Big( \gamma_0 \Big| \mathbf{x}_0, \nabla f(\mathbf{x}_0) \Big) \\ &=& \Big( (4 - 0\gamma_0) − 4 \Big)^4 + \Big( (2+2 \gamma_0) − 3 \Big)^2+4 \cdot \Big( (-1- 1024\gamma_0 ) + 5 \Big)^4 \\ \end{eqnarray*} \]
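Instead of solving for \(\gamma_0\) analytically, the one-dimensional problem can be handed to a numerical line search. A minimal sketch using optimize(); the search interval c(0, 0.01) is an assumption chosen to bracket the minimizer:
##-- exact line search for the first step along the steepest-descent direction
g <- function(gamma) f.fix(4 - gamma*0, 2 - gamma*(-2), -1 - gamma*1024)
optimize(g, interval=c(0,0.01))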
\[ \gamma_k = \frac{2}{\lambda_{\max}( \nabla^2 f(\mathbf{x_k}) )} \]
where \(\lambda_{\max}( \nabla^2 f(\mathbf{x}_k) )\) is the largest eigenvalue of the Hessian at \(\mathbf{x}_k\).
##-- initialize points as current point
curPt <- c(4,2,-1)
sol <- NULL
for(i in 1:5){
## i <- 1 ## for debug
f.eval <- f.fun(curPt[1],curPt[2],curPt[3])
##-- evaluate
f.obj <- f.eval[1]
f.grad <- as.vector(attr(f.eval,"gradient"))
f.hess <- matrix(attr(f.eval,"hessian"),ncol=3)
##-- compute fixed stepsize
eig.max <- max(eigen(f.hess)$values)
direc <- -1*f.grad
stepsize<- 2/eig.max
##-- record each step
sol <- rbind.data.frame(sol,c(i,f.obj,curPt))
##-- update the current point
curPt <- curPt + stepsize * as.vector(direc)
}
colnames(sol) <- c("i","obj","x1","x2","x3")
sol
## i obj x1 x2 x3
## 1 1 1025.0000000 4 2.000000 -1.000000
## 2 2 13.6315858 4 2.005208 -3.666667
## 3 3 1.0550827 4 2.051839 -4.555556
## 4 4 0.3024013 4 2.451845 -4.851852
## 5 5 0.3008159 4 3.548155 -4.903876
Note: for a quadratic objective with Hessian \(\mathbf{Q}\), the exact line-search stepsize along the steepest-descent direction is
\[ \gamma_k = \frac{\mathbf{d}_k^T \mathbf{d}_k}{ \mathbf{d}_k^T \mathbf{Q} \mathbf{d}_k } \]
where \(\mathbf{d}_k\) is the direction at iteration \(k\).
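For this non-quadratic \(f\), one rough way to reuse the formula is to take the local Hessian as \(\mathbf{Q}\). A minimal sketch, assuming f.grad and f.hess from the last evaluation above are still in the workspace:
##-- stepsize from the quadratic-model formula, using the local Hessian as Q
d <- -1*f.grad ## steepest-descent direction
gammaQ <- sum(d*d) / as.numeric( t(d) %*% f.hess %*% d )
gammaQ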
Alternatively, the problem can be solved directly with optim().
f2.fun <- function(x){ ## wrap f.fun() so it takes a single vector argument, as optim() expects
f.value <- f.fun(x[1],x[2],x[3])
return(f.value[1])
}
f2.fun(curPt)
## [1] 0.3006545
optim(curPt,f2.fun)
## $par
## [1] 4.004684 3.000043 -4.997964
##
## $value
## [1] 2.420056e-09
##
## $counts
## function gradient
## 74 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
Note: in practice, one may prefer optimx::optimx() instead of base::optim().
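A minimal sketch of that alternative, assuming the optimx package is installed; optimx() can run several methods in one call:
##-- install.packages("optimx") ## if not already installed
optimx::optimx(curPt, f2.fun, method=c("Nelder-Mead","BFGS"))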
source("pureNewton.R")
curPt <- c(4,2,-1)
varName <- c("x1","x2","x3")
newtMeth(f.expr,varName,init =curPt,tmax = 5 )
Newton’s Method approximates the original function \(f(\mathbf{x})\) by its Taylor series up to the first three terms, i.e., \(q(\mathbf{x})\), and applies the first-order condition \(\frac{\partial}{\partial \mathbf{x} } q(\mathbf{x}) = \mathbf{0}\). This leads to
\[ \mathbf{x}_{i+1} = \mathbf{x}_i - \big[ \nabla^2 f(\mathbf{x}_i) \big]^{-1} \nabla f(\mathbf{x}_i) \]
Here is the first iteration, when \(\mathbf{x}_0 = [4,2,-1]^T\), \(\nabla f(\mathbf{x}_0) = [0,-2,1024]^T\), and \(\nabla^2 f(\mathbf{x}_0) = \begin{bmatrix} 0 & 0 & 0 \\ 0 & 2 & 0 \\ 0 & 0 & 768 \end{bmatrix}\).
Since \(f(\mathbf{x})\) is NOT a quadratic function (unlike its approximation \(q(\mathbf{x})\)), many implementations include a stepsize \(\gamma_i\) to speed up the algorithm and make the result more reliable (hint: what can go wrong?).
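One concrete illustration of what can go wrong (not part of the original solution): at the starting point \(\mathbf{x}_0 = [4,2,-1]^T\) of Question 1, the Hessian is singular, so a pure Newton step is not even defined there.
##-- the Hessian at (4,2,-1) has a zero first diagonal entry, hence det = 0
f.eval <- f.fun(4,2,-1)
f.hess <- matrix(attr(f.eval,"hessian"),ncol=3)
det(f.hess) ## 0, so solve(f.hess) would fail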
\[ f(\mathbf{x}) = (x_1 + 10x_2)^2 +5(x_3 - x_4)^2+(x_2 - 2x_3)^4+10(x_1 - x_4)^4 \]
\[ \gamma_k = \max \left\{ \frac{1}{2^n} : f\big(\mathbf{x}_k - \tfrac{1}{2^n} \nabla f(\mathbf{x}_k) \big) \leq f(\mathbf{x}_k),\ n \in \mathbb{Z}^+ \right\} \]
##-- this code block is NOT executed
##-- setup main function
f2.expr <- expression( (x1 + 10*x2)^2 +5*(x3 - x4)^2+(x2 - 2*x3)^4+10*(x1 - x4)^4 )
f2.fun <- function(x1,x2,x3,x4){}
body(f2.fun) <- deriv3( f2.expr,c("x1","x2","x3","x4"))
##-- initialize point as current points
curPt <- c(3, -1, 0, 1)
sol <- NULL
for(i in 1:3){
## i <- 1 ## debug
f2.eval <- f2.fun(curPt[1],curPt[2],curPt[3],curPt[4])
f2.obj <- f2.eval[1]
f2.grad <- as.vector(attr(f2.eval,"gradient"))
f2.hess <- matrix(attr(f2.eval,"hessian"),ncol=4)
##-- why is the direction the negative of the gradient vector?
direc <- -1*f2.grad
##-- start with 2 so that repeat will begin with 1.0
stepsize <- 2
repeat{
stepsize <- stepsize*0.5
tempPt <- curPt + stepsize * as.vector(direc)
temp.eval<- f2.fun(tempPt[1],tempPt[2],tempPt[3],tempPt[4])
temp.obj <- temp.eval[1]
if(temp.obj < f2.obj){
break
}
}
curPt <- tempPt
f2.obj<- temp.obj
sol <- rbind.data.frame(sol,c(i,f2.obj,stepsize,curPt))
colnames(sol) <- c("i","obj","stepsize","x1","x2","x3","x4")
}
solGrad <- sol
\[ \gamma_k = \max \left\{ \frac{1}{2^n} : f\big(\mathbf{x}_k + \tfrac{1}{2^n} \mathbf{d}_k \big) \leq f(\mathbf{x}_k),\ n \in \mathbb{Z}^+ \right\}, \qquad \mathbf{d}_k = -\big[\nabla^2 f(\mathbf{x}_k)\big]^{-1} \nabla f(\mathbf{x}_k) \]
##-- this code block is NOT executed
curPt <- c(3, -1, 0, 1) ##-- re-initialize so both methods start from the same point
sol <- NULL
for(i in 1:3){
## i <- 1 ## debug
f2.eval <- f2.fun(curPt[1],curPt[2],curPt[3],curPt[4])
f2.obj <- f2.eval[1]
f2.grad <- as.vector(attr(f2.eval,"gradient"))
f2.hess <- matrix(attr(f2.eval,"hessian"),ncol=4)
##-- this is the major change. What does it mean?
direc <- -1*solve(f2.hess) %*%f2.grad
##-- start with 2 so that repeat will begin with 1.0
stepsize <- 2
repeat{
stepsize <- stepsize*0.5
tempPt <- curPt + stepsize * as.vector(direc)
temp.eval<- f2.fun(tempPt[1],tempPt[2],tempPt[3],tempPt[4])
temp.obj <- temp.eval[1]
if(temp.obj < f2.obj){
break
}
}
curPt <- tempPt
f2.obj<- temp.obj
sol <- rbind.data.frame(sol,c(i,f2.obj,stepsize,curPt))
colnames(sol) <- c("i","obj","stepsize","x1","x2","x3","x4")
}
solNewton <- sol
##-- this code block is NOT executed
View(cbind(solGrad,solNewton))
Note: these results can also be checked with optim().
| num exper (\(i\)) | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
|---|---|---|---|---|---|---|---|---|---|---|
| factor (\(t^i\)) | 0.1 | 1.2 | 2.1 | 4.6 | 5.6 | 5.9 | 6.2 | 6.8 | 8.1 | 9.3 |
| response (\(y^i\)) | 1.816 | 0.267 | -0.304 | -0.849 | -1.053 | -1.200 | -1.260 | -1.294 | -1.628 | -1.881 |
y.experi <- c(1.816,0.267,-0.304,-0.849,-1.053,-1.200,-1.260,-1.294,-1.628,-1.881)
t.experi <- c(0.1,1.2,2.1,4.6,5.6,5.9,6.2,6.8,8.1,9.3)
\[ \hat{y} = x_1 + x_2 \, t + x_3 \, e^{−x_4 t} \]
\[ \begin{eqnarray} f(x_1,x_2,x_3,x_4) &=& \left( 1.816 - (x_1+ x_2 \cdot 0.1 + x_3 \cdot e^{-x_4 \cdot 0.1}) \right)^2 \\ & &+ \left( 0.267 - (x_1+ x_2 \cdot 1.2 + x_3 \cdot e^{-x_4 \cdot 1.2}) \right)^2\\ & &+ \left(-0.304 - (x_1+ x_2 \cdot 2.1 + x_3 \cdot e^{-x_4 \cdot 2.1}) \right)^2\\ & &+ \left(-0.849 - (x_1+ x_2 \cdot 4.6 + x_3 \cdot e^{-x_4 \cdot 4.6}) \right)^2\\ & &+ \left(-1.053 - (x_1+ x_2 \cdot 5.6 + x_3 \cdot e^{-x_4 \cdot 5.6}) \right)^2\\ & &+ \left(-1.200 - (x_1+ x_2 \cdot 5.9 + x_3 \cdot e^{-x_4 \cdot 5.9}) \right)^2\\ & &+ \left(-1.260 - (x_1+ x_2 \cdot 6.2 + x_3 \cdot e^{-x_4 \cdot 6.2}) \right)^2\\ & &+ \left(-1.294 - (x_1+ x_2 \cdot 6.8 + x_3 \cdot e^{-x_4 \cdot 6.8}) \right)^2\\ & &+ \left(-1.628 - (x_1+ x_2 \cdot 8.1 + x_3 \cdot e^{-x_4 \cdot 8.1}) \right)^2\\ & &+ \left(-1.881 - (x_1+ x_2 \cdot 9.3 + x_3 \cdot e^{-x_4 \cdot 9.3}) \right)^2 \end{eqnarray} \]
##-- this code block is NOT executed
minSum.expr <- expression(
(1.816 - (x1+ x2*0.1 + x3*exp(-x4 * 0.1) ) )^2
+ ( 0.267 - (x1+ x2*1.2 + x3*exp(-x4 * 1.2) ) )^2
+ (-0.304 - (x1+ x2*2.1 + x3*exp(-x4 * 2.1) ) )^2
+ (-0.849 - (x1+ x2*4.6 + x3*exp(-x4 * 4.6) ) )^2
+ (-1.053 - (x1+ x2*5.6 + x3*exp(-x4 * 5.6) ) )^2
+ (-1.200 - (x1+ x2*5.9 + x3*exp(-x4 * 5.9) ) )^2
+ (-1.260 - (x1+ x2*6.2 + x3*exp(-x4 * 6.2) ) )^2
+ (-1.294 - (x1+ x2*6.8 + x3*exp(-x4 * 6.8) ) )^2
+ (-1.628 - (x1+ x2*8.1 + x3*exp(-x4 * 8.1) ) )^2
+ (-1.881 - (x1+ x2*9.3 + x3*exp(-x4 * 9.3) ) )^2
)
minSum <- function (x1,x2,x3,x4){}
body(minSum) <- deriv3(minSum.expr,c("x1","x2","x3","x4"))
curPara <- c(0.1,-0.2,2.0, 1.0)
for(i in 1:5){
minSum.fun <- minSum(curPara[1],curPara[2],curPara[3],curPara[4])
minSum.value <- minSum.fun[1]
minSum.grad <- as.vector(attr(minSum.fun,"gradient"))
minSum.hess <- matrix(attr(minSum.fun,"hessian"),ncol=4)
dirc <- -1* solve(minSum.hess) %*% minSum.grad
stepsize <- 2
repeat{
stepsize <- stepsize *0.5
nxtPara <- curPara + stepsize*dirc
temp.value<- minSum(nxtPara[1],nxtPara[2],nxtPara[3],nxtPara[4])
if(temp.value<minSum.value){
break
}
}
print( paste(c(i,curPara,temp.value,stepsize,nxtPara,minSum.grad),collapse=", "))
curPara <- curPara + stepsize*dirc
}
This is an implementation of Newton’s method to solve for the parameters, similar to the previous questions. Note that the objective is a sum of squared residuals; hence, the gradient vector and Hessian matrix always have a particular structure.
Recall:
\(\nabla f(\mathbf{x}) = 2\, \nabla \mathbf{R}(\mathbf{x})^T \cdot \mathbf{R}(\mathbf{x})\)
\(\nabla^2 f(\mathbf{x}) \approx 2 \nabla \mathbf{R}(\mathbf{x})^T \cdot \nabla \mathbf{R}(\mathbf{x})\)
where,
\[ \begin{eqnarray*} f(\mathbf{x}) &=& \sum_{i=1}^{n} \Big( r(\mathbf{x})_i \Big)^2 \\ &=& \mathbf{R}(\mathbf{x})^T \mathbf{R}(\mathbf{x}) \end{eqnarray*} \]
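The approximation sign on the Hessian comes from dropping the residual-curvature term in the exact second derivative; writing the exact Hessian out makes this explicit:
\[ \nabla^2 f(\mathbf{x}) = 2\, \nabla \mathbf{R}(\mathbf{x})^T \nabla \mathbf{R}(\mathbf{x}) + 2 \sum_{i=1}^{n} r(\mathbf{x})_i \, \nabla^2 r(\mathbf{x})_i \]
The second term is small when the residuals \(r(\mathbf{x})_i\) are small near the solution, which is why the approximation works well for good fits.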
### SOLUTION TO QUESTION 6D ###
calErr <- function(y,t,x1,x2,x3,x4){
y - (x1 + x2*t +x3* exp(-1*x4*t))
}
calRMat <- function(y,t,x1,x2,x3,x4){
as.matrix(calErr(y,t,x1,x2,x3,x4),ncol=1)
}
\[ \mathbf{R}(\mathbf{x}) \equiv \begin{bmatrix} r(\mathbf{x})_1 \\ r(\mathbf{x})_2 \\ \vdots \\ r(\mathbf{x})_n \\ \end{bmatrix} = \begin{bmatrix} 1.816 - (x_1+ x_2 \cdot 0.1 + x_3 \cdot e^{-x_4 \cdot 0.1}) \\ 0.267 - (x_1+ x_2 \cdot 1.2 + x_3 \cdot e^{-x_4 \cdot 1.2})\\ \vdots\\ -1.881 - (x_1+ x_2 \cdot 9.3 + x_3 \cdot e^{-x_4 \cdot 9.3}) \end{bmatrix} \]
\[ \nabla \mathbf{R}(\mathbf{x}) \equiv \begin{bmatrix} \frac{\partial}{\partial x_1} r(\mathbf{x})_1 & \frac{\partial}{\partial x_2} r(\mathbf{x})_1 & \cdots & \frac{\partial}{\partial x_m} r(\mathbf{x})_1 \\ \frac{\partial}{\partial x_1} r(\mathbf{x})_2 & \frac{\partial}{\partial x_2} r(\mathbf{x})_2 & \cdots & \frac{\partial}{\partial x_m} r(\mathbf{x})_2 \\ \vdots & \vdots & \cdots & \vdots \\ \frac{\partial}{\partial x_1} r(\mathbf{x})_n & \frac{\partial}{\partial x_2} r(\mathbf{x})_n & \cdots & \frac{\partial}{\partial x_m} r(\mathbf{x})_n \\ \end{bmatrix} = \begin{bmatrix} -1& - 0.1 & - e^{-x_4 \cdot 0.1} & 0.1 x_3 e^{-x_4 \cdot 0.1} \\ -1 & - 1.2 & - e^{-x_4 \cdot 1.2} & 1.2 x_3 e^{-x_4 \cdot 1.2} \\ \vdots & \vdots & \cdots & \vdots \\ -1 & -9.3 & -e^{-x_4 \cdot 9.3} & 9.3 x_3 e^{-x_4 \cdot 9.3} \\ \end{bmatrix} \]
### SOLUTION TO QUESTION 6D ###
calRNebalMat <- function(y,t,x1,x2,x3,x4){
express <- expression( y - (x1+x2*t+x3*exp(-1*x4*t) ))
rNebal1 <- function(y,t,x1,x2,x3,x4){}
rNebal2 <- function(y,t,x1,x2,x3,x4){}
rNebal3 <- function(y,t,x1,x2,x3,x4){}
rNebal4 <- function(y,t,x1,x2,x3,x4){}
body(rNebal1) <- D(express,"x1")
body(rNebal2) <- D(express,"x2")
body(rNebal3) <- D(express,"x3")
body(rNebal4) <- D(express,"x4")
rNebalMat <- cbind(rNebal1(y,t,x1,x2,x3,x4) ##-- evaluate each partial derivative at the y, t passed in
,rNebal2(y,t,x1,x2,x3,x4)
,rNebal3(y,t,x1,x2,x3,x4)
,rNebal4(y,t,x1,x2,x3,x4)
)
return(rNebalMat)
}
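Before the main loop, a quick sanity check (not part of the original solution) confirms the dimensions of the two building blocks at the starting parameters:
##-- R(x) should be 10 x 1 and its Jacobian 10 x 4
R0 <- calRMat(y.experi,t.experi,0.1,-0.2,2.0,1.0)
dR0 <- calRNebalMat(y.experi,t.experi,0.1,-0.2,2.0,1.0)
dim(R0) ## 10 1
dim(dR0) ## 10 4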
curPara <- c(0.1,-0.2,2.0, 1.0)
result <- data.frame()
for(i in 1:5){
R <- calRMat(y.experi,t.experi,curPara[1],curPara[2]
,curPara[3],curPara[4])
nabalR<- calRNebalMat(y.experi,t.experi,curPara[1],curPara[2]
,curPara[3],curPara[4])
minSum.value <- t(R) %*% R
minSum.grad <- 2*t(nabalR) %*% R
minSum.hess <- 2*t(nabalR) %*% nabalR
dirc <- -1* solve(minSum.hess) %*% minSum.grad
stepsize <- 2
repeat{
stepsize <- stepsize *0.5
nxtPara <- curPara + stepsize*dirc
nxtR <- calRMat(y.experi,t.experi,nxtPara[1],nxtPara[2]
,nxtPara[3],nxtPara[4])
temp.value<- t(nxtR) %*% nxtR
if(temp.value<minSum.value){
break
}
}
tempResult <- c(i,curPara,temp.value,stepsize)
result <- rbind.data.frame(result,tempResult)
#print( paste(c(i,curPara,temp.value,stepsize,nxtPara,minSum.grad),collapse=", "))
curPara <- curPara + stepsize*dirc
}
colnames(result) <- c("i",paste("x",1:4,sep="."),"obj","stepsize" )
result
## i x.1 x.2 x.3 x.4 obj stepsize
## 1 1 0.10000000 -0.2000000 2.000000 1.000000 0.01686616 1
## 2 2 0.10126952 -0.2123479 1.981981 1.291342 0.01428903 1
## 3 3 0.06382786 -0.2075567 2.025220 1.311786 0.01428844 1
## 4 4 0.06277781 -0.2074102 2.025804 1.309101 0.01428843 1
## 5 5 0.06290852 -0.2074285 2.025741 1.309429 0.01428843 1
The same model can also be fitted directly with nls().
### SOLUTION TO QUESTION 6D ###
nls(y.experi~x1+x2*t.experi+x3* exp(-1*x4*t.experi) ,
start=list(x1=0.1,x2=-0.2,x3=2.0,x4=1.0))
## Nonlinear regression model
## model: y.experi ~ x1 + x2 * t.experi + x3 * exp(-1 * x4 * t.experi)
## data: parent.frame()
## x1 x2 x3 x4
## 0.06289 -0.20743 2.02575 1.30939
## residual sum-of-squares: 0.01429
##
## Number of iterations to convergence: 6
## Achieved convergence tolerance: 1.517e-06
Like most classical methods and algorithms, nls() has already been implemented in R; we can simply call it.
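A quick visual check of the fit: the sketch below stores the nls() result in a hypothetical object fit and overlays the fitted curve on the experimental data.
##-- plot the data and the fitted curve
fit <- nls(y.experi~x1+x2*t.experi+x3*exp(-1*x4*t.experi),
start=list(x1=0.1,x2=-0.2,x3=2.0,x4=1.0))
cf <- coef(fit)
tGrid <- seq(0,10,0.1)
plot(t.experi,y.experi,pch=16,xlab="factor t",ylab="response y")
lines(tGrid, cf["x1"] + cf["x2"]*tGrid + cf["x3"]*exp(-cf["x4"]*tGrid), col="blue")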
Find the location of a single warehouse that minimizes the total weighted distance to the following locations.
## x y weight
## [1,] 8 2 9
## [2,] 3 10 7
## [3,] 8 15 2
## [4,] 4 3 6
## [5,] 16 8 7
## [6,] 4 5 5
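The code below assumes these six locations are stored in an object named data. A minimal construction consistent with the printed table:
##-- assumed construction of the six demand points (x, y, weight)
data <- cbind(x = c(8, 3, 8, 4, 16, 4),
y = c(2, 10, 15, 3, 8, 5),
weight = c(9, 7, 2, 6, 7, 5))
data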
### SOLUTION TO QUESTION 6A ###
plot(-10,-10,xlim=c(0,16),ylim=c(0,16),xlab="x-axis",ylab="y-axis")
nSize <- nrow(data)
points(data[,1],data[,2],cex=0.5,pch=16)
for(i in 1:nSize){
## i <- 1 #debug
custtext <- paste( c(i," (",data[i,3],")"),collapse="") ## label: index (weight)
text(data[i,1]+0.2,data[i,2],custtext,cex=0.8,col=i+1)
}
\[ \hat{TC}(X) = \sum_i 2\, w_i \cdot d(p_i,X), \] where \(d(p_i,X) = \sqrt{ (p_i - X)_x^2 + (p_i - X)_y^2}\)
### SOLUTION TO QUESTION 6A ###
eucDist <- function(p1,p2){ sqrt(sum((p1-p2)^2)) }
objFn <- function(p,data){
nLoc <- nrow(data) ## num of locations
wtDist<- 0
for( i in 1:nLoc){
wtDist <- wtDist + 2*data[i,3]*eucDist(p,data[i,1:2])
}
return(wtDist)
}
objFn(c(9,5),data)
## weight
## 427.7068
optim(c(9,5),objFn,data=data)
## $par
## [1] 5.850783 4.697410
##
## $value
## [1] 386.7746
##
## $counts
## function gradient
## 51 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
### SOLUTION TO QUESTION 6C ###
xRange <- seq(1,16,0.1)
yRange <- seq(1,16,0.1)
objVal <- matrix(NA,nrow=length(xRange),ncol=length(yRange)) ## rows follow xRange, columns follow yRange, as contour() expects
for(i in 1:length(xRange)){
for(j in 1:length(yRange)){
objVal[i,j] <- objFn(c(xRange[i],yRange[j]),data)
}
}
contour(xRange,yRange,objVal,nlevels=20)
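Optionally, the optim() solution from above can be overlaid on the contour to confirm that it sits in the lowest region; a small sketch reusing the objects already defined:
##-- mark the warehouse location found by optim() on the contour
wh <- optim(c(9,5),objFn,data=data)$par
points(wh[1],wh[2],pch=4,col="red",cex=1.5)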
\[ TC(X) = \sum_i w_i d(p_i,X) +\max \{w_i^{1/2} \cdot d(p_i,X)^{3/2} , w_i^{3/2} \cdot d(p_i,X)^{1/2} \} \]
### SOLUTION TO QUESTION 6D ###
objFn <- function(p,data){
nLoc <- nrow(data) ## num of locations
wtDist<- 0
for( i in 1:nLoc){
aDist <- eucDist(p,data[i,1:2])
wtDist <- wtDist + data[i,3]*aDist +
max( data[i,3]^(3/2)*aDist^(1/2) ,data[i,3]^(1/2)*aDist^(3/2)) ##-- trailing + above keeps the max term inside the same statement
}
return(wtDist)
}
objFn(c(9,5),data)
## weight
## 213.8534
optim(c(9,5),objFn,data=data)
## $par
## [1] 5.850783 4.697410
##
## $value
## [1] 193.3873
##
## $counts
## function gradient
## 51 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
set.seed(17)
nFac <- 20
data <- data.frame(x=runif(nFac)*20
,y=runif(nFac)*30
,weight=abs( rnorm(nFac)*5) )
head(data)
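The earlier objFn() and optim() call carry over directly to this larger instance. A minimal sketch, using the unweighted centroid as an assumed starting point:
##-- starting point is an assumption, not part of the exercise
start <- colMeans(data[,c("x","y")])
optim(as.numeric(start), objFn, data=data)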
Copyright 2019 Oran Kittithreerapronchai. All Rights Reserved. Last modified: 2023-51-13