1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.math3.optim.nonlinear.vector.jacobian; 18 19 import java.util.Arrays; 20 import org.apache.commons.math3.exception.ConvergenceException; 21 import org.apache.commons.math3.exception.MathUnsupportedOperationException; 22 import org.apache.commons.math3.exception.util.LocalizedFormats; 23 import org.apache.commons.math3.optim.PointVectorValuePair; 24 import org.apache.commons.math3.optim.ConvergenceChecker; 25 import org.apache.commons.math3.linear.RealMatrix; 26 import org.apache.commons.math3.util.Precision; 27 import org.apache.commons.math3.util.FastMath; 28 29 30 /** 31 * This class solves a least-squares problem using the Levenberg-Marquardt 32 * algorithm. 33 * <br/> 34 * Constraints are not supported: the call to 35 * {@link #optimize(OptimizationData[]) optimize} will throw 36 * {@link MathUnsupportedOperationException} if bounds are passed to it. 37 * 38 * <p>This implementation <em>should</em> work even for over-determined systems 39 * (i.e. systems having more point than equations). Over-determined systems 40 * are solved by ignoring the point which have the smallest impact according 41 * to their jacobian column norm. Only the rank of the matrix and some loop bounds 42 * are changed to implement this.</p> 43 * 44 * <p>The resolution engine is a simple translation of the MINPACK <a 45 * href="http://www.netlib.org/minpack/lmder.f">lmder</a> routine with minor 46 * changes. The changes include the over-determined resolution, the use of 47 * inherited convergence checker and the Q.R. decomposition which has been 48 * rewritten following the algorithm described in the 49 * P. Lascaux and R. Theodor book <i>Analyse numérique matricielle 50 * appliquée à l'art de l'ingénieur</i>, Masson 1986.</p> 51 * <p>The authors of the original fortran version are: 52 * <ul> 53 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 54 * <li>Burton S. Garbow</li> 55 * <li>Kenneth E. Hillstrom</li> 56 * <li>Jorge J. More</li> 57 * </ul> 58 * The redistribution policy for MINPACK is available <a 59 * href="http://www.netlib.org/minpack/disclaimer">here</a>, for convenience, it 60 * is reproduced below.</p> 61 * 62 * <table border="0" width="80%" cellpadding="10" align="center" bgcolor="#E0E0E0"> 63 * <tr><td> 64 * Minpack Copyright Notice (1999) University of Chicago. 65 * All rights reserved 66 * </td></tr> 67 * <tr><td> 68 * Redistribution and use in source and binary forms, with or without 69 * modification, are permitted provided that the following conditions 70 * are met: 71 * <ol> 72 * <li>Redistributions of source code must retain the above copyright 73 * notice, this list of conditions and the following disclaimer.</li> 74 * <li>Redistributions in binary form must reproduce the above 75 * copyright notice, this list of conditions and the following 76 * disclaimer in the documentation and/or other materials provided 77 * with the distribution.</li> 78 * <li>The end-user documentation included with the redistribution, if any, 79 * must include the following acknowledgment: 80 * <code>This product includes software developed by the University of 81 * Chicago, as Operator of Argonne National Laboratory.</code> 82 * Alternately, this acknowledgment may appear in the software itself, 83 * if and wherever such third-party acknowledgments normally appear.</li> 84 * <li><strong>WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" 85 * WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE 86 * UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND 87 * THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR 88 * IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES 89 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE 90 * OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY 91 * OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR 92 * USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF 93 * THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) 94 * DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION 95 * UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL 96 * BE CORRECTED.</strong></li> 97 * <li><strong>LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT 98 * HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF 99 * ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, 100 * INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF 101 * ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF 102 * PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER 103 * SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT 104 * (INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, 105 * EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE 106 * POSSIBILITY OF SUCH LOSS OR DAMAGES.</strong></li> 107 * <ol></td></tr> 108 * </table> 109 * 110 * @since 2.0 111 * @deprecated All classes and interfaces in this package are deprecated. 112 * The optimizers that were provided here were moved to the 113 * {@link org.apache.commons.math3.fitting.leastsquares} package 114 * (cf. MATH-1008). 115 */ 116 @Deprecated 117 public class LevenbergMarquardtOptimizer 118 extends AbstractLeastSquaresOptimizer { 119 /** Twice the "epsilon machine". */ 120 private static final double TWO_EPS = 2 * Precision.EPSILON; 121 /** Number of solved point. */ 122 private int solvedCols; 123 /** Diagonal elements of the R matrix in the Q.R. decomposition. */ 124 private double[] diagR; 125 /** Norms of the columns of the jacobian matrix. */ 126 private double[] jacNorm; 127 /** Coefficients of the Householder transforms vectors. */ 128 private double[] beta; 129 /** Columns permutation array. */ 130 private int[] permutation; 131 /** Rank of the jacobian matrix. */ 132 private int rank; 133 /** Levenberg-Marquardt parameter. */ 134 private double lmPar; 135 /** Parameters evolution direction associated with lmPar. */ 136 private double[] lmDir; 137 /** Positive input variable used in determining the initial step bound. */ 138 private final double initialStepBoundFactor; 139 /** Desired relative error in the sum of squares. */ 140 private final double costRelativeTolerance; 141 /** Desired relative error in the approximate solution parameters. */ 142 private final double parRelativeTolerance; 143 /** Desired max cosine on the orthogonality between the function vector 144 * and the columns of the jacobian. */ 145 private final double orthoTolerance; 146 /** Threshold for QR ranking. */ 147 private final double qrRankingThreshold; 148 /** Weighted residuals. */ 149 private double[] weightedResidual; 150 /** Weighted Jacobian. */ 151 private double[][] weightedJacobian; 152 153 /** 154 * Build an optimizer for least squares problems with default values 155 * for all the tuning parameters (see the {@link 156 * #LevenbergMarquardtOptimizer(double,double,double,double,double) 157 * other contructor}. 158 * The default values for the algorithm settings are: 159 * <ul> 160 * <li>Initial step bound factor: 100</li> 161 * <li>Cost relative tolerance: 1e-10</li> 162 * <li>Parameters relative tolerance: 1e-10</li> 163 * <li>Orthogonality tolerance: 1e-10</li> 164 * <li>QR ranking threshold: {@link Precision#SAFE_MIN}</li> 165 * </ul> 166 */ LevenbergMarquardtOptimizer()167 public LevenbergMarquardtOptimizer() { 168 this(100, 1e-10, 1e-10, 1e-10, Precision.SAFE_MIN); 169 } 170 171 /** 172 * Constructor that allows the specification of a custom convergence 173 * checker. 174 * Note that all the usual convergence checks will be <em>disabled</em>. 175 * The default values for the algorithm settings are: 176 * <ul> 177 * <li>Initial step bound factor: 100</li> 178 * <li>Cost relative tolerance: 1e-10</li> 179 * <li>Parameters relative tolerance: 1e-10</li> 180 * <li>Orthogonality tolerance: 1e-10</li> 181 * <li>QR ranking threshold: {@link Precision#SAFE_MIN}</li> 182 * </ul> 183 * 184 * @param checker Convergence checker. 185 */ LevenbergMarquardtOptimizer(ConvergenceChecker<PointVectorValuePair> checker)186 public LevenbergMarquardtOptimizer(ConvergenceChecker<PointVectorValuePair> checker) { 187 this(100, checker, 1e-10, 1e-10, 1e-10, Precision.SAFE_MIN); 188 } 189 190 /** 191 * Constructor that allows the specification of a custom convergence 192 * checker, in addition to the standard ones. 193 * 194 * @param initialStepBoundFactor Positive input variable used in 195 * determining the initial step bound. This bound is set to the 196 * product of initialStepBoundFactor and the euclidean norm of 197 * {@code diag * x} if non-zero, or else to {@code initialStepBoundFactor} 198 * itself. In most cases factor should lie in the interval 199 * {@code (0.1, 100.0)}. {@code 100} is a generally recommended value. 200 * @param checker Convergence checker. 201 * @param costRelativeTolerance Desired relative error in the sum of 202 * squares. 203 * @param parRelativeTolerance Desired relative error in the approximate 204 * solution parameters. 205 * @param orthoTolerance Desired max cosine on the orthogonality between 206 * the function vector and the columns of the Jacobian. 207 * @param threshold Desired threshold for QR ranking. If the squared norm 208 * of a column vector is smaller or equal to this threshold during QR 209 * decomposition, it is considered to be a zero vector and hence the rank 210 * of the matrix is reduced. 211 */ LevenbergMarquardtOptimizer(double initialStepBoundFactor, ConvergenceChecker<PointVectorValuePair> checker, double costRelativeTolerance, double parRelativeTolerance, double orthoTolerance, double threshold)212 public LevenbergMarquardtOptimizer(double initialStepBoundFactor, 213 ConvergenceChecker<PointVectorValuePair> checker, 214 double costRelativeTolerance, 215 double parRelativeTolerance, 216 double orthoTolerance, 217 double threshold) { 218 super(checker); 219 this.initialStepBoundFactor = initialStepBoundFactor; 220 this.costRelativeTolerance = costRelativeTolerance; 221 this.parRelativeTolerance = parRelativeTolerance; 222 this.orthoTolerance = orthoTolerance; 223 this.qrRankingThreshold = threshold; 224 } 225 226 /** 227 * Build an optimizer for least squares problems with default values 228 * for some of the tuning parameters (see the {@link 229 * #LevenbergMarquardtOptimizer(double,double,double,double,double) 230 * other contructor}. 231 * The default values for the algorithm settings are: 232 * <ul> 233 * <li>Initial step bound factor}: 100</li> 234 * <li>QR ranking threshold}: {@link Precision#SAFE_MIN}</li> 235 * </ul> 236 * 237 * @param costRelativeTolerance Desired relative error in the sum of 238 * squares. 239 * @param parRelativeTolerance Desired relative error in the approximate 240 * solution parameters. 241 * @param orthoTolerance Desired max cosine on the orthogonality between 242 * the function vector and the columns of the Jacobian. 243 */ LevenbergMarquardtOptimizer(double costRelativeTolerance, double parRelativeTolerance, double orthoTolerance)244 public LevenbergMarquardtOptimizer(double costRelativeTolerance, 245 double parRelativeTolerance, 246 double orthoTolerance) { 247 this(100, 248 costRelativeTolerance, parRelativeTolerance, orthoTolerance, 249 Precision.SAFE_MIN); 250 } 251 252 /** 253 * The arguments control the behaviour of the default convergence checking 254 * procedure. 255 * Additional criteria can defined through the setting of a {@link 256 * ConvergenceChecker}. 257 * 258 * @param initialStepBoundFactor Positive input variable used in 259 * determining the initial step bound. This bound is set to the 260 * product of initialStepBoundFactor and the euclidean norm of 261 * {@code diag * x} if non-zero, or else to {@code initialStepBoundFactor} 262 * itself. In most cases factor should lie in the interval 263 * {@code (0.1, 100.0)}. {@code 100} is a generally recommended value. 264 * @param costRelativeTolerance Desired relative error in the sum of 265 * squares. 266 * @param parRelativeTolerance Desired relative error in the approximate 267 * solution parameters. 268 * @param orthoTolerance Desired max cosine on the orthogonality between 269 * the function vector and the columns of the Jacobian. 270 * @param threshold Desired threshold for QR ranking. If the squared norm 271 * of a column vector is smaller or equal to this threshold during QR 272 * decomposition, it is considered to be a zero vector and hence the rank 273 * of the matrix is reduced. 274 */ LevenbergMarquardtOptimizer(double initialStepBoundFactor, double costRelativeTolerance, double parRelativeTolerance, double orthoTolerance, double threshold)275 public LevenbergMarquardtOptimizer(double initialStepBoundFactor, 276 double costRelativeTolerance, 277 double parRelativeTolerance, 278 double orthoTolerance, 279 double threshold) { 280 super(null); // No custom convergence criterion. 281 this.initialStepBoundFactor = initialStepBoundFactor; 282 this.costRelativeTolerance = costRelativeTolerance; 283 this.parRelativeTolerance = parRelativeTolerance; 284 this.orthoTolerance = orthoTolerance; 285 this.qrRankingThreshold = threshold; 286 } 287 288 /** {@inheritDoc} */ 289 @Override doOptimize()290 protected PointVectorValuePair doOptimize() { 291 checkParameters(); 292 293 final int nR = getTarget().length; // Number of observed data. 294 final double[] currentPoint = getStartPoint(); 295 final int nC = currentPoint.length; // Number of parameters. 296 297 // arrays shared with the other private methods 298 solvedCols = FastMath.min(nR, nC); 299 diagR = new double[nC]; 300 jacNorm = new double[nC]; 301 beta = new double[nC]; 302 permutation = new int[nC]; 303 lmDir = new double[nC]; 304 305 // local point 306 double delta = 0; 307 double xNorm = 0; 308 double[] diag = new double[nC]; 309 double[] oldX = new double[nC]; 310 double[] oldRes = new double[nR]; 311 double[] oldObj = new double[nR]; 312 double[] qtf = new double[nR]; 313 double[] work1 = new double[nC]; 314 double[] work2 = new double[nC]; 315 double[] work3 = new double[nC]; 316 317 final RealMatrix weightMatrixSqrt = getWeightSquareRoot(); 318 319 // Evaluate the function at the starting point and calculate its norm. 320 double[] currentObjective = computeObjectiveValue(currentPoint); 321 double[] currentResiduals = computeResiduals(currentObjective); 322 PointVectorValuePair current = new PointVectorValuePair(currentPoint, currentObjective); 323 double currentCost = computeCost(currentResiduals); 324 325 // Outer loop. 326 lmPar = 0; 327 boolean firstIteration = true; 328 final ConvergenceChecker<PointVectorValuePair> checker = getConvergenceChecker(); 329 while (true) { 330 incrementIterationCount(); 331 332 final PointVectorValuePair previous = current; 333 334 // QR decomposition of the jacobian matrix 335 qrDecomposition(computeWeightedJacobian(currentPoint)); 336 337 weightedResidual = weightMatrixSqrt.operate(currentResiduals); 338 for (int i = 0; i < nR; i++) { 339 qtf[i] = weightedResidual[i]; 340 } 341 342 // compute Qt.res 343 qTy(qtf); 344 345 // now we don't need Q anymore, 346 // so let jacobian contain the R matrix with its diagonal elements 347 for (int k = 0; k < solvedCols; ++k) { 348 int pk = permutation[k]; 349 weightedJacobian[k][pk] = diagR[pk]; 350 } 351 352 if (firstIteration) { 353 // scale the point according to the norms of the columns 354 // of the initial jacobian 355 xNorm = 0; 356 for (int k = 0; k < nC; ++k) { 357 double dk = jacNorm[k]; 358 if (dk == 0) { 359 dk = 1.0; 360 } 361 double xk = dk * currentPoint[k]; 362 xNorm += xk * xk; 363 diag[k] = dk; 364 } 365 xNorm = FastMath.sqrt(xNorm); 366 367 // initialize the step bound delta 368 delta = (xNorm == 0) ? initialStepBoundFactor : (initialStepBoundFactor * xNorm); 369 } 370 371 // check orthogonality between function vector and jacobian columns 372 double maxCosine = 0; 373 if (currentCost != 0) { 374 for (int j = 0; j < solvedCols; ++j) { 375 int pj = permutation[j]; 376 double s = jacNorm[pj]; 377 if (s != 0) { 378 double sum = 0; 379 for (int i = 0; i <= j; ++i) { 380 sum += weightedJacobian[i][pj] * qtf[i]; 381 } 382 maxCosine = FastMath.max(maxCosine, FastMath.abs(sum) / (s * currentCost)); 383 } 384 } 385 } 386 if (maxCosine <= orthoTolerance) { 387 // Convergence has been reached. 388 setCost(currentCost); 389 return current; 390 } 391 392 // rescale if necessary 393 for (int j = 0; j < nC; ++j) { 394 diag[j] = FastMath.max(diag[j], jacNorm[j]); 395 } 396 397 // Inner loop. 398 for (double ratio = 0; ratio < 1.0e-4;) { 399 400 // save the state 401 for (int j = 0; j < solvedCols; ++j) { 402 int pj = permutation[j]; 403 oldX[pj] = currentPoint[pj]; 404 } 405 final double previousCost = currentCost; 406 double[] tmpVec = weightedResidual; 407 weightedResidual = oldRes; 408 oldRes = tmpVec; 409 tmpVec = currentObjective; 410 currentObjective = oldObj; 411 oldObj = tmpVec; 412 413 // determine the Levenberg-Marquardt parameter 414 determineLMParameter(qtf, delta, diag, work1, work2, work3); 415 416 // compute the new point and the norm of the evolution direction 417 double lmNorm = 0; 418 for (int j = 0; j < solvedCols; ++j) { 419 int pj = permutation[j]; 420 lmDir[pj] = -lmDir[pj]; 421 currentPoint[pj] = oldX[pj] + lmDir[pj]; 422 double s = diag[pj] * lmDir[pj]; 423 lmNorm += s * s; 424 } 425 lmNorm = FastMath.sqrt(lmNorm); 426 // on the first iteration, adjust the initial step bound. 427 if (firstIteration) { 428 delta = FastMath.min(delta, lmNorm); 429 } 430 431 // Evaluate the function at x + p and calculate its norm. 432 currentObjective = computeObjectiveValue(currentPoint); 433 currentResiduals = computeResiduals(currentObjective); 434 current = new PointVectorValuePair(currentPoint, currentObjective); 435 currentCost = computeCost(currentResiduals); 436 437 // compute the scaled actual reduction 438 double actRed = -1.0; 439 if (0.1 * currentCost < previousCost) { 440 double r = currentCost / previousCost; 441 actRed = 1.0 - r * r; 442 } 443 444 // compute the scaled predicted reduction 445 // and the scaled directional derivative 446 for (int j = 0; j < solvedCols; ++j) { 447 int pj = permutation[j]; 448 double dirJ = lmDir[pj]; 449 work1[j] = 0; 450 for (int i = 0; i <= j; ++i) { 451 work1[i] += weightedJacobian[i][pj] * dirJ; 452 } 453 } 454 double coeff1 = 0; 455 for (int j = 0; j < solvedCols; ++j) { 456 coeff1 += work1[j] * work1[j]; 457 } 458 double pc2 = previousCost * previousCost; 459 coeff1 /= pc2; 460 double coeff2 = lmPar * lmNorm * lmNorm / pc2; 461 double preRed = coeff1 + 2 * coeff2; 462 double dirDer = -(coeff1 + coeff2); 463 464 // ratio of the actual to the predicted reduction 465 ratio = (preRed == 0) ? 0 : (actRed / preRed); 466 467 // update the step bound 468 if (ratio <= 0.25) { 469 double tmp = 470 (actRed < 0) ? (0.5 * dirDer / (dirDer + 0.5 * actRed)) : 0.5; 471 if ((0.1 * currentCost >= previousCost) || (tmp < 0.1)) { 472 tmp = 0.1; 473 } 474 delta = tmp * FastMath.min(delta, 10.0 * lmNorm); 475 lmPar /= tmp; 476 } else if ((lmPar == 0) || (ratio >= 0.75)) { 477 delta = 2 * lmNorm; 478 lmPar *= 0.5; 479 } 480 481 // test for successful iteration. 482 if (ratio >= 1.0e-4) { 483 // successful iteration, update the norm 484 firstIteration = false; 485 xNorm = 0; 486 for (int k = 0; k < nC; ++k) { 487 double xK = diag[k] * currentPoint[k]; 488 xNorm += xK * xK; 489 } 490 xNorm = FastMath.sqrt(xNorm); 491 492 // tests for convergence. 493 if (checker != null && checker.converged(getIterations(), previous, current)) { 494 setCost(currentCost); 495 return current; 496 } 497 } else { 498 // failed iteration, reset the previous values 499 currentCost = previousCost; 500 for (int j = 0; j < solvedCols; ++j) { 501 int pj = permutation[j]; 502 currentPoint[pj] = oldX[pj]; 503 } 504 tmpVec = weightedResidual; 505 weightedResidual = oldRes; 506 oldRes = tmpVec; 507 tmpVec = currentObjective; 508 currentObjective = oldObj; 509 oldObj = tmpVec; 510 // Reset "current" to previous values. 511 current = new PointVectorValuePair(currentPoint, currentObjective); 512 } 513 514 // Default convergence criteria. 515 if ((FastMath.abs(actRed) <= costRelativeTolerance && 516 preRed <= costRelativeTolerance && 517 ratio <= 2.0) || 518 delta <= parRelativeTolerance * xNorm) { 519 setCost(currentCost); 520 return current; 521 } 522 523 // tests for termination and stringent tolerances 524 if (FastMath.abs(actRed) <= TWO_EPS && 525 preRed <= TWO_EPS && 526 ratio <= 2.0) { 527 throw new ConvergenceException(LocalizedFormats.TOO_SMALL_COST_RELATIVE_TOLERANCE, 528 costRelativeTolerance); 529 } else if (delta <= TWO_EPS * xNorm) { 530 throw new ConvergenceException(LocalizedFormats.TOO_SMALL_PARAMETERS_RELATIVE_TOLERANCE, 531 parRelativeTolerance); 532 } else if (maxCosine <= TWO_EPS) { 533 throw new ConvergenceException(LocalizedFormats.TOO_SMALL_ORTHOGONALITY_TOLERANCE, 534 orthoTolerance); 535 } 536 } 537 } 538 } 539 540 /** 541 * Determine the Levenberg-Marquardt parameter. 542 * <p>This implementation is a translation in Java of the MINPACK 543 * <a href="http://www.netlib.org/minpack/lmpar.f">lmpar</a> 544 * routine.</p> 545 * <p>This method sets the lmPar and lmDir attributes.</p> 546 * <p>The authors of the original fortran function are:</p> 547 * <ul> 548 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 549 * <li>Burton S. Garbow</li> 550 * <li>Kenneth E. Hillstrom</li> 551 * <li>Jorge J. More</li> 552 * </ul> 553 * <p>Luc Maisonobe did the Java translation.</p> 554 * 555 * @param qy array containing qTy 556 * @param delta upper bound on the euclidean norm of diagR * lmDir 557 * @param diag diagonal matrix 558 * @param work1 work array 559 * @param work2 work array 560 * @param work3 work array 561 */ determineLMParameter(double[] qy, double delta, double[] diag, double[] work1, double[] work2, double[] work3)562 private void determineLMParameter(double[] qy, double delta, double[] diag, 563 double[] work1, double[] work2, double[] work3) { 564 final int nC = weightedJacobian[0].length; 565 566 // compute and store in x the gauss-newton direction, if the 567 // jacobian is rank-deficient, obtain a least squares solution 568 for (int j = 0; j < rank; ++j) { 569 lmDir[permutation[j]] = qy[j]; 570 } 571 for (int j = rank; j < nC; ++j) { 572 lmDir[permutation[j]] = 0; 573 } 574 for (int k = rank - 1; k >= 0; --k) { 575 int pk = permutation[k]; 576 double ypk = lmDir[pk] / diagR[pk]; 577 for (int i = 0; i < k; ++i) { 578 lmDir[permutation[i]] -= ypk * weightedJacobian[i][pk]; 579 } 580 lmDir[pk] = ypk; 581 } 582 583 // evaluate the function at the origin, and test 584 // for acceptance of the Gauss-Newton direction 585 double dxNorm = 0; 586 for (int j = 0; j < solvedCols; ++j) { 587 int pj = permutation[j]; 588 double s = diag[pj] * lmDir[pj]; 589 work1[pj] = s; 590 dxNorm += s * s; 591 } 592 dxNorm = FastMath.sqrt(dxNorm); 593 double fp = dxNorm - delta; 594 if (fp <= 0.1 * delta) { 595 lmPar = 0; 596 return; 597 } 598 599 // if the jacobian is not rank deficient, the Newton step provides 600 // a lower bound, parl, for the zero of the function, 601 // otherwise set this bound to zero 602 double sum2; 603 double parl = 0; 604 if (rank == solvedCols) { 605 for (int j = 0; j < solvedCols; ++j) { 606 int pj = permutation[j]; 607 work1[pj] *= diag[pj] / dxNorm; 608 } 609 sum2 = 0; 610 for (int j = 0; j < solvedCols; ++j) { 611 int pj = permutation[j]; 612 double sum = 0; 613 for (int i = 0; i < j; ++i) { 614 sum += weightedJacobian[i][pj] * work1[permutation[i]]; 615 } 616 double s = (work1[pj] - sum) / diagR[pj]; 617 work1[pj] = s; 618 sum2 += s * s; 619 } 620 parl = fp / (delta * sum2); 621 } 622 623 // calculate an upper bound, paru, for the zero of the function 624 sum2 = 0; 625 for (int j = 0; j < solvedCols; ++j) { 626 int pj = permutation[j]; 627 double sum = 0; 628 for (int i = 0; i <= j; ++i) { 629 sum += weightedJacobian[i][pj] * qy[i]; 630 } 631 sum /= diag[pj]; 632 sum2 += sum * sum; 633 } 634 double gNorm = FastMath.sqrt(sum2); 635 double paru = gNorm / delta; 636 if (paru == 0) { 637 paru = Precision.SAFE_MIN / FastMath.min(delta, 0.1); 638 } 639 640 // if the input par lies outside of the interval (parl,paru), 641 // set par to the closer endpoint 642 lmPar = FastMath.min(paru, FastMath.max(lmPar, parl)); 643 if (lmPar == 0) { 644 lmPar = gNorm / dxNorm; 645 } 646 647 for (int countdown = 10; countdown >= 0; --countdown) { 648 649 // evaluate the function at the current value of lmPar 650 if (lmPar == 0) { 651 lmPar = FastMath.max(Precision.SAFE_MIN, 0.001 * paru); 652 } 653 double sPar = FastMath.sqrt(lmPar); 654 for (int j = 0; j < solvedCols; ++j) { 655 int pj = permutation[j]; 656 work1[pj] = sPar * diag[pj]; 657 } 658 determineLMDirection(qy, work1, work2, work3); 659 660 dxNorm = 0; 661 for (int j = 0; j < solvedCols; ++j) { 662 int pj = permutation[j]; 663 double s = diag[pj] * lmDir[pj]; 664 work3[pj] = s; 665 dxNorm += s * s; 666 } 667 dxNorm = FastMath.sqrt(dxNorm); 668 double previousFP = fp; 669 fp = dxNorm - delta; 670 671 // if the function is small enough, accept the current value 672 // of lmPar, also test for the exceptional cases where parl is zero 673 if ((FastMath.abs(fp) <= 0.1 * delta) || 674 ((parl == 0) && (fp <= previousFP) && (previousFP < 0))) { 675 return; 676 } 677 678 // compute the Newton correction 679 for (int j = 0; j < solvedCols; ++j) { 680 int pj = permutation[j]; 681 work1[pj] = work3[pj] * diag[pj] / dxNorm; 682 } 683 for (int j = 0; j < solvedCols; ++j) { 684 int pj = permutation[j]; 685 work1[pj] /= work2[j]; 686 double tmp = work1[pj]; 687 for (int i = j + 1; i < solvedCols; ++i) { 688 work1[permutation[i]] -= weightedJacobian[i][pj] * tmp; 689 } 690 } 691 sum2 = 0; 692 for (int j = 0; j < solvedCols; ++j) { 693 double s = work1[permutation[j]]; 694 sum2 += s * s; 695 } 696 double correction = fp / (delta * sum2); 697 698 // depending on the sign of the function, update parl or paru. 699 if (fp > 0) { 700 parl = FastMath.max(parl, lmPar); 701 } else if (fp < 0) { 702 paru = FastMath.min(paru, lmPar); 703 } 704 705 // compute an improved estimate for lmPar 706 lmPar = FastMath.max(parl, lmPar + correction); 707 708 } 709 } 710 711 /** 712 * Solve a*x = b and d*x = 0 in the least squares sense. 713 * <p>This implementation is a translation in Java of the MINPACK 714 * <a href="http://www.netlib.org/minpack/qrsolv.f">qrsolv</a> 715 * routine.</p> 716 * <p>This method sets the lmDir and lmDiag attributes.</p> 717 * <p>The authors of the original fortran function are:</p> 718 * <ul> 719 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 720 * <li>Burton S. Garbow</li> 721 * <li>Kenneth E. Hillstrom</li> 722 * <li>Jorge J. More</li> 723 * </ul> 724 * <p>Luc Maisonobe did the Java translation.</p> 725 * 726 * @param qy array containing qTy 727 * @param diag diagonal matrix 728 * @param lmDiag diagonal elements associated with lmDir 729 * @param work work array 730 */ determineLMDirection(double[] qy, double[] diag, double[] lmDiag, double[] work)731 private void determineLMDirection(double[] qy, double[] diag, 732 double[] lmDiag, double[] work) { 733 734 // copy R and Qty to preserve input and initialize s 735 // in particular, save the diagonal elements of R in lmDir 736 for (int j = 0; j < solvedCols; ++j) { 737 int pj = permutation[j]; 738 for (int i = j + 1; i < solvedCols; ++i) { 739 weightedJacobian[i][pj] = weightedJacobian[j][permutation[i]]; 740 } 741 lmDir[j] = diagR[pj]; 742 work[j] = qy[j]; 743 } 744 745 // eliminate the diagonal matrix d using a Givens rotation 746 for (int j = 0; j < solvedCols; ++j) { 747 748 // prepare the row of d to be eliminated, locating the 749 // diagonal element using p from the Q.R. factorization 750 int pj = permutation[j]; 751 double dpj = diag[pj]; 752 if (dpj != 0) { 753 Arrays.fill(lmDiag, j + 1, lmDiag.length, 0); 754 } 755 lmDiag[j] = dpj; 756 757 // the transformations to eliminate the row of d 758 // modify only a single element of Qty 759 // beyond the first n, which is initially zero. 760 double qtbpj = 0; 761 for (int k = j; k < solvedCols; ++k) { 762 int pk = permutation[k]; 763 764 // determine a Givens rotation which eliminates the 765 // appropriate element in the current row of d 766 if (lmDiag[k] != 0) { 767 768 final double sin; 769 final double cos; 770 double rkk = weightedJacobian[k][pk]; 771 if (FastMath.abs(rkk) < FastMath.abs(lmDiag[k])) { 772 final double cotan = rkk / lmDiag[k]; 773 sin = 1.0 / FastMath.sqrt(1.0 + cotan * cotan); 774 cos = sin * cotan; 775 } else { 776 final double tan = lmDiag[k] / rkk; 777 cos = 1.0 / FastMath.sqrt(1.0 + tan * tan); 778 sin = cos * tan; 779 } 780 781 // compute the modified diagonal element of R and 782 // the modified element of (Qty,0) 783 weightedJacobian[k][pk] = cos * rkk + sin * lmDiag[k]; 784 final double temp = cos * work[k] + sin * qtbpj; 785 qtbpj = -sin * work[k] + cos * qtbpj; 786 work[k] = temp; 787 788 // accumulate the tranformation in the row of s 789 for (int i = k + 1; i < solvedCols; ++i) { 790 double rik = weightedJacobian[i][pk]; 791 final double temp2 = cos * rik + sin * lmDiag[i]; 792 lmDiag[i] = -sin * rik + cos * lmDiag[i]; 793 weightedJacobian[i][pk] = temp2; 794 } 795 } 796 } 797 798 // store the diagonal element of s and restore 799 // the corresponding diagonal element of R 800 lmDiag[j] = weightedJacobian[j][permutation[j]]; 801 weightedJacobian[j][permutation[j]] = lmDir[j]; 802 } 803 804 // solve the triangular system for z, if the system is 805 // singular, then obtain a least squares solution 806 int nSing = solvedCols; 807 for (int j = 0; j < solvedCols; ++j) { 808 if ((lmDiag[j] == 0) && (nSing == solvedCols)) { 809 nSing = j; 810 } 811 if (nSing < solvedCols) { 812 work[j] = 0; 813 } 814 } 815 if (nSing > 0) { 816 for (int j = nSing - 1; j >= 0; --j) { 817 int pj = permutation[j]; 818 double sum = 0; 819 for (int i = j + 1; i < nSing; ++i) { 820 sum += weightedJacobian[i][pj] * work[i]; 821 } 822 work[j] = (work[j] - sum) / lmDiag[j]; 823 } 824 } 825 826 // permute the components of z back to components of lmDir 827 for (int j = 0; j < lmDir.length; ++j) { 828 lmDir[permutation[j]] = work[j]; 829 } 830 } 831 832 /** 833 * Decompose a matrix A as A.P = Q.R using Householder transforms. 834 * <p>As suggested in the P. Lascaux and R. Theodor book 835 * <i>Analyse numérique matricielle appliquée à 836 * l'art de l'ingénieur</i> (Masson, 1986), instead of representing 837 * the Householder transforms with u<sub>k</sub> unit vectors such that: 838 * <pre> 839 * H<sub>k</sub> = I - 2u<sub>k</sub>.u<sub>k</sub><sup>t</sup> 840 * </pre> 841 * we use <sub>k</sub> non-unit vectors such that: 842 * <pre> 843 * H<sub>k</sub> = I - beta<sub>k</sub>v<sub>k</sub>.v<sub>k</sub><sup>t</sup> 844 * </pre> 845 * where v<sub>k</sub> = a<sub>k</sub> - alpha<sub>k</sub> e<sub>k</sub>. 846 * The beta<sub>k</sub> coefficients are provided upon exit as recomputing 847 * them from the v<sub>k</sub> vectors would be costly.</p> 848 * <p>This decomposition handles rank deficient cases since the tranformations 849 * are performed in non-increasing columns norms order thanks to columns 850 * pivoting. The diagonal elements of the R matrix are therefore also in 851 * non-increasing absolute values order.</p> 852 * 853 * @param jacobian Weighted Jacobian matrix at the current point. 854 * @exception ConvergenceException if the decomposition cannot be performed 855 */ qrDecomposition(RealMatrix jacobian)856 private void qrDecomposition(RealMatrix jacobian) throws ConvergenceException { 857 // Code in this class assumes that the weighted Jacobian is -(W^(1/2) J), 858 // hence the multiplication by -1. 859 weightedJacobian = jacobian.scalarMultiply(-1).getData(); 860 861 final int nR = weightedJacobian.length; 862 final int nC = weightedJacobian[0].length; 863 864 // initializations 865 for (int k = 0; k < nC; ++k) { 866 permutation[k] = k; 867 double norm2 = 0; 868 for (int i = 0; i < nR; ++i) { 869 double akk = weightedJacobian[i][k]; 870 norm2 += akk * akk; 871 } 872 jacNorm[k] = FastMath.sqrt(norm2); 873 } 874 875 // transform the matrix column after column 876 for (int k = 0; k < nC; ++k) { 877 878 // select the column with the greatest norm on active components 879 int nextColumn = -1; 880 double ak2 = Double.NEGATIVE_INFINITY; 881 for (int i = k; i < nC; ++i) { 882 double norm2 = 0; 883 for (int j = k; j < nR; ++j) { 884 double aki = weightedJacobian[j][permutation[i]]; 885 norm2 += aki * aki; 886 } 887 if (Double.isInfinite(norm2) || Double.isNaN(norm2)) { 888 throw new ConvergenceException(LocalizedFormats.UNABLE_TO_PERFORM_QR_DECOMPOSITION_ON_JACOBIAN, 889 nR, nC); 890 } 891 if (norm2 > ak2) { 892 nextColumn = i; 893 ak2 = norm2; 894 } 895 } 896 if (ak2 <= qrRankingThreshold) { 897 rank = k; 898 return; 899 } 900 int pk = permutation[nextColumn]; 901 permutation[nextColumn] = permutation[k]; 902 permutation[k] = pk; 903 904 // choose alpha such that Hk.u = alpha ek 905 double akk = weightedJacobian[k][pk]; 906 double alpha = (akk > 0) ? -FastMath.sqrt(ak2) : FastMath.sqrt(ak2); 907 double betak = 1.0 / (ak2 - akk * alpha); 908 beta[pk] = betak; 909 910 // transform the current column 911 diagR[pk] = alpha; 912 weightedJacobian[k][pk] -= alpha; 913 914 // transform the remaining columns 915 for (int dk = nC - 1 - k; dk > 0; --dk) { 916 double gamma = 0; 917 for (int j = k; j < nR; ++j) { 918 gamma += weightedJacobian[j][pk] * weightedJacobian[j][permutation[k + dk]]; 919 } 920 gamma *= betak; 921 for (int j = k; j < nR; ++j) { 922 weightedJacobian[j][permutation[k + dk]] -= gamma * weightedJacobian[j][pk]; 923 } 924 } 925 } 926 rank = solvedCols; 927 } 928 929 /** 930 * Compute the product Qt.y for some Q.R. decomposition. 931 * 932 * @param y vector to multiply (will be overwritten with the result) 933 */ qTy(double[] y)934 private void qTy(double[] y) { 935 final int nR = weightedJacobian.length; 936 final int nC = weightedJacobian[0].length; 937 938 for (int k = 0; k < nC; ++k) { 939 int pk = permutation[k]; 940 double gamma = 0; 941 for (int i = k; i < nR; ++i) { 942 gamma += weightedJacobian[i][pk] * y[i]; 943 } 944 gamma *= beta[pk]; 945 for (int i = k; i < nR; ++i) { 946 y[i] -= gamma * weightedJacobian[i][pk]; 947 } 948 } 949 } 950 951 /** 952 * @throws MathUnsupportedOperationException if bounds were passed to the 953 * {@link #optimize(OptimizationData[]) optimize} method. 954 */ checkParameters()955 private void checkParameters() { 956 if (getLowerBound() != null || 957 getUpperBound() != null) { 958 throw new MathUnsupportedOperationException(LocalizedFormats.CONSTRAINT); 959 } 960 } 961 } 962