1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.math3.fitting.leastsquares; 18 19 import java.util.Arrays; 20 21 import org.apache.commons.math3.fitting.leastsquares.LeastSquaresProblem.Evaluation; 22 import org.apache.commons.math3.linear.ArrayRealVector; 23 import org.apache.commons.math3.linear.RealMatrix; 24 import org.apache.commons.math3.exception.ConvergenceException; 25 import org.apache.commons.math3.exception.util.LocalizedFormats; 26 import org.apache.commons.math3.optim.ConvergenceChecker; 27 import org.apache.commons.math3.util.Incrementor; 28 import org.apache.commons.math3.util.Precision; 29 import org.apache.commons.math3.util.FastMath; 30 31 32 /** 33 * This class solves a least-squares problem using the Levenberg-Marquardt 34 * algorithm. 35 * 36 * <p>This implementation <em>should</em> work even for over-determined systems 37 * (i.e. systems having more point than equations). Over-determined systems 38 * are solved by ignoring the point which have the smallest impact according 39 * to their jacobian column norm. Only the rank of the matrix and some loop bounds 40 * are changed to implement this.</p> 41 * 42 * <p>The resolution engine is a simple translation of the MINPACK <a 43 * href="http://www.netlib.org/minpack/lmder.f">lmder</a> routine with minor 44 * changes. The changes include the over-determined resolution, the use of 45 * inherited convergence checker and the Q.R. decomposition which has been 46 * rewritten following the algorithm described in the 47 * P. Lascaux and R. Theodor book <i>Analyse numérique matricielle 48 * appliquée à l'art de l'ingénieur</i>, Masson 1986.</p> 49 * <p>The authors of the original fortran version are: 50 * <ul> 51 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 52 * <li>Burton S. Garbow</li> 53 * <li>Kenneth E. Hillstrom</li> 54 * <li>Jorge J. More</li> 55 * </ul> 56 * The redistribution policy for MINPACK is available <a 57 * href="http://www.netlib.org/minpack/disclaimer">here</a>, for convenience, it 58 * is reproduced below.</p> 59 * 60 * <table border="0" width="80%" cellpadding="10" align="center" bgcolor="#E0E0E0"> 61 * <tr><td> 62 * Minpack Copyright Notice (1999) University of Chicago. 63 * All rights reserved 64 * </td></tr> 65 * <tr><td> 66 * Redistribution and use in source and binary forms, with or without 67 * modification, are permitted provided that the following conditions 68 * are met: 69 * <ol> 70 * <li>Redistributions of source code must retain the above copyright 71 * notice, this list of conditions and the following disclaimer.</li> 72 * <li>Redistributions in binary form must reproduce the above 73 * copyright notice, this list of conditions and the following 74 * disclaimer in the documentation and/or other materials provided 75 * with the distribution.</li> 76 * <li>The end-user documentation included with the redistribution, if any, 77 * must include the following acknowledgment: 78 * <code>This product includes software developed by the University of 79 * Chicago, as Operator of Argonne National Laboratory.</code> 80 * Alternately, this acknowledgment may appear in the software itself, 81 * if and wherever such third-party acknowledgments normally appear.</li> 82 * <li><strong>WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" 83 * WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE 84 * UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND 85 * THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR 86 * IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES 87 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE 88 * OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY 89 * OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR 90 * USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF 91 * THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) 92 * DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION 93 * UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL 94 * BE CORRECTED.</strong></li> 95 * <li><strong>LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT 96 * HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF 97 * ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, 98 * INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF 99 * ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF 100 * PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER 101 * SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT 102 * (INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, 103 * EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE 104 * POSSIBILITY OF SUCH LOSS OR DAMAGES.</strong></li> 105 * <ol></td></tr> 106 * </table> 107 * 108 * @since 3.3 109 */ 110 public class LevenbergMarquardtOptimizer implements LeastSquaresOptimizer { 111 112 /** Twice the "epsilon machine". */ 113 private static final double TWO_EPS = 2 * Precision.EPSILON; 114 115 /* configuration parameters */ 116 /** Positive input variable used in determining the initial step bound. */ 117 private final double initialStepBoundFactor; 118 /** Desired relative error in the sum of squares. */ 119 private final double costRelativeTolerance; 120 /** Desired relative error in the approximate solution parameters. */ 121 private final double parRelativeTolerance; 122 /** Desired max cosine on the orthogonality between the function vector 123 * and the columns of the jacobian. */ 124 private final double orthoTolerance; 125 /** Threshold for QR ranking. */ 126 private final double qrRankingThreshold; 127 128 /** Default constructor. 129 * <p> 130 * The default values for the algorithm settings are: 131 * <ul> 132 * <li>Initial step bound factor: 100</li> 133 * <li>Cost relative tolerance: 1e-10</li> 134 * <li>Parameters relative tolerance: 1e-10</li> 135 * <li>Orthogonality tolerance: 1e-10</li> 136 * <li>QR ranking threshold: {@link Precision#SAFE_MIN}</li> 137 * </ul> 138 **/ LevenbergMarquardtOptimizer()139 public LevenbergMarquardtOptimizer() { 140 this(100, 1e-10, 1e-10, 1e-10, Precision.SAFE_MIN); 141 } 142 143 /** 144 * Construct an instance with all parameters specified. 145 * 146 * @param initialStepBoundFactor initial step bound factor 147 * @param costRelativeTolerance cost relative tolerance 148 * @param parRelativeTolerance parameters relative tolerance 149 * @param orthoTolerance orthogonality tolerance 150 * @param qrRankingThreshold threshold in the QR decomposition. Columns with a 2 151 * norm less than this threshold are considered to be 152 * all 0s. 153 */ LevenbergMarquardtOptimizer( final double initialStepBoundFactor, final double costRelativeTolerance, final double parRelativeTolerance, final double orthoTolerance, final double qrRankingThreshold)154 public LevenbergMarquardtOptimizer( 155 final double initialStepBoundFactor, 156 final double costRelativeTolerance, 157 final double parRelativeTolerance, 158 final double orthoTolerance, 159 final double qrRankingThreshold) { 160 this.initialStepBoundFactor = initialStepBoundFactor; 161 this.costRelativeTolerance = costRelativeTolerance; 162 this.parRelativeTolerance = parRelativeTolerance; 163 this.orthoTolerance = orthoTolerance; 164 this.qrRankingThreshold = qrRankingThreshold; 165 } 166 167 /** 168 * @param newInitialStepBoundFactor Positive input variable used in 169 * determining the initial step bound. This bound is set to the 170 * product of initialStepBoundFactor and the euclidean norm of 171 * {@code diag * x} if non-zero, or else to {@code newInitialStepBoundFactor} 172 * itself. In most cases factor should lie in the interval 173 * {@code (0.1, 100.0)}. {@code 100} is a generally recommended value. 174 * of the matrix is reduced. 175 * @return a new instance. 176 */ withInitialStepBoundFactor(double newInitialStepBoundFactor)177 public LevenbergMarquardtOptimizer withInitialStepBoundFactor(double newInitialStepBoundFactor) { 178 return new LevenbergMarquardtOptimizer( 179 newInitialStepBoundFactor, 180 costRelativeTolerance, 181 parRelativeTolerance, 182 orthoTolerance, 183 qrRankingThreshold); 184 } 185 186 /** 187 * @param newCostRelativeTolerance Desired relative error in the sum of squares. 188 * @return a new instance. 189 */ withCostRelativeTolerance(double newCostRelativeTolerance)190 public LevenbergMarquardtOptimizer withCostRelativeTolerance(double newCostRelativeTolerance) { 191 return new LevenbergMarquardtOptimizer( 192 initialStepBoundFactor, 193 newCostRelativeTolerance, 194 parRelativeTolerance, 195 orthoTolerance, 196 qrRankingThreshold); 197 } 198 199 /** 200 * @param newParRelativeTolerance Desired relative error in the approximate solution 201 * parameters. 202 * @return a new instance. 203 */ withParameterRelativeTolerance(double newParRelativeTolerance)204 public LevenbergMarquardtOptimizer withParameterRelativeTolerance(double newParRelativeTolerance) { 205 return new LevenbergMarquardtOptimizer( 206 initialStepBoundFactor, 207 costRelativeTolerance, 208 newParRelativeTolerance, 209 orthoTolerance, 210 qrRankingThreshold); 211 } 212 213 /** 214 * Modifies the given parameter. 215 * 216 * @param newOrthoTolerance Desired max cosine on the orthogonality between 217 * the function vector and the columns of the Jacobian. 218 * @return a new instance. 219 */ withOrthoTolerance(double newOrthoTolerance)220 public LevenbergMarquardtOptimizer withOrthoTolerance(double newOrthoTolerance) { 221 return new LevenbergMarquardtOptimizer( 222 initialStepBoundFactor, 223 costRelativeTolerance, 224 parRelativeTolerance, 225 newOrthoTolerance, 226 qrRankingThreshold); 227 } 228 229 /** 230 * @param newQRRankingThreshold Desired threshold for QR ranking. 231 * If the squared norm of a column vector is smaller or equal to this 232 * threshold during QR decomposition, it is considered to be a zero vector 233 * and hence the rank of the matrix is reduced. 234 * @return a new instance. 235 */ withRankingThreshold(double newQRRankingThreshold)236 public LevenbergMarquardtOptimizer withRankingThreshold(double newQRRankingThreshold) { 237 return new LevenbergMarquardtOptimizer( 238 initialStepBoundFactor, 239 costRelativeTolerance, 240 parRelativeTolerance, 241 orthoTolerance, 242 newQRRankingThreshold); 243 } 244 245 /** 246 * Gets the value of a tuning parameter. 247 * @see #withInitialStepBoundFactor(double) 248 * 249 * @return the parameter's value. 250 */ getInitialStepBoundFactor()251 public double getInitialStepBoundFactor() { 252 return initialStepBoundFactor; 253 } 254 255 /** 256 * Gets the value of a tuning parameter. 257 * @see #withCostRelativeTolerance(double) 258 * 259 * @return the parameter's value. 260 */ getCostRelativeTolerance()261 public double getCostRelativeTolerance() { 262 return costRelativeTolerance; 263 } 264 265 /** 266 * Gets the value of a tuning parameter. 267 * @see #withParameterRelativeTolerance(double) 268 * 269 * @return the parameter's value. 270 */ getParameterRelativeTolerance()271 public double getParameterRelativeTolerance() { 272 return parRelativeTolerance; 273 } 274 275 /** 276 * Gets the value of a tuning parameter. 277 * @see #withOrthoTolerance(double) 278 * 279 * @return the parameter's value. 280 */ getOrthoTolerance()281 public double getOrthoTolerance() { 282 return orthoTolerance; 283 } 284 285 /** 286 * Gets the value of a tuning parameter. 287 * @see #withRankingThreshold(double) 288 * 289 * @return the parameter's value. 290 */ getRankingThreshold()291 public double getRankingThreshold() { 292 return qrRankingThreshold; 293 } 294 295 /** {@inheritDoc} */ optimize(final LeastSquaresProblem problem)296 public Optimum optimize(final LeastSquaresProblem problem) { 297 // Pull in relevant data from the problem as locals. 298 final int nR = problem.getObservationSize(); // Number of observed data. 299 final int nC = problem.getParameterSize(); // Number of parameters. 300 // Counters. 301 final Incrementor iterationCounter = problem.getIterationCounter(); 302 final Incrementor evaluationCounter = problem.getEvaluationCounter(); 303 // Convergence criterion. 304 final ConvergenceChecker<Evaluation> checker = problem.getConvergenceChecker(); 305 306 // arrays shared with the other private methods 307 final int solvedCols = FastMath.min(nR, nC); 308 /* Parameters evolution direction associated with lmPar. */ 309 double[] lmDir = new double[nC]; 310 /* Levenberg-Marquardt parameter. */ 311 double lmPar = 0; 312 313 // local point 314 double delta = 0; 315 double xNorm = 0; 316 double[] diag = new double[nC]; 317 double[] oldX = new double[nC]; 318 double[] oldRes = new double[nR]; 319 double[] qtf = new double[nR]; 320 double[] work1 = new double[nC]; 321 double[] work2 = new double[nC]; 322 double[] work3 = new double[nC]; 323 324 325 // Evaluate the function at the starting point and calculate its norm. 326 evaluationCounter.incrementCount(); 327 //value will be reassigned in the loop 328 Evaluation current = problem.evaluate(problem.getStart()); 329 double[] currentResiduals = current.getResiduals().toArray(); 330 double currentCost = current.getCost(); 331 double[] currentPoint = current.getPoint().toArray(); 332 333 // Outer loop. 334 boolean firstIteration = true; 335 while (true) { 336 iterationCounter.incrementCount(); 337 338 final Evaluation previous = current; 339 340 // QR decomposition of the jacobian matrix 341 final InternalData internalData 342 = qrDecomposition(current.getJacobian(), solvedCols); 343 final double[][] weightedJacobian = internalData.weightedJacobian; 344 final int[] permutation = internalData.permutation; 345 final double[] diagR = internalData.diagR; 346 final double[] jacNorm = internalData.jacNorm; 347 348 //residuals already have weights applied 349 double[] weightedResidual = currentResiduals; 350 for (int i = 0; i < nR; i++) { 351 qtf[i] = weightedResidual[i]; 352 } 353 354 // compute Qt.res 355 qTy(qtf, internalData); 356 357 // now we don't need Q anymore, 358 // so let jacobian contain the R matrix with its diagonal elements 359 for (int k = 0; k < solvedCols; ++k) { 360 int pk = permutation[k]; 361 weightedJacobian[k][pk] = diagR[pk]; 362 } 363 364 if (firstIteration) { 365 // scale the point according to the norms of the columns 366 // of the initial jacobian 367 xNorm = 0; 368 for (int k = 0; k < nC; ++k) { 369 double dk = jacNorm[k]; 370 if (dk == 0) { 371 dk = 1.0; 372 } 373 double xk = dk * currentPoint[k]; 374 xNorm += xk * xk; 375 diag[k] = dk; 376 } 377 xNorm = FastMath.sqrt(xNorm); 378 379 // initialize the step bound delta 380 delta = (xNorm == 0) ? initialStepBoundFactor : (initialStepBoundFactor * xNorm); 381 } 382 383 // check orthogonality between function vector and jacobian columns 384 double maxCosine = 0; 385 if (currentCost != 0) { 386 for (int j = 0; j < solvedCols; ++j) { 387 int pj = permutation[j]; 388 double s = jacNorm[pj]; 389 if (s != 0) { 390 double sum = 0; 391 for (int i = 0; i <= j; ++i) { 392 sum += weightedJacobian[i][pj] * qtf[i]; 393 } 394 maxCosine = FastMath.max(maxCosine, FastMath.abs(sum) / (s * currentCost)); 395 } 396 } 397 } 398 if (maxCosine <= orthoTolerance) { 399 // Convergence has been reached. 400 return new OptimumImpl( 401 current, 402 evaluationCounter.getCount(), 403 iterationCounter.getCount()); 404 } 405 406 // rescale if necessary 407 for (int j = 0; j < nC; ++j) { 408 diag[j] = FastMath.max(diag[j], jacNorm[j]); 409 } 410 411 // Inner loop. 412 for (double ratio = 0; ratio < 1.0e-4;) { 413 414 // save the state 415 for (int j = 0; j < solvedCols; ++j) { 416 int pj = permutation[j]; 417 oldX[pj] = currentPoint[pj]; 418 } 419 final double previousCost = currentCost; 420 double[] tmpVec = weightedResidual; 421 weightedResidual = oldRes; 422 oldRes = tmpVec; 423 424 // determine the Levenberg-Marquardt parameter 425 lmPar = determineLMParameter(qtf, delta, diag, 426 internalData, solvedCols, 427 work1, work2, work3, lmDir, lmPar); 428 429 // compute the new point and the norm of the evolution direction 430 double lmNorm = 0; 431 for (int j = 0; j < solvedCols; ++j) { 432 int pj = permutation[j]; 433 lmDir[pj] = -lmDir[pj]; 434 currentPoint[pj] = oldX[pj] + lmDir[pj]; 435 double s = diag[pj] * lmDir[pj]; 436 lmNorm += s * s; 437 } 438 lmNorm = FastMath.sqrt(lmNorm); 439 // on the first iteration, adjust the initial step bound. 440 if (firstIteration) { 441 delta = FastMath.min(delta, lmNorm); 442 } 443 444 // Evaluate the function at x + p and calculate its norm. 445 evaluationCounter.incrementCount(); 446 current = problem.evaluate(new ArrayRealVector(currentPoint)); 447 currentResiduals = current.getResiduals().toArray(); 448 currentCost = current.getCost(); 449 currentPoint = current.getPoint().toArray(); 450 451 // compute the scaled actual reduction 452 double actRed = -1.0; 453 if (0.1 * currentCost < previousCost) { 454 double r = currentCost / previousCost; 455 actRed = 1.0 - r * r; 456 } 457 458 // compute the scaled predicted reduction 459 // and the scaled directional derivative 460 for (int j = 0; j < solvedCols; ++j) { 461 int pj = permutation[j]; 462 double dirJ = lmDir[pj]; 463 work1[j] = 0; 464 for (int i = 0; i <= j; ++i) { 465 work1[i] += weightedJacobian[i][pj] * dirJ; 466 } 467 } 468 double coeff1 = 0; 469 for (int j = 0; j < solvedCols; ++j) { 470 coeff1 += work1[j] * work1[j]; 471 } 472 double pc2 = previousCost * previousCost; 473 coeff1 /= pc2; 474 double coeff2 = lmPar * lmNorm * lmNorm / pc2; 475 double preRed = coeff1 + 2 * coeff2; 476 double dirDer = -(coeff1 + coeff2); 477 478 // ratio of the actual to the predicted reduction 479 ratio = (preRed == 0) ? 0 : (actRed / preRed); 480 481 // update the step bound 482 if (ratio <= 0.25) { 483 double tmp = 484 (actRed < 0) ? (0.5 * dirDer / (dirDer + 0.5 * actRed)) : 0.5; 485 if ((0.1 * currentCost >= previousCost) || (tmp < 0.1)) { 486 tmp = 0.1; 487 } 488 delta = tmp * FastMath.min(delta, 10.0 * lmNorm); 489 lmPar /= tmp; 490 } else if ((lmPar == 0) || (ratio >= 0.75)) { 491 delta = 2 * lmNorm; 492 lmPar *= 0.5; 493 } 494 495 // test for successful iteration. 496 if (ratio >= 1.0e-4) { 497 // successful iteration, update the norm 498 firstIteration = false; 499 xNorm = 0; 500 for (int k = 0; k < nC; ++k) { 501 double xK = diag[k] * currentPoint[k]; 502 xNorm += xK * xK; 503 } 504 xNorm = FastMath.sqrt(xNorm); 505 506 // tests for convergence. 507 if (checker != null && checker.converged(iterationCounter.getCount(), previous, current)) { 508 return new OptimumImpl(current, evaluationCounter.getCount(), iterationCounter.getCount()); 509 } 510 } else { 511 // failed iteration, reset the previous values 512 currentCost = previousCost; 513 for (int j = 0; j < solvedCols; ++j) { 514 int pj = permutation[j]; 515 currentPoint[pj] = oldX[pj]; 516 } 517 tmpVec = weightedResidual; 518 weightedResidual = oldRes; 519 oldRes = tmpVec; 520 // Reset "current" to previous values. 521 current = previous; 522 } 523 524 // Default convergence criteria. 525 if ((FastMath.abs(actRed) <= costRelativeTolerance && 526 preRed <= costRelativeTolerance && 527 ratio <= 2.0) || 528 delta <= parRelativeTolerance * xNorm) { 529 return new OptimumImpl(current, evaluationCounter.getCount(), iterationCounter.getCount()); 530 } 531 532 // tests for termination and stringent tolerances 533 if (FastMath.abs(actRed) <= TWO_EPS && 534 preRed <= TWO_EPS && 535 ratio <= 2.0) { 536 throw new ConvergenceException(LocalizedFormats.TOO_SMALL_COST_RELATIVE_TOLERANCE, 537 costRelativeTolerance); 538 } else if (delta <= TWO_EPS * xNorm) { 539 throw new ConvergenceException(LocalizedFormats.TOO_SMALL_PARAMETERS_RELATIVE_TOLERANCE, 540 parRelativeTolerance); 541 } else if (maxCosine <= TWO_EPS) { 542 throw new ConvergenceException(LocalizedFormats.TOO_SMALL_ORTHOGONALITY_TOLERANCE, 543 orthoTolerance); 544 } 545 } 546 } 547 } 548 549 /** 550 * Holds internal data. 551 * This structure was created so that all optimizer fields can be "final". 552 * Code should be further refactored in order to not pass around arguments 553 * that will modified in-place (cf. "work" arrays). 554 */ 555 private static class InternalData { 556 /** Weighted Jacobian. */ 557 private final double[][] weightedJacobian; 558 /** Columns permutation array. */ 559 private final int[] permutation; 560 /** Rank of the Jacobian matrix. */ 561 private final int rank; 562 /** Diagonal elements of the R matrix in the QR decomposition. */ 563 private final double[] diagR; 564 /** Norms of the columns of the jacobian matrix. */ 565 private final double[] jacNorm; 566 /** Coefficients of the Householder transforms vectors. */ 567 private final double[] beta; 568 569 /** 570 * @param weightedJacobian Weighted Jacobian. 571 * @param permutation Columns permutation array. 572 * @param rank Rank of the Jacobian matrix. 573 * @param diagR Diagonal elements of the R matrix in the QR decomposition. 574 * @param jacNorm Norms of the columns of the jacobian matrix. 575 * @param beta Coefficients of the Householder transforms vectors. 576 */ InternalData(double[][] weightedJacobian, int[] permutation, int rank, double[] diagR, double[] jacNorm, double[] beta)577 InternalData(double[][] weightedJacobian, 578 int[] permutation, 579 int rank, 580 double[] diagR, 581 double[] jacNorm, 582 double[] beta) { 583 this.weightedJacobian = weightedJacobian; 584 this.permutation = permutation; 585 this.rank = rank; 586 this.diagR = diagR; 587 this.jacNorm = jacNorm; 588 this.beta = beta; 589 } 590 } 591 592 /** 593 * Determines the Levenberg-Marquardt parameter. 594 * 595 * <p>This implementation is a translation in Java of the MINPACK 596 * <a href="http://www.netlib.org/minpack/lmpar.f">lmpar</a> 597 * routine.</p> 598 * <p>This method sets the lmPar and lmDir attributes.</p> 599 * <p>The authors of the original fortran function are:</p> 600 * <ul> 601 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 602 * <li>Burton S. Garbow</li> 603 * <li>Kenneth E. Hillstrom</li> 604 * <li>Jorge J. More</li> 605 * </ul> 606 * <p>Luc Maisonobe did the Java translation.</p> 607 * 608 * @param qy Array containing qTy. 609 * @param delta Upper bound on the euclidean norm of diagR * lmDir. 610 * @param diag Diagonal matrix. 611 * @param internalData Data (modified in-place in this method). 612 * @param solvedCols Number of solved point. 613 * @param work1 work array 614 * @param work2 work array 615 * @param work3 work array 616 * @param lmDir the "returned" LM direction will be stored in this array. 617 * @param lmPar the value of the LM parameter from the previous iteration. 618 * @return the new LM parameter 619 */ determineLMParameter(double[] qy, double delta, double[] diag, InternalData internalData, int solvedCols, double[] work1, double[] work2, double[] work3, double[] lmDir, double lmPar)620 private double determineLMParameter(double[] qy, double delta, double[] diag, 621 InternalData internalData, int solvedCols, 622 double[] work1, double[] work2, double[] work3, 623 double[] lmDir, double lmPar) { 624 final double[][] weightedJacobian = internalData.weightedJacobian; 625 final int[] permutation = internalData.permutation; 626 final int rank = internalData.rank; 627 final double[] diagR = internalData.diagR; 628 629 final int nC = weightedJacobian[0].length; 630 631 // compute and store in x the gauss-newton direction, if the 632 // jacobian is rank-deficient, obtain a least squares solution 633 for (int j = 0; j < rank; ++j) { 634 lmDir[permutation[j]] = qy[j]; 635 } 636 for (int j = rank; j < nC; ++j) { 637 lmDir[permutation[j]] = 0; 638 } 639 for (int k = rank - 1; k >= 0; --k) { 640 int pk = permutation[k]; 641 double ypk = lmDir[pk] / diagR[pk]; 642 for (int i = 0; i < k; ++i) { 643 lmDir[permutation[i]] -= ypk * weightedJacobian[i][pk]; 644 } 645 lmDir[pk] = ypk; 646 } 647 648 // evaluate the function at the origin, and test 649 // for acceptance of the Gauss-Newton direction 650 double dxNorm = 0; 651 for (int j = 0; j < solvedCols; ++j) { 652 int pj = permutation[j]; 653 double s = diag[pj] * lmDir[pj]; 654 work1[pj] = s; 655 dxNorm += s * s; 656 } 657 dxNorm = FastMath.sqrt(dxNorm); 658 double fp = dxNorm - delta; 659 if (fp <= 0.1 * delta) { 660 lmPar = 0; 661 return lmPar; 662 } 663 664 // if the jacobian is not rank deficient, the Newton step provides 665 // a lower bound, parl, for the zero of the function, 666 // otherwise set this bound to zero 667 double sum2; 668 double parl = 0; 669 if (rank == solvedCols) { 670 for (int j = 0; j < solvedCols; ++j) { 671 int pj = permutation[j]; 672 work1[pj] *= diag[pj] / dxNorm; 673 } 674 sum2 = 0; 675 for (int j = 0; j < solvedCols; ++j) { 676 int pj = permutation[j]; 677 double sum = 0; 678 for (int i = 0; i < j; ++i) { 679 sum += weightedJacobian[i][pj] * work1[permutation[i]]; 680 } 681 double s = (work1[pj] - sum) / diagR[pj]; 682 work1[pj] = s; 683 sum2 += s * s; 684 } 685 parl = fp / (delta * sum2); 686 } 687 688 // calculate an upper bound, paru, for the zero of the function 689 sum2 = 0; 690 for (int j = 0; j < solvedCols; ++j) { 691 int pj = permutation[j]; 692 double sum = 0; 693 for (int i = 0; i <= j; ++i) { 694 sum += weightedJacobian[i][pj] * qy[i]; 695 } 696 sum /= diag[pj]; 697 sum2 += sum * sum; 698 } 699 double gNorm = FastMath.sqrt(sum2); 700 double paru = gNorm / delta; 701 if (paru == 0) { 702 paru = Precision.SAFE_MIN / FastMath.min(delta, 0.1); 703 } 704 705 // if the input par lies outside of the interval (parl,paru), 706 // set par to the closer endpoint 707 lmPar = FastMath.min(paru, FastMath.max(lmPar, parl)); 708 if (lmPar == 0) { 709 lmPar = gNorm / dxNorm; 710 } 711 712 for (int countdown = 10; countdown >= 0; --countdown) { 713 714 // evaluate the function at the current value of lmPar 715 if (lmPar == 0) { 716 lmPar = FastMath.max(Precision.SAFE_MIN, 0.001 * paru); 717 } 718 double sPar = FastMath.sqrt(lmPar); 719 for (int j = 0; j < solvedCols; ++j) { 720 int pj = permutation[j]; 721 work1[pj] = sPar * diag[pj]; 722 } 723 determineLMDirection(qy, work1, work2, internalData, solvedCols, work3, lmDir); 724 725 dxNorm = 0; 726 for (int j = 0; j < solvedCols; ++j) { 727 int pj = permutation[j]; 728 double s = diag[pj] * lmDir[pj]; 729 work3[pj] = s; 730 dxNorm += s * s; 731 } 732 dxNorm = FastMath.sqrt(dxNorm); 733 double previousFP = fp; 734 fp = dxNorm - delta; 735 736 // if the function is small enough, accept the current value 737 // of lmPar, also test for the exceptional cases where parl is zero 738 if (FastMath.abs(fp) <= 0.1 * delta || 739 (parl == 0 && 740 fp <= previousFP && 741 previousFP < 0)) { 742 return lmPar; 743 } 744 745 // compute the Newton correction 746 for (int j = 0; j < solvedCols; ++j) { 747 int pj = permutation[j]; 748 work1[pj] = work3[pj] * diag[pj] / dxNorm; 749 } 750 for (int j = 0; j < solvedCols; ++j) { 751 int pj = permutation[j]; 752 work1[pj] /= work2[j]; 753 double tmp = work1[pj]; 754 for (int i = j + 1; i < solvedCols; ++i) { 755 work1[permutation[i]] -= weightedJacobian[i][pj] * tmp; 756 } 757 } 758 sum2 = 0; 759 for (int j = 0; j < solvedCols; ++j) { 760 double s = work1[permutation[j]]; 761 sum2 += s * s; 762 } 763 double correction = fp / (delta * sum2); 764 765 // depending on the sign of the function, update parl or paru. 766 if (fp > 0) { 767 parl = FastMath.max(parl, lmPar); 768 } else if (fp < 0) { 769 paru = FastMath.min(paru, lmPar); 770 } 771 772 // compute an improved estimate for lmPar 773 lmPar = FastMath.max(parl, lmPar + correction); 774 } 775 776 return lmPar; 777 } 778 779 /** 780 * Solve a*x = b and d*x = 0 in the least squares sense. 781 * <p>This implementation is a translation in Java of the MINPACK 782 * <a href="http://www.netlib.org/minpack/qrsolv.f">qrsolv</a> 783 * routine.</p> 784 * <p>This method sets the lmDir and lmDiag attributes.</p> 785 * <p>The authors of the original fortran function are:</p> 786 * <ul> 787 * <li>Argonne National Laboratory. MINPACK project. March 1980</li> 788 * <li>Burton S. Garbow</li> 789 * <li>Kenneth E. Hillstrom</li> 790 * <li>Jorge J. More</li> 791 * </ul> 792 * <p>Luc Maisonobe did the Java translation.</p> 793 * 794 * @param qy array containing qTy 795 * @param diag diagonal matrix 796 * @param lmDiag diagonal elements associated with lmDir 797 * @param internalData Data (modified in-place in this method). 798 * @param solvedCols Number of sloved point. 799 * @param work work array 800 * @param lmDir the "returned" LM direction is stored in this array 801 */ determineLMDirection(double[] qy, double[] diag, double[] lmDiag, InternalData internalData, int solvedCols, double[] work, double[] lmDir)802 private void determineLMDirection(double[] qy, double[] diag, 803 double[] lmDiag, 804 InternalData internalData, 805 int solvedCols, 806 double[] work, 807 double[] lmDir) { 808 final int[] permutation = internalData.permutation; 809 final double[][] weightedJacobian = internalData.weightedJacobian; 810 final double[] diagR = internalData.diagR; 811 812 // copy R and Qty to preserve input and initialize s 813 // in particular, save the diagonal elements of R in lmDir 814 for (int j = 0; j < solvedCols; ++j) { 815 int pj = permutation[j]; 816 for (int i = j + 1; i < solvedCols; ++i) { 817 weightedJacobian[i][pj] = weightedJacobian[j][permutation[i]]; 818 } 819 lmDir[j] = diagR[pj]; 820 work[j] = qy[j]; 821 } 822 823 // eliminate the diagonal matrix d using a Givens rotation 824 for (int j = 0; j < solvedCols; ++j) { 825 826 // prepare the row of d to be eliminated, locating the 827 // diagonal element using p from the Q.R. factorization 828 int pj = permutation[j]; 829 double dpj = diag[pj]; 830 if (dpj != 0) { 831 Arrays.fill(lmDiag, j + 1, lmDiag.length, 0); 832 } 833 lmDiag[j] = dpj; 834 835 // the transformations to eliminate the row of d 836 // modify only a single element of Qty 837 // beyond the first n, which is initially zero. 838 double qtbpj = 0; 839 for (int k = j; k < solvedCols; ++k) { 840 int pk = permutation[k]; 841 842 // determine a Givens rotation which eliminates the 843 // appropriate element in the current row of d 844 if (lmDiag[k] != 0) { 845 846 final double sin; 847 final double cos; 848 double rkk = weightedJacobian[k][pk]; 849 if (FastMath.abs(rkk) < FastMath.abs(lmDiag[k])) { 850 final double cotan = rkk / lmDiag[k]; 851 sin = 1.0 / FastMath.sqrt(1.0 + cotan * cotan); 852 cos = sin * cotan; 853 } else { 854 final double tan = lmDiag[k] / rkk; 855 cos = 1.0 / FastMath.sqrt(1.0 + tan * tan); 856 sin = cos * tan; 857 } 858 859 // compute the modified diagonal element of R and 860 // the modified element of (Qty,0) 861 weightedJacobian[k][pk] = cos * rkk + sin * lmDiag[k]; 862 final double temp = cos * work[k] + sin * qtbpj; 863 qtbpj = -sin * work[k] + cos * qtbpj; 864 work[k] = temp; 865 866 // accumulate the tranformation in the row of s 867 for (int i = k + 1; i < solvedCols; ++i) { 868 double rik = weightedJacobian[i][pk]; 869 final double temp2 = cos * rik + sin * lmDiag[i]; 870 lmDiag[i] = -sin * rik + cos * lmDiag[i]; 871 weightedJacobian[i][pk] = temp2; 872 } 873 } 874 } 875 876 // store the diagonal element of s and restore 877 // the corresponding diagonal element of R 878 lmDiag[j] = weightedJacobian[j][permutation[j]]; 879 weightedJacobian[j][permutation[j]] = lmDir[j]; 880 } 881 882 // solve the triangular system for z, if the system is 883 // singular, then obtain a least squares solution 884 int nSing = solvedCols; 885 for (int j = 0; j < solvedCols; ++j) { 886 if ((lmDiag[j] == 0) && (nSing == solvedCols)) { 887 nSing = j; 888 } 889 if (nSing < solvedCols) { 890 work[j] = 0; 891 } 892 } 893 if (nSing > 0) { 894 for (int j = nSing - 1; j >= 0; --j) { 895 int pj = permutation[j]; 896 double sum = 0; 897 for (int i = j + 1; i < nSing; ++i) { 898 sum += weightedJacobian[i][pj] * work[i]; 899 } 900 work[j] = (work[j] - sum) / lmDiag[j]; 901 } 902 } 903 904 // permute the components of z back to components of lmDir 905 for (int j = 0; j < lmDir.length; ++j) { 906 lmDir[permutation[j]] = work[j]; 907 } 908 } 909 910 /** 911 * Decompose a matrix A as A.P = Q.R using Householder transforms. 912 * <p>As suggested in the P. Lascaux and R. Theodor book 913 * <i>Analyse numérique matricielle appliquée à 914 * l'art de l'ingénieur</i> (Masson, 1986), instead of representing 915 * the Householder transforms with u<sub>k</sub> unit vectors such that: 916 * <pre> 917 * H<sub>k</sub> = I - 2u<sub>k</sub>.u<sub>k</sub><sup>t</sup> 918 * </pre> 919 * we use <sub>k</sub> non-unit vectors such that: 920 * <pre> 921 * H<sub>k</sub> = I - beta<sub>k</sub>v<sub>k</sub>.v<sub>k</sub><sup>t</sup> 922 * </pre> 923 * where v<sub>k</sub> = a<sub>k</sub> - alpha<sub>k</sub> e<sub>k</sub>. 924 * The beta<sub>k</sub> coefficients are provided upon exit as recomputing 925 * them from the v<sub>k</sub> vectors would be costly.</p> 926 * <p>This decomposition handles rank deficient cases since the tranformations 927 * are performed in non-increasing columns norms order thanks to columns 928 * pivoting. The diagonal elements of the R matrix are therefore also in 929 * non-increasing absolute values order.</p> 930 * 931 * @param jacobian Weighted Jacobian matrix at the current point. 932 * @param solvedCols Number of solved point. 933 * @return data used in other methods of this class. 934 * @throws ConvergenceException if the decomposition cannot be performed. 935 */ qrDecomposition(RealMatrix jacobian, int solvedCols)936 private InternalData qrDecomposition(RealMatrix jacobian, 937 int solvedCols) throws ConvergenceException { 938 // Code in this class assumes that the weighted Jacobian is -(W^(1/2) J), 939 // hence the multiplication by -1. 940 final double[][] weightedJacobian = jacobian.scalarMultiply(-1).getData(); 941 942 final int nR = weightedJacobian.length; 943 final int nC = weightedJacobian[0].length; 944 945 final int[] permutation = new int[nC]; 946 final double[] diagR = new double[nC]; 947 final double[] jacNorm = new double[nC]; 948 final double[] beta = new double[nC]; 949 950 // initializations 951 for (int k = 0; k < nC; ++k) { 952 permutation[k] = k; 953 double norm2 = 0; 954 for (int i = 0; i < nR; ++i) { 955 double akk = weightedJacobian[i][k]; 956 norm2 += akk * akk; 957 } 958 jacNorm[k] = FastMath.sqrt(norm2); 959 } 960 961 // transform the matrix column after column 962 for (int k = 0; k < nC; ++k) { 963 964 // select the column with the greatest norm on active components 965 int nextColumn = -1; 966 double ak2 = Double.NEGATIVE_INFINITY; 967 for (int i = k; i < nC; ++i) { 968 double norm2 = 0; 969 for (int j = k; j < nR; ++j) { 970 double aki = weightedJacobian[j][permutation[i]]; 971 norm2 += aki * aki; 972 } 973 if (Double.isInfinite(norm2) || Double.isNaN(norm2)) { 974 throw new ConvergenceException(LocalizedFormats.UNABLE_TO_PERFORM_QR_DECOMPOSITION_ON_JACOBIAN, 975 nR, nC); 976 } 977 if (norm2 > ak2) { 978 nextColumn = i; 979 ak2 = norm2; 980 } 981 } 982 if (ak2 <= qrRankingThreshold) { 983 return new InternalData(weightedJacobian, permutation, k, diagR, jacNorm, beta); 984 } 985 int pk = permutation[nextColumn]; 986 permutation[nextColumn] = permutation[k]; 987 permutation[k] = pk; 988 989 // choose alpha such that Hk.u = alpha ek 990 double akk = weightedJacobian[k][pk]; 991 double alpha = (akk > 0) ? -FastMath.sqrt(ak2) : FastMath.sqrt(ak2); 992 double betak = 1.0 / (ak2 - akk * alpha); 993 beta[pk] = betak; 994 995 // transform the current column 996 diagR[pk] = alpha; 997 weightedJacobian[k][pk] -= alpha; 998 999 // transform the remaining columns 1000 for (int dk = nC - 1 - k; dk > 0; --dk) { 1001 double gamma = 0; 1002 for (int j = k; j < nR; ++j) { 1003 gamma += weightedJacobian[j][pk] * weightedJacobian[j][permutation[k + dk]]; 1004 } 1005 gamma *= betak; 1006 for (int j = k; j < nR; ++j) { 1007 weightedJacobian[j][permutation[k + dk]] -= gamma * weightedJacobian[j][pk]; 1008 } 1009 } 1010 } 1011 1012 return new InternalData(weightedJacobian, permutation, solvedCols, diagR, jacNorm, beta); 1013 } 1014 1015 /** 1016 * Compute the product Qt.y for some Q.R. decomposition. 1017 * 1018 * @param y vector to multiply (will be overwritten with the result) 1019 * @param internalData Data. 1020 */ qTy(double[] y, InternalData internalData)1021 private void qTy(double[] y, 1022 InternalData internalData) { 1023 final double[][] weightedJacobian = internalData.weightedJacobian; 1024 final int[] permutation = internalData.permutation; 1025 final double[] beta = internalData.beta; 1026 1027 final int nR = weightedJacobian.length; 1028 final int nC = weightedJacobian[0].length; 1029 1030 for (int k = 0; k < nC; ++k) { 1031 int pk = permutation[k]; 1032 double gamma = 0; 1033 for (int i = k; i < nR; ++i) { 1034 gamma += weightedJacobian[i][pk] * y[i]; 1035 } 1036 gamma *= beta[pk]; 1037 for (int i = k; i < nR; ++i) { 1038 y[i] -= gamma * weightedJacobian[i][pk]; 1039 } 1040 } 1041 } 1042 } 1043