1 SUBROUTINE DTIMQL( LINE, NM, MVAL, NVAL, NK, KVAL, NNB, NBVAL, 2 $ NXVAL, NLDA, LDAVAL, TIMMIN, A, TAU, B, WORK, 3 $ RESLTS, LDR1, LDR2, LDR3, NOUT ) 4* 5* -- LAPACK timing routine (version 3.0) -- 6* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 7* Courant Institute, Argonne National Lab, and Rice University 8* March 31, 1993 9* 10* .. Scalar Arguments .. 11 CHARACTER*80 LINE 12 INTEGER LDR1, LDR2, LDR3, NK, NLDA, NM, NNB, NOUT 13 DOUBLE PRECISION TIMMIN 14* .. 15* .. Array Arguments .. 16 INTEGER KVAL( * ), LDAVAL( * ), MVAL( * ), NBVAL( * ), 17 $ NVAL( * ), NXVAL( * ) 18 DOUBLE PRECISION A( * ), B( * ), RESLTS( LDR1, LDR2, LDR3, * ), 19 $ TAU( * ), WORK( * ) 20* .. 21* 22* Purpose 23* ======= 24* 25* DTIMQL times the LAPACK routines to perform the QL factorization of 26* a DOUBLE PRECISION general matrix. 27* 28* Arguments 29* ========= 30* 31* LINE (input) CHARACTER*80 32* The input line that requested this routine. The first six 33* characters contain either the name of a subroutine or a 34* generic path name. The remaining characters may be used to 35* specify the individual routines to be timed. See ATIMIN for 36* a full description of the format of the input line. 37* 38* NM (input) INTEGER 39* The number of values of M and N contained in the vectors 40* MVAL and NVAL. The matrix sizes are used in pairs (M,N). 41* 42* MVAL (input) INTEGER array, dimension (NM) 43* The values of the matrix row dimension M. 44* 45* NVAL (input) INTEGER array, dimension (NM) 46* The values of the matrix column dimension N. 47* 48* NK (input) INTEGER 49* The number of values of K in the vector KVAL. 50* 51* KVAL (input) INTEGER array, dimension (NK) 52* The values of the matrix dimension K, used in DORMQL. 53* 54* NNB (input) INTEGER 55* The number of values of NB and NX contained in the 56* vectors NBVAL and NXVAL. The blocking parameters are used 57* in pairs (NB,NX). 58* 59* NBVAL (input) INTEGER array, dimension (NNB) 60* The values of the blocksize NB. 61* 62* NXVAL (input) INTEGER array, dimension (NNB) 63* The values of the crossover point NX. 64* 65* NLDA (input) INTEGER 66* The number of values of LDA contained in the vector LDAVAL. 67* 68* LDAVAL (input) INTEGER array, dimension (NLDA) 69* The values of the leading dimension of the array A. 70* 71* TIMMIN (input) DOUBLE PRECISION 72* The minimum time a subroutine will be timed. 73* 74* A (workspace) DOUBLE PRECISION array, dimension (LDAMAX*NMAX) 75* where LDAMAX and NMAX are the maximum values of LDA and N. 76* 77* TAU (workspace) DOUBLE PRECISION array, dimension (min(M,N)) 78* 79* B (workspace) DOUBLE PRECISION array, dimension (LDAMAX*NMAX) 80* 81* WORK (workspace) DOUBLE PRECISION array, dimension (LDAMAX*NBMAX) 82* where NBMAX is the maximum value of NB. 83* 84* RESLTS (workspace) DOUBLE PRECISION array, dimension 85* (LDR1,LDR2,LDR3,2*NK) 86* The timing results for each subroutine over the relevant 87* values of (M,N), (NB,NX), and LDA. 88* 89* LDR1 (input) INTEGER 90* The first dimension of RESLTS. LDR1 >= max(1,NNB). 91* 92* LDR2 (input) INTEGER 93* The second dimension of RESLTS. LDR2 >= max(1,NM). 94* 95* LDR3 (input) INTEGER 96* The third dimension of RESLTS. LDR3 >= max(1,NLDA). 97* 98* NOUT (input) INTEGER 99* The unit number for output. 100* 101* Internal Parameters 102* =================== 103* 104* MODE INTEGER 105* The matrix type. MODE = 3 is a geometric distribution of 106* eigenvalues. See DLATMS for further details. 107* 108* COND DOUBLE PRECISION 109* The condition number of the matrix. The singular values are 110* set to values from DMAX to DMAX/COND. 111* 112* DMAX DOUBLE PRECISION 113* The magnitude of the largest singular value. 114* 115* ===================================================================== 116* 117* .. Parameters .. 118 INTEGER NSUBS 119 PARAMETER ( NSUBS = 3 ) 120 INTEGER MODE 121 DOUBLE PRECISION COND, DMAX 122 PARAMETER ( MODE = 3, COND = 100.0D0, DMAX = 1.0D0 ) 123* .. 124* .. Local Scalars .. 125 CHARACTER LABM, SIDE, TRANS 126 CHARACTER*3 PATH 127 CHARACTER*6 CNAME 128 INTEGER I, I4, IC, ICL, IK, ILDA, IM, IMX, INB, INFO, 129 $ ISIDE, ISUB, ITOFF, ITRAN, K, K1, LDA, LW, M, 130 $ M1, MINMN, N, N1, NB, NX 131 DOUBLE PRECISION OPS, S1, S2, TIME, UNTIME 132* .. 133* .. Local Arrays .. 134 LOGICAL TIMSUB( NSUBS ) 135 CHARACTER SIDES( 2 ), TRANSS( 2 ) 136 CHARACTER*6 SUBNAM( NSUBS ) 137 INTEGER ISEED( 4 ), MUSE( 12 ), NUSE( 12 ), RESEED( 4 ) 138* .. 139* .. External Functions .. 140 DOUBLE PRECISION DMFLOP, DOPLA, DSECND 141 EXTERNAL DMFLOP, DOPLA, DSECND 142* .. 143* .. External Subroutines .. 144 EXTERNAL ATIMCK, ATIMIN, DGEQLF, DLACPY, DLATMS, DORGQL, 145 $ DORMQL, DPRTB4, DPRTB5, DTIMMG, ICOPY, XLAENV 146* .. 147* .. Intrinsic Functions .. 148 INTRINSIC DBLE, MAX, MIN 149* .. 150* .. Data statements .. 151 DATA SUBNAM / 'DGEQLF', 'DORGQL', 'DORMQL' / 152 DATA SIDES / 'L', 'R' / , TRANSS / 'N', 'T' / 153 DATA ISEED / 0, 0, 0, 1 / 154* .. 155* .. Executable Statements .. 156* 157* Extract the timing request from the input line. 158* 159 PATH( 1: 1 ) = 'Double precision' 160 PATH( 2: 3 ) = 'QL' 161 CALL ATIMIN( PATH, LINE, NSUBS, SUBNAM, TIMSUB, NOUT, INFO ) 162 IF( INFO.NE.0 ) 163 $ GO TO 230 164* 165* Check that M <= LDA for the input values. 166* 167 CNAME = LINE( 1: 6 ) 168 CALL ATIMCK( 1, CNAME, NM, MVAL, NLDA, LDAVAL, NOUT, INFO ) 169 IF( INFO.GT.0 ) THEN 170 WRITE( NOUT, FMT = 9999 )CNAME 171 GO TO 230 172 END IF 173* 174* Do for each pair of values (M,N): 175* 176 DO 70 IM = 1, NM 177 M = MVAL( IM ) 178 N = NVAL( IM ) 179 MINMN = MIN( M, N ) 180 CALL ICOPY( 4, ISEED, 1, RESEED, 1 ) 181* 182* Do for each value of LDA: 183* 184 DO 60 ILDA = 1, NLDA 185 LDA = LDAVAL( ILDA ) 186* 187* Do for each pair of values (NB, NX) in NBVAL and NXVAL. 188* 189 DO 50 INB = 1, NNB 190 NB = NBVAL( INB ) 191 CALL XLAENV( 1, NB ) 192 NX = NXVAL( INB ) 193 CALL XLAENV( 3, NX ) 194 LW = MAX( 1, N*MAX( 1, NB ) ) 195* 196* Generate a test matrix of size M by N. 197* 198 CALL ICOPY( 4, RESEED, 1, ISEED, 1 ) 199 CALL DLATMS( M, N, 'Uniform', ISEED, 'Nonsymm', TAU, 200 $ MODE, COND, DMAX, M, N, 'No packing', B, 201 $ LDA, WORK, INFO ) 202* 203 IF( TIMSUB( 1 ) ) THEN 204* 205* DGEQLF: QL factorization 206* 207 CALL DLACPY( 'Full', M, N, B, LDA, A, LDA ) 208 IC = 0 209 S1 = DSECND( ) 210 10 CONTINUE 211 CALL DGEQLF( M, N, A, LDA, TAU, WORK, LW, INFO ) 212 S2 = DSECND( ) 213 TIME = S2 - S1 214 IC = IC + 1 215 IF( TIME.LT.TIMMIN ) THEN 216 CALL DLACPY( 'Full', M, N, B, LDA, A, LDA ) 217 GO TO 10 218 END IF 219* 220* Subtract the time used in DLACPY. 221* 222 ICL = 1 223 S1 = DSECND( ) 224 20 CONTINUE 225 S2 = DSECND( ) 226 UNTIME = S2 - S1 227 ICL = ICL + 1 228 IF( ICL.LE.IC ) THEN 229 CALL DLACPY( 'Full', M, N, A, LDA, B, LDA ) 230 GO TO 20 231 END IF 232* 233 TIME = ( TIME-UNTIME ) / DBLE( IC ) 234 OPS = DOPLA( 'DGEQLF', M, N, 0, 0, NB ) 235 RESLTS( INB, IM, ILDA, 1 ) = DMFLOP( OPS, TIME, INFO ) 236 ELSE 237* 238* If DGEQLF was not timed, generate a matrix and factor 239* it using DGEQLF anyway so that the factored form of 240* the matrix can be used in timing the other routines. 241* 242 CALL DLACPY( 'Full', M, N, B, LDA, A, LDA ) 243 CALL DGEQLF( M, N, A, LDA, TAU, WORK, LW, INFO ) 244 END IF 245* 246 IF( TIMSUB( 2 ) ) THEN 247* 248* DORGQL: Generate orthogonal matrix Q from the QL 249* factorization 250* 251 CALL DLACPY( 'Full', M, MINMN, A, LDA, B, LDA ) 252 IC = 0 253 S1 = DSECND( ) 254 30 CONTINUE 255 CALL DORGQL( M, MINMN, MINMN, B, LDA, TAU, WORK, LW, 256 $ INFO ) 257 S2 = DSECND( ) 258 TIME = S2 - S1 259 IC = IC + 1 260 IF( TIME.LT.TIMMIN ) THEN 261 CALL DLACPY( 'Full', M, MINMN, A, LDA, B, LDA ) 262 GO TO 30 263 END IF 264* 265* Subtract the time used in DLACPY. 266* 267 ICL = 1 268 S1 = DSECND( ) 269 40 CONTINUE 270 S2 = DSECND( ) 271 UNTIME = S2 - S1 272 ICL = ICL + 1 273 IF( ICL.LE.IC ) THEN 274 CALL DLACPY( 'Full', M, MINMN, A, LDA, B, LDA ) 275 GO TO 40 276 END IF 277* 278 TIME = ( TIME-UNTIME ) / DBLE( IC ) 279 OPS = DOPLA( 'DORGQL', M, MINMN, MINMN, 0, NB ) 280 RESLTS( INB, IM, ILDA, 2 ) = DMFLOP( OPS, TIME, INFO ) 281 END IF 282* 283 50 CONTINUE 284 60 CONTINUE 285 70 CONTINUE 286* 287* Print tables of results 288* 289 DO 90 ISUB = 1, NSUBS - 1 290 IF( .NOT.TIMSUB( ISUB ) ) 291 $ GO TO 90 292 WRITE( NOUT, FMT = 9998 )SUBNAM( ISUB ) 293 IF( NLDA.GT.1 ) THEN 294 DO 80 I = 1, NLDA 295 WRITE( NOUT, FMT = 9997 )I, LDAVAL( I ) 296 80 CONTINUE 297 END IF 298 WRITE( NOUT, FMT = * ) 299 IF( ISUB.EQ.2 ) 300 $ WRITE( NOUT, FMT = 9996 ) 301 CALL DPRTB4( '( NB, NX)', 'M', 'N', NNB, NBVAL, NXVAL, NM, 302 $ MVAL, NVAL, NLDA, RESLTS( 1, 1, 1, ISUB ), LDR1, 303 $ LDR2, NOUT ) 304 90 CONTINUE 305* 306* Time DORMQL separately. Here the starting matrix is M by N, and 307* K is the free dimension of the matrix multiplied by Q. 308* 309 IF( TIMSUB( 3 ) ) THEN 310* 311* Check that K <= LDA for the input values. 312* 313 CALL ATIMCK( 3, CNAME, NK, KVAL, NLDA, LDAVAL, NOUT, INFO ) 314 IF( INFO.GT.0 ) THEN 315 WRITE( NOUT, FMT = 9999 )SUBNAM( 3 ) 316 GO TO 230 317 END IF 318* 319* Use only the pairs (M,N) where M >= N. 320* 321 IMX = 0 322 DO 100 IM = 1, NM 323 IF( MVAL( IM ).GE.NVAL( IM ) ) THEN 324 IMX = IMX + 1 325 MUSE( IMX ) = MVAL( IM ) 326 NUSE( IMX ) = NVAL( IM ) 327 END IF 328 100 CONTINUE 329* 330* DORMQL: Multiply by Q stored as a product of elementary 331* transformations 332* 333* Do for each pair of values (M,N): 334* 335 DO 180 IM = 1, IMX 336 M = MUSE( IM ) 337 N = NUSE( IM ) 338* 339* Do for each value of LDA: 340* 341 DO 170 ILDA = 1, NLDA 342 LDA = LDAVAL( ILDA ) 343* 344* Generate an M by N matrix and form its QL decomposition. 345* 346 CALL DLATMS( M, N, 'Uniform', ISEED, 'Nonsymm', TAU, 347 $ MODE, COND, DMAX, M, N, 'No packing', A, 348 $ LDA, WORK, INFO ) 349 LW = MAX( 1, N*MAX( 1, NB ) ) 350 CALL DGEQLF( M, N, A, LDA, TAU, WORK, LW, INFO ) 351* 352* Do first for SIDE = 'L', then for SIDE = 'R' 353* 354 I4 = 0 355 DO 160 ISIDE = 1, 2 356 SIDE = SIDES( ISIDE ) 357* 358* Do for each pair of values (NB, NX) in NBVAL and 359* NXVAL. 360* 361 DO 150 INB = 1, NNB 362 NB = NBVAL( INB ) 363 CALL XLAENV( 1, NB ) 364 NX = NXVAL( INB ) 365 CALL XLAENV( 3, NX ) 366* 367* Do for each value of K in KVAL 368* 369 DO 140 IK = 1, NK 370 K = KVAL( IK ) 371* 372* Sort out which variable is which 373* 374 IF( ISIDE.EQ.1 ) THEN 375 M1 = M 376 K1 = N 377 N1 = K 378 LW = MAX( 1, N1*MAX( 1, NB ) ) 379 ELSE 380 N1 = M 381 K1 = N 382 M1 = K 383 LW = MAX( 1, M1*MAX( 1, NB ) ) 384 END IF 385* 386* Do first for TRANS = 'N', then for TRANS = 'T' 387* 388 ITOFF = 0 389 DO 130 ITRAN = 1, 2 390 TRANS = TRANSS( ITRAN ) 391 CALL DTIMMG( 0, M1, N1, B, LDA, 0, 0 ) 392 IC = 0 393 S1 = DSECND( ) 394 110 CONTINUE 395 CALL DORMQL( SIDE, TRANS, M1, N1, K1, A, LDA, 396 $ TAU, B, LDA, WORK, LW, INFO ) 397 S2 = DSECND( ) 398 TIME = S2 - S1 399 IC = IC + 1 400 IF( TIME.LT.TIMMIN ) THEN 401 CALL DTIMMG( 0, M1, N1, B, LDA, 0, 0 ) 402 GO TO 110 403 END IF 404* 405* Subtract the time used in DTIMMG. 406* 407 ICL = 1 408 S1 = DSECND( ) 409 120 CONTINUE 410 S2 = DSECND( ) 411 UNTIME = S2 - S1 412 ICL = ICL + 1 413 IF( ICL.LE.IC ) THEN 414 CALL DTIMMG( 0, M1, N1, B, LDA, 0, 0 ) 415 GO TO 120 416 END IF 417* 418 TIME = ( TIME-UNTIME ) / DBLE( IC ) 419 OPS = DOPLA( 'DORMQL', M1, N1, K1, ISIDE-1, 420 $ NB ) 421 RESLTS( INB, IM, ILDA, 422 $ I4+ITOFF+IK ) = DMFLOP( OPS, TIME, INFO ) 423 ITOFF = NK 424 130 CONTINUE 425 140 CONTINUE 426 150 CONTINUE 427 I4 = 2*NK 428 160 CONTINUE 429 170 CONTINUE 430 180 CONTINUE 431* 432* Print tables of results 433* 434 ISUB = 3 435 I4 = 1 436 IF( IMX.GE.1 ) THEN 437 DO 220 ISIDE = 1, 2 438 SIDE = SIDES( ISIDE ) 439 IF( ISIDE.EQ.1 ) THEN 440 WRITE( NOUT, FMT = 9998 )SUBNAM( ISUB ) 441 IF( NLDA.GT.1 ) THEN 442 DO 190 I = 1, NLDA 443 WRITE( NOUT, FMT = 9997 )I, LDAVAL( I ) 444 190 CONTINUE 445 END IF 446 END IF 447 DO 210 ITRAN = 1, 2 448 TRANS = TRANSS( ITRAN ) 449 DO 200 IK = 1, NK 450 IF( ISIDE.EQ.1 ) THEN 451 N = KVAL( IK ) 452 WRITE( NOUT, FMT = 9995 )SUBNAM( ISUB ), SIDE, 453 $ TRANS, 'N', N 454 LABM = 'M' 455 ELSE 456 M = KVAL( IK ) 457 WRITE( NOUT, FMT = 9995 )SUBNAM( ISUB ), SIDE, 458 $ TRANS, 'M', M 459 LABM = 'N' 460 END IF 461 CALL DPRTB5( 'NB', LABM, 'K', NNB, NBVAL, IMX, 462 $ MUSE, NUSE, NLDA, 463 $ RESLTS( 1, 1, 1, I4 ), LDR1, LDR2, 464 $ NOUT ) 465 I4 = I4 + 1 466 200 CONTINUE 467 210 CONTINUE 468 220 CONTINUE 469 ELSE 470 WRITE( NOUT, FMT = 9994 )SUBNAM( ISUB ) 471 END IF 472 END IF 473 230 CONTINUE 474 9999 FORMAT( 1X, A6, ' timing run not attempted', / ) 475 9998 FORMAT( / ' *** Speed of ', A6, ' in megaflops ***' ) 476 9997 FORMAT( 5X, 'line ', I2, ' with LDA = ', I5 ) 477 9996 FORMAT( 5X, 'K = min(M,N)', / ) 478 9995 FORMAT( / 5X, A6, ' with SIDE = ''', A1, ''', TRANS = ''', A1, 479 $ ''', ', A1, ' =', I6, / ) 480 9994 FORMAT( ' *** No pairs (M,N) found with M >= N: ', A6, 481 $ ' not timed' ) 482 RETURN 483* 484* End of DTIMQL 485* 486 END 487