1 SUBROUTINE CTIMHR( LINE, NM, MVAL, NN, NVAL, NNB, NBVAL, NXVAL, 2 $ NLDA, LDAVAL, TIMMIN, A, TAU, B, WORK, RWORK, 3 $ RESLTS, LDR1, LDR2, LDR3, NOUT ) 4* 5* -- LAPACK timing routine (version 3.0) -- 6* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 7* Courant Institute, Argonne National Lab, and Rice University 8* March 31, 1993 9* 10* .. Scalar Arguments .. 11 CHARACTER*80 LINE 12 INTEGER LDR1, LDR2, LDR3, NLDA, NM, NN, NNB, NOUT 13 REAL TIMMIN 14* .. 15* .. Array Arguments .. 16 INTEGER LDAVAL( * ), MVAL( * ), NBVAL( * ), NVAL( * ), 17 $ NXVAL( * ) 18 REAL RESLTS( LDR1, LDR2, LDR3, * ), RWORK( * ) 19 COMPLEX A( * ), B( * ), TAU( * ), WORK( * ) 20* .. 21* 22* Purpose 23* ======= 24* 25* CTIMHR times the LAPACK routines CGEHRD, CUNGHR, and CUNMHR. 26* 27* Arguments 28* ========= 29* 30* LINE (input) CHARACTER*80 31* The input line that requested this routine. The first six 32* characters contain either the name of a subroutine or a 33* generic path name. The remaining characters may be used to 34* specify the individual routines to be timed. See ATIMIN for 35* a full description of the format of the input line. 36* 37* NM (input) INTEGER 38* The number of values of M contained in the vector MVAL. 39* 40* MVAL (input) INTEGER array, dimension (NM) 41* The values of the matrix size M. 42* 43* NN (input) INTEGER 44* The number of values of N contained in the vector NVAL. 45* 46* NVAL (input) INTEGER array, dimension (NN) 47* The values of the matrix column dimension N. 48* 49* NNB (input) INTEGER 50* The number of values of NB and NX contained in the 51* vectors NBVAL and NXVAL. The blocking parameters are used 52* in pairs (NB,NX). 53* 54* NBVAL (input) INTEGER array, dimension (NNB) 55* The values of the blocksize NB. 56* 57* NXVAL (input) INTEGER array, dimension (NNB) 58* The values of the crossover point NX. 59* 60* NLDA (input) INTEGER 61* The number of values of LDA contained in the vector LDAVAL. 62* 63* LDAVAL (input) INTEGER array, dimension (NLDA) 64* The values of the leading dimension of the array A. 65* 66* TIMMIN (input) REAL 67* The minimum time a subroutine will be timed. 68* 69* A (workspace) COMPLEX array, dimension (LDAMAX*NMAX) 70* where LDAMAX and NMAX are the maximum values of LDA and N. 71* 72* TAU (workspace) COMPLEX array, dimension (min(M,N)) 73* 74* B (workspace) COMPLEX array, dimension (LDAMAX*NMAX) 75* 76* WORK (workspace) COMPLEX array, dimension (LDAMAX*NBMAX) 77* where NBMAX is the maximum value of NB. 78* 79* RWORK (workspace) REAL array, dimension 80* (min(MMAX,NMAX)) 81* 82* RESLTS (workspace) REAL array, dimension 83* (LDR1,LDR2,LDR3,4*NN+3) 84* The timing results for each subroutine over the relevant 85* values of M, (NB,NX), LDA, and N. 86* 87* LDR1 (input) INTEGER 88* The first dimension of RESLTS. LDR1 >= max(1,NNB). 89* 90* LDR2 (input) INTEGER 91* The second dimension of RESLTS. LDR2 >= max(1,NM). 92* 93* LDR3 (input) INTEGER 94* The third dimension of RESLTS. LDR3 >= max(1,NLDA). 95* 96* NOUT (input) INTEGER 97* The unit number for output. 98* 99* Internal Parameters 100* =================== 101* 102* MODE INTEGER 103* The matrix type. MODE = 3 is a geometric distribution of 104* eigenvalues. See CLATMS for further details. 105* 106* COND REAL 107* The condition number of the matrix. The singular values are 108* set to values from DMAX to DMAX/COND. 109* 110* DMAX REAL 111* The magnitude of the largest singular value. 112* 113* ===================================================================== 114* 115* .. Parameters .. 116 INTEGER NSUBS 117 PARAMETER ( NSUBS = 3 ) 118 INTEGER MODE 119 REAL COND, DMAX 120 PARAMETER ( MODE = 3, COND = 100.0E0, DMAX = 1.0E0 ) 121* .. 122* .. Local Scalars .. 123 CHARACTER LAB1, LAB2, SIDE, TRANS 124 CHARACTER*3 PATH 125 CHARACTER*6 CNAME 126 INTEGER I, I4, IC, ICL, IHI, ILDA, ILO, IM, IN, INB, 127 $ INFO, ISIDE, ISUB, ITOFF, ITRAN, LDA, LW, M, 128 $ M1, N, N1, NB, NX 129 REAL OPS, S1, S2, TIME, UNTIME 130* .. 131* .. Local Arrays .. 132 LOGICAL TIMSUB( NSUBS ) 133 CHARACTER SIDES( 2 ), TRANSS( 2 ) 134 CHARACTER*6 SUBNAM( NSUBS ) 135 INTEGER ISEED( 4 ), RESEED( 4 ) 136* .. 137* .. External Functions .. 138 REAL SECOND, SMFLOP, SOPLA 139 EXTERNAL SECOND, SMFLOP, SOPLA 140* .. 141* .. External Subroutines .. 142 EXTERNAL ATIMCK, ATIMIN, CGEHRD, CLACPY, CLATMS, CTIMMG, 143 $ CUNGHR, CUNMHR, ICOPY, SPRTB3, SPRTBL, XLAENV 144* .. 145* .. Intrinsic Functions .. 146 INTRINSIC MAX, REAL 147* .. 148* .. Data statements .. 149 DATA SUBNAM / 'CGEHRD', 'CUNGHR', 'CUNMHR' / 150 DATA SIDES / 'L', 'R' / , TRANSS / 'N', 'C' / 151 DATA ISEED / 0, 0, 0, 1 / 152* .. 153* .. Executable Statements .. 154* 155* Extract the timing request from the input line. 156* 157 PATH( 1: 1 ) = 'Complex precision' 158 PATH( 2: 3 ) = 'HR' 159 CALL ATIMIN( PATH, LINE, NSUBS, SUBNAM, TIMSUB, NOUT, INFO ) 160 IF( INFO.NE.0 ) 161 $ GO TO 190 162* 163* Check that N <= LDA for the input values. 164* 165 CNAME = LINE( 1: 6 ) 166 CALL ATIMCK( 2, CNAME, NM, MVAL, NLDA, LDAVAL, NOUT, INFO ) 167 IF( INFO.GT.0 ) THEN 168 WRITE( NOUT, FMT = 9999 )CNAME 169 GO TO 190 170 END IF 171* 172* Check that K <= LDA for CUNMHR 173* 174 IF( TIMSUB( 3 ) ) THEN 175 CALL ATIMCK( 3, CNAME, NN, NVAL, NLDA, LDAVAL, NOUT, INFO ) 176 IF( INFO.GT.0 ) THEN 177 WRITE( NOUT, FMT = 9999 )SUBNAM( 3 ) 178 TIMSUB( 3 ) = .FALSE. 179 END IF 180 END IF 181* 182* Do for each value of M: 183* 184 DO 120 IM = 1, NM 185 M = MVAL( IM ) 186 ILO = 1 187 IHI = M 188 CALL ICOPY( 4, ISEED, 1, RESEED, 1 ) 189* 190* Do for each value of LDA: 191* 192 DO 110 ILDA = 1, NLDA 193 LDA = LDAVAL( ILDA ) 194* 195* Do for each pair of values (NB, NX) in NBVAL and NXVAL. 196* 197 DO 100 INB = 1, NNB 198 NB = NBVAL( INB ) 199 CALL XLAENV( 1, NB ) 200 NX = NXVAL( INB ) 201 CALL XLAENV( 3, NX ) 202 LW = MAX( 1, M*MAX( 1, NB ) ) 203* 204* Generate a test matrix of size M by M. 205* 206 CALL ICOPY( 4, RESEED, 1, ISEED, 1 ) 207 CALL CLATMS( M, M, 'Uniform', ISEED, 'Nonsymm', RWORK, 208 $ MODE, COND, DMAX, M, M, 'No packing', B, 209 $ LDA, WORK, INFO ) 210* 211 IF( TIMSUB( 1 ) ) THEN 212* 213* CGEHRD: Reduction to Hesenberg form 214* 215 CALL CLACPY( 'Full', M, M, B, LDA, A, LDA ) 216 IC = 0 217 S1 = SECOND( ) 218 10 CONTINUE 219 CALL CGEHRD( M, ILO, IHI, A, LDA, TAU, WORK, LW, 220 $ INFO ) 221 S2 = SECOND( ) 222 TIME = S2 - S1 223 IC = IC + 1 224 IF( TIME.LT.TIMMIN ) THEN 225 CALL CLACPY( 'Full', M, M, B, LDA, A, LDA ) 226 GO TO 10 227 END IF 228* 229* Subtract the time used in CLACPY. 230* 231 ICL = 1 232 S1 = SECOND( ) 233 20 CONTINUE 234 S2 = SECOND( ) 235 UNTIME = S2 - S1 236 ICL = ICL + 1 237 IF( ICL.LE.IC ) THEN 238 CALL CLACPY( 'Full', M, M, A, LDA, B, LDA ) 239 GO TO 20 240 END IF 241* 242 TIME = ( TIME-UNTIME ) / REAL( IC ) 243 OPS = SOPLA( 'CGEHRD', M, ILO, IHI, 0, NB ) 244 RESLTS( INB, IM, ILDA, 1 ) = SMFLOP( OPS, TIME, INFO ) 245 ELSE 246* 247* If CGEHRD was not timed, generate a matrix and factor 248* it using CGEHRD anyway so that the factored form of 249* the matrix can be used in timing the other routines. 250* 251 CALL CLACPY( 'Full', M, M, B, LDA, A, LDA ) 252 CALL CGEHRD( M, ILO, IHI, A, LDA, TAU, WORK, LW, 253 $ INFO ) 254 END IF 255* 256 IF( TIMSUB( 2 ) ) THEN 257* 258* CUNGHR: Generate the orthogonal matrix Q from the 259* reduction to Hessenberg form A = Q*H*Q' 260* 261 CALL CLACPY( 'Full', M, M, A, LDA, B, LDA ) 262 IC = 0 263 S1 = SECOND( ) 264 30 CONTINUE 265 CALL CUNGHR( M, ILO, IHI, B, LDA, TAU, WORK, LW, 266 $ INFO ) 267 S2 = SECOND( ) 268 TIME = S2 - S1 269 IC = IC + 1 270 IF( TIME.LT.TIMMIN ) THEN 271 CALL CLACPY( 'Full', M, M, A, LDA, B, LDA ) 272 GO TO 30 273 END IF 274* 275* Subtract the time used in CLACPY. 276* 277 ICL = 1 278 S1 = SECOND( ) 279 40 CONTINUE 280 S2 = SECOND( ) 281 UNTIME = S2 - S1 282 ICL = ICL + 1 283 IF( ICL.LE.IC ) THEN 284 CALL CLACPY( 'Full', M, M, A, LDA, B, LDA ) 285 GO TO 40 286 END IF 287* 288 TIME = ( TIME-UNTIME ) / REAL( IC ) 289* 290* Op count for CUNGHR: same as 291* CUNGQR( IHI-ILO, IHI-ILO, IHI-ILO, ... ) 292* 293 OPS = SOPLA( 'CUNGQR', IHI-ILO, IHI-ILO, IHI-ILO, 0, 294 $ NB ) 295 RESLTS( INB, IM, ILDA, 2 ) = SMFLOP( OPS, TIME, INFO ) 296 END IF 297* 298 IF( TIMSUB( 3 ) ) THEN 299* 300* CUNMHR: Multiply by Q stored as a product of 301* elementary transformations 302* 303 I4 = 2 304 DO 90 ISIDE = 1, 2 305 SIDE = SIDES( ISIDE ) 306 DO 80 IN = 1, NN 307 N = NVAL( IN ) 308 LW = MAX( 1, MAX( 1, NB )*N ) 309 IF( ISIDE.EQ.1 ) THEN 310 M1 = M 311 N1 = N 312 ELSE 313 M1 = N 314 N1 = M 315 END IF 316 ITOFF = 0 317 DO 70 ITRAN = 1, 2 318 TRANS = TRANSS( ITRAN ) 319 CALL CTIMMG( 0, M1, N1, B, LDA, 0, 0 ) 320 IC = 0 321 S1 = SECOND( ) 322 50 CONTINUE 323 CALL CUNMHR( SIDE, TRANS, M1, N1, ILO, IHI, 324 $ A, LDA, TAU, B, LDA, WORK, LW, 325 $ INFO ) 326 S2 = SECOND( ) 327 TIME = S2 - S1 328 IC = IC + 1 329 IF( TIME.LT.TIMMIN ) THEN 330 CALL CTIMMG( 0, M1, N1, B, LDA, 0, 0 ) 331 GO TO 50 332 END IF 333* 334* Subtract the time used in CTIMMG. 335* 336 ICL = 1 337 S1 = SECOND( ) 338 60 CONTINUE 339 S2 = SECOND( ) 340 UNTIME = S2 - S1 341 ICL = ICL + 1 342 IF( ICL.LE.IC ) THEN 343 CALL CTIMMG( 0, M1, N1, B, LDA, 0, 0 ) 344 GO TO 60 345 END IF 346* 347 TIME = ( TIME-UNTIME ) / REAL( IC ) 348* 349* Op count for CUNMHR, SIDE='L': same as 350* CUNMQR( 'L', TRANS, IHI-ILO, N, IHI-ILO, ...) 351* 352* Op count for CUNMHR, SIDE='R': same as 353* CUNMQR( 'R', TRANS, M, IHI-ILO, IHI-ILO, ...) 354* 355 IF( ISIDE.EQ.1 ) THEN 356 OPS = SOPLA( 'CUNMQR', IHI-ILO, N1, 357 $ IHI-ILO, -1, NB ) 358 ELSE 359 OPS = SOPLA( 'CUNMQR', M1, IHI-ILO, 360 $ IHI-ILO, 1, NB ) 361 END IF 362* 363 RESLTS( INB, IM, ILDA, 364 $ I4+ITOFF+IN ) = SMFLOP( OPS, TIME, INFO ) 365 ITOFF = NN 366 70 CONTINUE 367 80 CONTINUE 368 I4 = I4 + 2*NN 369 90 CONTINUE 370 END IF 371* 372 100 CONTINUE 373 110 CONTINUE 374 120 CONTINUE 375* 376* Print tables of results for CGEHRD and CUNGHR 377* 378 DO 140 ISUB = 1, NSUBS - 1 379 IF( .NOT.TIMSUB( ISUB ) ) 380 $ GO TO 140 381 WRITE( NOUT, FMT = 9998 )SUBNAM( ISUB ) 382 IF( NLDA.GT.1 ) THEN 383 DO 130 I = 1, NLDA 384 WRITE( NOUT, FMT = 9997 )I, LDAVAL( I ) 385 130 CONTINUE 386 END IF 387 WRITE( NOUT, FMT = 9995 ) 388 CALL SPRTB3( '( NB, NX)', 'N', NNB, NBVAL, NXVAL, NM, MVAL, 389 $ NLDA, RESLTS( 1, 1, 1, ISUB ), LDR1, LDR2, NOUT ) 390 140 CONTINUE 391* 392* Print tables of results for CUNMHR 393* 394 ISUB = 3 395 IF( TIMSUB( ISUB ) ) THEN 396 I4 = 2 397 DO 180 ISIDE = 1, 2 398 IF( ISIDE.EQ.1 ) THEN 399 LAB1 = 'M' 400 LAB2 = 'N' 401 IF( NLDA.GT.1 ) THEN 402 WRITE( NOUT, FMT = 9998 )SUBNAM( ISUB ) 403 DO 150 I = 1, NLDA 404 WRITE( NOUT, FMT = 9997 )I, LDAVAL( I ) 405 150 CONTINUE 406 WRITE( NOUT, FMT = 9994 ) 407 END IF 408 ELSE 409 LAB1 = 'N' 410 LAB2 = 'M' 411 END IF 412 DO 170 ITRAN = 1, 2 413 DO 160 IN = 1, NN 414 WRITE( NOUT, FMT = 9996 )SUBNAM( ISUB ), 415 $ SIDES( ISIDE ), TRANSS( ITRAN ), LAB2, NVAL( IN ) 416 CALL SPRTBL( 'NB', LAB1, NNB, NBVAL, NM, MVAL, NLDA, 417 $ RESLTS( 1, 1, 1, I4+IN ), LDR1, LDR2, 418 $ NOUT ) 419 160 CONTINUE 420 I4 = I4 + NN 421 170 CONTINUE 422 180 CONTINUE 423 END IF 424 190 CONTINUE 425* 426* Print a table of results for each timed routine. 427* 428 RETURN 429 9999 FORMAT( 1X, A6, ' timing run not attempted', / ) 430 9998 FORMAT( / ' *** Speed of ', A6, ' in megaflops *** ' ) 431 9997 FORMAT( 5X, 'line ', I2, ' with LDA = ', I5 ) 432 9996 FORMAT( / 5X, A6, ' with SIDE = ''', A1, ''', TRANS = ''', A1, 433 $ ''', ', A1, ' =', I6, / ) 434 9995 FORMAT( / 5X, 'ILO = 1, IHI = N', / ) 435 9994 FORMAT( / 5X, 'ILO = 1, IHI = M if SIDE = ''L''', / 5X, 436 $ ' = N if SIDE = ''R''' ) 437* 438* End of CTIMHR 439* 440 END 441