1*> \brief \b DSB2ST_KERNELS 2* 3* @generated from zhb2st_kernels.f, fortran z -> d, Wed Dec 7 08:22:39 2016 4* 5* =========== DOCUMENTATION =========== 6* 7* Online html documentation available at 8* http://www.netlib.org/lapack/explore-html/ 9* 10*> \htmlonly 11*> Download DSB2ST_KERNELS + dependencies 12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsb2st_kernels.f"> 13*> [TGZ]</a> 14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsb2st_kernels.f"> 15*> [ZIP]</a> 16*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsb2st_kernels.f"> 17*> [TXT]</a> 18*> \endhtmlonly 19* 20* Definition: 21* =========== 22* 23* SUBROUTINE DSB2ST_KERNELS( UPLO, WANTZ, TTYPE, 24* ST, ED, SWEEP, N, NB, IB, 25* A, LDA, V, TAU, LDVT, WORK) 26* 27* IMPLICIT NONE 28* 29* .. Scalar Arguments .. 30* CHARACTER UPLO 31* LOGICAL WANTZ 32* INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT 33* .. 34* .. Array Arguments .. 35* DOUBLE PRECISION A( LDA, * ), V( * ), 36* TAU( * ), WORK( * ) 37* 38*> \par Purpose: 39* ============= 40*> 41*> \verbatim 42*> 43*> DSB2ST_KERNELS is an internal routine used by the DSYTRD_SB2ST 44*> subroutine. 45*> \endverbatim 46* 47* Arguments: 48* ========== 49* 50*> \param[in] UPLO 51*> \verbatim 52*> UPLO is CHARACTER*1 53*> \endverbatim 54*> 55*> \param[in] WANTZ 56*> \verbatim 57*> WANTZ is LOGICAL which indicate if Eigenvalue are requested or both 58*> Eigenvalue/Eigenvectors. 59*> \endverbatim 60*> 61*> \param[in] TTYPE 62*> \verbatim 63*> TTYPE is INTEGER 64*> \endverbatim 65*> 66*> \param[in] ST 67*> \verbatim 68*> ST is INTEGER 69*> internal parameter for indices. 70*> \endverbatim 71*> 72*> \param[in] ED 73*> \verbatim 74*> ED is INTEGER 75*> internal parameter for indices. 76*> \endverbatim 77*> 78*> \param[in] SWEEP 79*> \verbatim 80*> SWEEP is INTEGER 81*> internal parameter for indices. 82*> \endverbatim 83*> 84*> \param[in] N 85*> \verbatim 86*> N is INTEGER. The order of the matrix A. 87*> \endverbatim 88*> 89*> \param[in] NB 90*> \verbatim 91*> NB is INTEGER. The size of the band. 92*> \endverbatim 93*> 94*> \param[in] IB 95*> \verbatim 96*> IB is INTEGER. 97*> \endverbatim 98*> 99*> \param[in, out] A 100*> \verbatim 101*> A is DOUBLE PRECISION array. A pointer to the matrix A. 102*> \endverbatim 103*> 104*> \param[in] LDA 105*> \verbatim 106*> LDA is INTEGER. The leading dimension of the matrix A. 107*> \endverbatim 108*> 109*> \param[out] V 110*> \verbatim 111*> V is DOUBLE PRECISION array, dimension 2*n if eigenvalues only are 112*> requested or to be queried for vectors. 113*> \endverbatim 114*> 115*> \param[out] TAU 116*> \verbatim 117*> TAU is DOUBLE PRECISION array, dimension (2*n). 118*> The scalar factors of the Householder reflectors are stored 119*> in this array. 120*> \endverbatim 121*> 122*> \param[in] LDVT 123*> \verbatim 124*> LDVT is INTEGER. 125*> \endverbatim 126*> 127*> \param[out] WORK 128*> \verbatim 129*> WORK is DOUBLE PRECISION array. Workspace of size nb. 130*> \endverbatim 131*> 132*> \par Further Details: 133* ===================== 134*> 135*> \verbatim 136*> 137*> Implemented by Azzam Haidar. 138*> 139*> All details are available on technical report, SC11, SC13 papers. 140*> 141*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. 142*> Parallel reduction to condensed forms for symmetric eigenvalue problems 143*> using aggregated fine-grained and memory-aware kernels. In Proceedings 144*> of 2011 International Conference for High Performance Computing, 145*> Networking, Storage and Analysis (SC '11), New York, NY, USA, 146*> Article 8 , 11 pages. 147*> http://doi.acm.org/10.1145/2063384.2063394 148*> 149*> A. Haidar, J. Kurzak, P. Luszczek, 2013. 150*> An improved parallel singular value algorithm and its implementation 151*> for multicore hardware, In Proceedings of 2013 International Conference 152*> for High Performance Computing, Networking, Storage and Analysis (SC '13). 153*> Denver, Colorado, USA, 2013. 154*> Article 90, 12 pages. 155*> http://doi.acm.org/10.1145/2503210.2503292 156*> 157*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. 158*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure 159*> calculations based on fine-grained memory aware tasks. 160*> International Journal of High Performance Computing Applications. 161*> Volume 28 Issue 2, Pages 196-209, May 2014. 162*> http://hpc.sagepub.com/content/28/2/196 163*> 164*> \endverbatim 165*> 166* ===================================================================== 167 SUBROUTINE DSB2ST_KERNELS( UPLO, WANTZ, TTYPE, 168 $ ST, ED, SWEEP, N, NB, IB, 169 $ A, LDA, V, TAU, LDVT, WORK) 170* 171 IMPLICIT NONE 172* 173* -- LAPACK computational routine -- 174* -- LAPACK is a software package provided by Univ. of Tennessee, -- 175* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- 176* 177* .. Scalar Arguments .. 178 CHARACTER UPLO 179 LOGICAL WANTZ 180 INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT 181* .. 182* .. Array Arguments .. 183 DOUBLE PRECISION A( LDA, * ), V( * ), 184 $ TAU( * ), WORK( * ) 185* .. 186* 187* ===================================================================== 188* 189* .. Parameters .. 190 DOUBLE PRECISION ZERO, ONE 191 PARAMETER ( ZERO = 0.0D+0, 192 $ ONE = 1.0D+0 ) 193* .. 194* .. Local Scalars .. 195 LOGICAL UPPER 196 INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS, 197 $ DPOS, OFDPOS, AJETER 198 DOUBLE PRECISION CTMP 199* .. 200* .. External Subroutines .. 201 EXTERNAL DLARFG, DLARFX, DLARFY 202* .. 203* .. Intrinsic Functions .. 204 INTRINSIC MOD 205* .. External Functions .. 206 LOGICAL LSAME 207 EXTERNAL LSAME 208* .. 209* .. 210* .. Executable Statements .. 211* 212 AJETER = IB + LDVT 213 UPPER = LSAME( UPLO, 'U' ) 214 215 IF( UPPER ) THEN 216 DPOS = 2 * NB + 1 217 OFDPOS = 2 * NB 218 ELSE 219 DPOS = 1 220 OFDPOS = 2 221 ENDIF 222 223* 224* Upper case 225* 226 IF( UPPER ) THEN 227* 228 IF( WANTZ ) THEN 229 VPOS = MOD( SWEEP-1, 2 ) * N + ST 230 TAUPOS = MOD( SWEEP-1, 2 ) * N + ST 231 ELSE 232 VPOS = MOD( SWEEP-1, 2 ) * N + ST 233 TAUPOS = MOD( SWEEP-1, 2 ) * N + ST 234 ENDIF 235* 236 IF( TTYPE.EQ.1 ) THEN 237 LM = ED - ST + 1 238* 239 V( VPOS ) = ONE 240 DO 10 I = 1, LM-1 241 V( VPOS+I ) = ( A( OFDPOS-I, ST+I ) ) 242 A( OFDPOS-I, ST+I ) = ZERO 243 10 CONTINUE 244 CTMP = ( A( OFDPOS, ST ) ) 245 CALL DLARFG( LM, CTMP, V( VPOS+1 ), 1, 246 $ TAU( TAUPOS ) ) 247 A( OFDPOS, ST ) = CTMP 248* 249 LM = ED - ST + 1 250 CALL DLARFY( UPLO, LM, V( VPOS ), 1, 251 $ ( TAU( TAUPOS ) ), 252 $ A( DPOS, ST ), LDA-1, WORK) 253 ENDIF 254* 255 IF( TTYPE.EQ.3 ) THEN 256* 257 LM = ED - ST + 1 258 CALL DLARFY( UPLO, LM, V( VPOS ), 1, 259 $ ( TAU( TAUPOS ) ), 260 $ A( DPOS, ST ), LDA-1, WORK) 261 ENDIF 262* 263 IF( TTYPE.EQ.2 ) THEN 264 J1 = ED+1 265 J2 = MIN( ED+NB, N ) 266 LN = ED-ST+1 267 LM = J2-J1+1 268 IF( LM.GT.0) THEN 269 CALL DLARFX( 'Left', LN, LM, V( VPOS ), 270 $ ( TAU( TAUPOS ) ), 271 $ A( DPOS-NB, J1 ), LDA-1, WORK) 272* 273 IF( WANTZ ) THEN 274 VPOS = MOD( SWEEP-1, 2 ) * N + J1 275 TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 276 ELSE 277 VPOS = MOD( SWEEP-1, 2 ) * N + J1 278 TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 279 ENDIF 280* 281 V( VPOS ) = ONE 282 DO 30 I = 1, LM-1 283 V( VPOS+I ) = 284 $ ( A( DPOS-NB-I, J1+I ) ) 285 A( DPOS-NB-I, J1+I ) = ZERO 286 30 CONTINUE 287 CTMP = ( A( DPOS-NB, J1 ) ) 288 CALL DLARFG( LM, CTMP, V( VPOS+1 ), 1, TAU( TAUPOS ) ) 289 A( DPOS-NB, J1 ) = CTMP 290* 291 CALL DLARFX( 'Right', LN-1, LM, V( VPOS ), 292 $ TAU( TAUPOS ), 293 $ A( DPOS-NB+1, J1 ), LDA-1, WORK) 294 ENDIF 295 ENDIF 296* 297* Lower case 298* 299 ELSE 300* 301 IF( WANTZ ) THEN 302 VPOS = MOD( SWEEP-1, 2 ) * N + ST 303 TAUPOS = MOD( SWEEP-1, 2 ) * N + ST 304 ELSE 305 VPOS = MOD( SWEEP-1, 2 ) * N + ST 306 TAUPOS = MOD( SWEEP-1, 2 ) * N + ST 307 ENDIF 308* 309 IF( TTYPE.EQ.1 ) THEN 310 LM = ED - ST + 1 311* 312 V( VPOS ) = ONE 313 DO 20 I = 1, LM-1 314 V( VPOS+I ) = A( OFDPOS+I, ST-1 ) 315 A( OFDPOS+I, ST-1 ) = ZERO 316 20 CONTINUE 317 CALL DLARFG( LM, A( OFDPOS, ST-1 ), V( VPOS+1 ), 1, 318 $ TAU( TAUPOS ) ) 319* 320 LM = ED - ST + 1 321* 322 CALL DLARFY( UPLO, LM, V( VPOS ), 1, 323 $ ( TAU( TAUPOS ) ), 324 $ A( DPOS, ST ), LDA-1, WORK) 325 326 ENDIF 327* 328 IF( TTYPE.EQ.3 ) THEN 329 LM = ED - ST + 1 330* 331 CALL DLARFY( UPLO, LM, V( VPOS ), 1, 332 $ ( TAU( TAUPOS ) ), 333 $ A( DPOS, ST ), LDA-1, WORK) 334 335 ENDIF 336* 337 IF( TTYPE.EQ.2 ) THEN 338 J1 = ED+1 339 J2 = MIN( ED+NB, N ) 340 LN = ED-ST+1 341 LM = J2-J1+1 342* 343 IF( LM.GT.0) THEN 344 CALL DLARFX( 'Right', LM, LN, V( VPOS ), 345 $ TAU( TAUPOS ), A( DPOS+NB, ST ), 346 $ LDA-1, WORK) 347* 348 IF( WANTZ ) THEN 349 VPOS = MOD( SWEEP-1, 2 ) * N + J1 350 TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 351 ELSE 352 VPOS = MOD( SWEEP-1, 2 ) * N + J1 353 TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 354 ENDIF 355* 356 V( VPOS ) = ONE 357 DO 40 I = 1, LM-1 358 V( VPOS+I ) = A( DPOS+NB+I, ST ) 359 A( DPOS+NB+I, ST ) = ZERO 360 40 CONTINUE 361 CALL DLARFG( LM, A( DPOS+NB, ST ), V( VPOS+1 ), 1, 362 $ TAU( TAUPOS ) ) 363* 364 CALL DLARFX( 'Left', LM, LN-1, V( VPOS ), 365 $ ( TAU( TAUPOS ) ), 366 $ A( DPOS+NB-1, ST+1 ), LDA-1, WORK) 367 368 ENDIF 369 ENDIF 370 ENDIF 371* 372 RETURN 373* 374* End of DSB2ST_KERNELS 375* 376 END 377