1*> \brief \b DSB2ST_KERNELS 2* 3* @generated from zhb2st_kernels.f, fortran z -> d, Wed Dec 7 08:22:39 2016 4* 5* =========== DOCUMENTATION =========== 6* 7* Online html documentation available at 8* http://www.netlib.org/lapack/explore-html/ 9* 10*> \htmlonly 11*> Download DSB2ST_KERNELS + dependencies 12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsb2st_kernels.f"> 13*> [TGZ]</a> 14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsb2st_kernels.f"> 15*> [ZIP]</a> 16*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsb2st_kernels.f"> 17*> [TXT]</a> 18*> \endhtmlonly 19* 20* Definition: 21* =========== 22* 23* SUBROUTINE DSB2ST_KERNELS( UPLO, WANTZ, TTYPE, 24* ST, ED, SWEEP, N, NB, IB, 25* A, LDA, V, TAU, LDVT, WORK) 26* 27* IMPLICIT NONE 28* 29* .. Scalar Arguments .. 30* CHARACTER UPLO 31* LOGICAL WANTZ 32* INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT 33* .. 34* .. Array Arguments .. 35* DOUBLE PRECISION A( LDA, * ), V( * ), 36* TAU( * ), WORK( * ) 37* 38*> \par Purpose: 39* ============= 40*> 41*> \verbatim 42*> 43*> DSB2ST_KERNELS is an internal routine used by the DSYTRD_SB2ST 44*> subroutine. 45*> \endverbatim 46* 47* Arguments: 48* ========== 49* 50*> \param[in] UPLO 51*> \verbatim 52*> UPLO is CHARACTER*1 53*> \endverbatim 54*> 55*> \param[in] WANTZ 56*> \verbatim 57*> WANTZ is LOGICAL which indicate if Eigenvalue are requested or both 58*> Eigenvalue/Eigenvectors. 59*> \endverbatim 60*> 61*> \param[in] TTYPE 62*> \verbatim 63*> TTYPE is INTEGER 64*> \endverbatim 65*> 66*> \param[in] ST 67*> \verbatim 68*> ST is INTEGER 69*> internal parameter for indices. 70*> \endverbatim 71*> 72*> \param[in] ED 73*> \verbatim 74*> ED is INTEGER 75*> internal parameter for indices. 76*> \endverbatim 77*> 78*> \param[in] SWEEP 79*> \verbatim 80*> SWEEP is INTEGER 81*> internal parameter for indices. 82*> \endverbatim 83*> 84*> \param[in] N 85*> \verbatim 86*> N is INTEGER. The order of the matrix A. 87*> \endverbatim 88*> 89*> \param[in] NB 90*> \verbatim 91*> NB is INTEGER. The size of the band. 92*> \endverbatim 93*> 94*> \param[in] IB 95*> \verbatim 96*> IB is INTEGER. 97*> \endverbatim 98*> 99*> \param[in, out] A 100*> \verbatim 101*> A is DOUBLE PRECISION array. A pointer to the matrix A. 102*> \endverbatim 103*> 104*> \param[in] LDA 105*> \verbatim 106*> LDA is INTEGER. The leading dimension of the matrix A. 107*> \endverbatim 108*> 109*> \param[out] V 110*> \verbatim 111*> V is DOUBLE PRECISION array, dimension 2*n if eigenvalues only are 112*> requested or to be queried for vectors. 113*> \endverbatim 114*> 115*> \param[out] TAU 116*> \verbatim 117*> TAU is DOUBLE PRECISION array, dimension (2*n). 118*> The scalar factors of the Householder reflectors are stored 119*> in this array. 120*> \endverbatim 121*> 122*> \param[in] LDVT 123*> \verbatim 124*> LDVT is INTEGER. 125*> \endverbatim 126*> 127*> \param[out] WORK 128*> \verbatim 129*> WORK is DOUBLE PRECISION array. Workspace of size nb. 130*> \endverbatim 131*> 132*> \par Further Details: 133* ===================== 134*> 135*> \verbatim 136*> 137*> Implemented by Azzam Haidar. 138*> 139*> All details are available on technical report, SC11, SC13 papers. 140*> 141*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. 142*> Parallel reduction to condensed forms for symmetric eigenvalue problems 143*> using aggregated fine-grained and memory-aware kernels. In Proceedings 144*> of 2011 International Conference for High Performance Computing, 145*> Networking, Storage and Analysis (SC '11), New York, NY, USA, 146*> Article 8 , 11 pages. 147*> http://doi.acm.org/10.1145/2063384.2063394 148*> 149*> A. Haidar, J. Kurzak, P. Luszczek, 2013. 150*> An improved parallel singular value algorithm and its implementation 151*> for multicore hardware, In Proceedings of 2013 International Conference 152*> for High Performance Computing, Networking, Storage and Analysis (SC '13). 153*> Denver, Colorado, USA, 2013. 154*> Article 90, 12 pages. 155*> http://doi.acm.org/10.1145/2503210.2503292 156*> 157*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. 158*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure 159*> calculations based on fine-grained memory aware tasks. 160*> International Journal of High Performance Computing Applications. 161*> Volume 28 Issue 2, Pages 196-209, May 2014. 162*> http://hpc.sagepub.com/content/28/2/196 163*> 164*> \endverbatim 165*> 166* ===================================================================== 167 SUBROUTINE DSB2ST_KERNELS( UPLO, WANTZ, TTYPE, 168 $ ST, ED, SWEEP, N, NB, IB, 169 $ A, LDA, V, TAU, LDVT, WORK) 170* 171 IMPLICIT NONE 172* 173* -- LAPACK computational routine (version 3.7.1) -- 174* -- LAPACK is a software package provided by Univ. of Tennessee, -- 175* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- 176* June 2017 177* 178* .. Scalar Arguments .. 179 CHARACTER UPLO 180 LOGICAL WANTZ 181 INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT 182* .. 183* .. Array Arguments .. 184 DOUBLE PRECISION A( LDA, * ), V( * ), 185 $ TAU( * ), WORK( * ) 186* .. 187* 188* ===================================================================== 189* 190* .. Parameters .. 191 DOUBLE PRECISION ZERO, ONE 192 PARAMETER ( ZERO = 0.0D+0, 193 $ ONE = 1.0D+0 ) 194* .. 195* .. Local Scalars .. 196 LOGICAL UPPER 197 INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS, 198 $ DPOS, OFDPOS, AJETER 199 DOUBLE PRECISION CTMP 200* .. 201* .. External Subroutines .. 202 EXTERNAL DLARFG, DLARFX, DLARFY 203* .. 204* .. Intrinsic Functions .. 205 INTRINSIC MOD 206* .. External Functions .. 207 LOGICAL LSAME 208 EXTERNAL LSAME 209* .. 210* .. 211* .. Executable Statements .. 212* 213 AJETER = IB + LDVT 214 UPPER = LSAME( UPLO, 'U' ) 215 216 IF( UPPER ) THEN 217 DPOS = 2 * NB + 1 218 OFDPOS = 2 * NB 219 ELSE 220 DPOS = 1 221 OFDPOS = 2 222 ENDIF 223 224* 225* Upper case 226* 227 IF( UPPER ) THEN 228* 229 IF( WANTZ ) THEN 230 VPOS = MOD( SWEEP-1, 2 ) * N + ST 231 TAUPOS = MOD( SWEEP-1, 2 ) * N + ST 232 ELSE 233 VPOS = MOD( SWEEP-1, 2 ) * N + ST 234 TAUPOS = MOD( SWEEP-1, 2 ) * N + ST 235 ENDIF 236* 237 IF( TTYPE.EQ.1 ) THEN 238 LM = ED - ST + 1 239* 240 V( VPOS ) = ONE 241 DO 10 I = 1, LM-1 242 V( VPOS+I ) = ( A( OFDPOS-I, ST+I ) ) 243 A( OFDPOS-I, ST+I ) = ZERO 244 10 CONTINUE 245 CTMP = ( A( OFDPOS, ST ) ) 246 CALL DLARFG( LM, CTMP, V( VPOS+1 ), 1, 247 $ TAU( TAUPOS ) ) 248 A( OFDPOS, ST ) = CTMP 249* 250 LM = ED - ST + 1 251 CALL DLARFY( UPLO, LM, V( VPOS ), 1, 252 $ ( TAU( TAUPOS ) ), 253 $ A( DPOS, ST ), LDA-1, WORK) 254 ENDIF 255* 256 IF( TTYPE.EQ.3 ) THEN 257* 258 LM = ED - ST + 1 259 CALL DLARFY( UPLO, LM, V( VPOS ), 1, 260 $ ( TAU( TAUPOS ) ), 261 $ A( DPOS, ST ), LDA-1, WORK) 262 ENDIF 263* 264 IF( TTYPE.EQ.2 ) THEN 265 J1 = ED+1 266 J2 = MIN( ED+NB, N ) 267 LN = ED-ST+1 268 LM = J2-J1+1 269 IF( LM.GT.0) THEN 270 CALL DLARFX( 'Left', LN, LM, V( VPOS ), 271 $ ( TAU( TAUPOS ) ), 272 $ A( DPOS-NB, J1 ), LDA-1, WORK) 273* 274 IF( WANTZ ) THEN 275 VPOS = MOD( SWEEP-1, 2 ) * N + J1 276 TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 277 ELSE 278 VPOS = MOD( SWEEP-1, 2 ) * N + J1 279 TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 280 ENDIF 281* 282 V( VPOS ) = ONE 283 DO 30 I = 1, LM-1 284 V( VPOS+I ) = 285 $ ( A( DPOS-NB-I, J1+I ) ) 286 A( DPOS-NB-I, J1+I ) = ZERO 287 30 CONTINUE 288 CTMP = ( A( DPOS-NB, J1 ) ) 289 CALL DLARFG( LM, CTMP, V( VPOS+1 ), 1, TAU( TAUPOS ) ) 290 A( DPOS-NB, J1 ) = CTMP 291* 292 CALL DLARFX( 'Right', LN-1, LM, V( VPOS ), 293 $ TAU( TAUPOS ), 294 $ A( DPOS-NB+1, J1 ), LDA-1, WORK) 295 ENDIF 296 ENDIF 297* 298* Lower case 299* 300 ELSE 301* 302 IF( WANTZ ) THEN 303 VPOS = MOD( SWEEP-1, 2 ) * N + ST 304 TAUPOS = MOD( SWEEP-1, 2 ) * N + ST 305 ELSE 306 VPOS = MOD( SWEEP-1, 2 ) * N + ST 307 TAUPOS = MOD( SWEEP-1, 2 ) * N + ST 308 ENDIF 309* 310 IF( TTYPE.EQ.1 ) THEN 311 LM = ED - ST + 1 312* 313 V( VPOS ) = ONE 314 DO 20 I = 1, LM-1 315 V( VPOS+I ) = A( OFDPOS+I, ST-1 ) 316 A( OFDPOS+I, ST-1 ) = ZERO 317 20 CONTINUE 318 CALL DLARFG( LM, A( OFDPOS, ST-1 ), V( VPOS+1 ), 1, 319 $ TAU( TAUPOS ) ) 320* 321 LM = ED - ST + 1 322* 323 CALL DLARFY( UPLO, LM, V( VPOS ), 1, 324 $ ( TAU( TAUPOS ) ), 325 $ A( DPOS, ST ), LDA-1, WORK) 326 327 ENDIF 328* 329 IF( TTYPE.EQ.3 ) THEN 330 LM = ED - ST + 1 331* 332 CALL DLARFY( UPLO, LM, V( VPOS ), 1, 333 $ ( TAU( TAUPOS ) ), 334 $ A( DPOS, ST ), LDA-1, WORK) 335 336 ENDIF 337* 338 IF( TTYPE.EQ.2 ) THEN 339 J1 = ED+1 340 J2 = MIN( ED+NB, N ) 341 LN = ED-ST+1 342 LM = J2-J1+1 343* 344 IF( LM.GT.0) THEN 345 CALL DLARFX( 'Right', LM, LN, V( VPOS ), 346 $ TAU( TAUPOS ), A( DPOS+NB, ST ), 347 $ LDA-1, WORK) 348* 349 IF( WANTZ ) THEN 350 VPOS = MOD( SWEEP-1, 2 ) * N + J1 351 TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 352 ELSE 353 VPOS = MOD( SWEEP-1, 2 ) * N + J1 354 TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 355 ENDIF 356* 357 V( VPOS ) = ONE 358 DO 40 I = 1, LM-1 359 V( VPOS+I ) = A( DPOS+NB+I, ST ) 360 A( DPOS+NB+I, ST ) = ZERO 361 40 CONTINUE 362 CALL DLARFG( LM, A( DPOS+NB, ST ), V( VPOS+1 ), 1, 363 $ TAU( TAUPOS ) ) 364* 365 CALL DLARFX( 'Left', LM, LN-1, V( VPOS ), 366 $ ( TAU( TAUPOS ) ), 367 $ A( DPOS+NB-1, ST+1 ), LDA-1, WORK) 368 369 ENDIF 370 ENDIF 371 ENDIF 372* 373 RETURN 374* 375* END OF DSB2ST_KERNELS 376* 377 END 378