C
C  This file is part of MUMPS 5.1.2, released
C  on Mon Oct 2 07:37:01 UTC 2017
C
C
C  Copyright 1991-2017 CERFACS, CNRS, ENS Lyon, INP Toulouse, Inria,
C  University of Bordeaux.
C
C  This version of MUMPS is provided to you free of charge. It is
C  released under the CeCILL-C license:
C  http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.html
C
      SUBROUTINE ZMUMPS_MV_ELT( N, NELT, ELTPTR, ELTVAR, A_ELT,
     &                          X, Y, K50, MTYPE )
      IMPLICIT NONE
C
C  Purpose
C  =======
C
C  Perform the elemental matrix-vector product
C     Y <- A_ELT   X    if MTYPE = 1
C     Y <- A_ELT^T X    if MTYPE = 0
C
C  If K50 is different from 0, the elements are assumed to be
C  in symmetric packed storage: the lower part is stored by
C  columns.  Otherwise each element is square, stored by columns.
C
C  Note
C  ====
C
C  A_ELT is processed entry by entry and this code is not
C  optimized.  In particular, one could gather/scatter X / Y
C  for each element to improve performance.
C
C  Arguments
C  =========
C
      INTEGER N, NELT, K50, MTYPE
      INTEGER ELTPTR( NELT + 1 ), ELTVAR( * )
      COMPLEX(kind=8) A_ELT( * ), X( N ), Y( N )
C
C  Local variables
C  ===============
C
      INTEGER IEL, I, J, SIZEI, IELPTR
C     K8 is a 64-bit running index into A_ELT (entry counts
C     may exceed 32-bit range)
      INTEGER(8) :: K8
      COMPLEX(kind=8) TEMP
      COMPLEX(kind=8) ZERO
      PARAMETER( ZERO = (0.0D0,0.0D0) )
C
C  Executable statements
C  =====================
C
      Y  = ZERO
      K8 = 1_8
C     --------------------
C     Process the elements
C     --------------------
      DO IEL = 1, NELT
C       SIZEI  : order of element IEL
C       IELPTR : offset of its variable list in ELTVAR
        SIZEI  = ELTPTR( IEL + 1 ) - ELTPTR( IEL )
        IELPTR = ELTPTR( IEL ) - 1
        IF ( K50 .eq. 0 ) THEN
C         -------------------
C         Unsymmetric element
C         stored by columns
C         -------------------
          IF ( MTYPE .eq. 1 ) THEN
C           -----------------
C           Compute A_ELT x X
C           -----------------
            DO J = 1, SIZEI
              TEMP = X( ELTVAR( IELPTR + J ) )
              DO I = 1, SIZEI
                Y( ELTVAR( IELPTR + I ) ) =
     &          Y( ELTVAR( IELPTR + I ) ) +
     &                A_ELT( K8 ) * TEMP
                K8 = K8 + 1
              END DO
            END DO
          ELSE
C           -------------------
C           Compute A_ELT^T x X
C           -------------------
            DO J = 1, SIZEI
C             Accumulate the dot product of column J with X
              TEMP = Y( ELTVAR( IELPTR + J ) )
              DO I = 1, SIZEI
                TEMP = TEMP +
     &                A_ELT( K8 ) * X( ELTVAR( IELPTR + I ) )
                K8 = K8 + 1
              END DO
              Y( ELTVAR( IELPTR + J ) ) = TEMP
            END DO
          END IF
        ELSE
C         -----------------
C         Symmetric element
C         L stored by cols
C         -----------------
          DO J = 1, SIZEI
C           Diagonal entry counted once
            Y( ELTVAR( IELPTR + J ) ) =
     &      Y( ELTVAR( IELPTR + J ) ) +
     &            A_ELT( K8 ) * X( ELTVAR( IELPTR + J ) )
            K8 = K8 + 1
            DO I = J+1, SIZEI
C             Off-diagonal entry and its transpose
              Y( ELTVAR( IELPTR + I ) ) =
     &        Y( ELTVAR( IELPTR + I ) ) +
     &              A_ELT( K8 ) * X( ELTVAR( IELPTR + J ) )
              Y( ELTVAR( IELPTR + J ) ) =
     &        Y( ELTVAR( IELPTR + J ) ) +
     &              A_ELT( K8 ) * X( ELTVAR( IELPTR + I ) )
              K8 = K8 + 1
            END DO
          END DO
        END IF
      END DO
      RETURN
      END SUBROUTINE ZMUMPS_MV_ELT
      SUBROUTINE ZMUMPS_LOC_MV8
     &( N, NZ_loc8, IRN_loc, JCN_loc, A_loc, X, Y_loc,
     &  LDLT, MTYPE )
      IMPLICIT NONE
C
C  Purpose:
C  =======
C
C  Perform a distributed matrix-vector product.
C     Y_loc <- A   X    if MTYPE = 1
C     Y_loc <- A^T X    if MTYPE = 0
C
C  Notes:
C  =====
C
C  1) assembly of all Y_loc still has to be done on exit.
C  2) X should be available on all processors.
C
C  Arguments:
C  =========
C
      INTEGER N
      INTEGER(8) :: NZ_loc8
      INTEGER IRN_loc( NZ_loc8 ), JCN_loc( NZ_loc8 )
      COMPLEX(kind=8) A_loc( NZ_loc8 ), X( N ), Y_loc( N )
      INTEGER LDLT, MTYPE
C
C  Local variables:
C  ===============
C
      INTEGER I, J
      INTEGER(8) :: K8
      COMPLEX(kind=8) ZERO
      PARAMETER( ZERO = (0.0D0,0.0D0) )
      Y_loc = ZERO
      IF ( LDLT .eq. 0 ) THEN
C       Unsymmetric
        IF ( MTYPE .eq. 1 ) THEN
C         No transpose
          DO K8 = 1_8, NZ_loc8
            I = IRN_loc(K8)
            J = JCN_loc(K8)
C           Out-of-range entries are skipped
            IF ((I .LE. 0) .OR. (I .GT. N) .OR.
     &          (J .LE. 0) .OR. (J .GT. N)
     &         ) CYCLE
            Y_loc(I) = Y_loc(I) + A_loc(K8) * X(J)
          ENDDO
        ELSE
C         Transpose
          DO K8 = 1_8, NZ_loc8
            I = IRN_loc(K8)
            J = JCN_loc(K8)
            IF ((I .LE. 0) .OR. (I .GT. N)
     &          .OR. (J .LE. 0) .OR. (J .GT. N)
     &         ) CYCLE
            Y_loc(J) = Y_loc(J) + A_loc(K8) * X(I)
          ENDDO
        END IF
      ELSE
C       Lower (or upper) part of symmetric
C       matrix was provided (LDLT facto);
C       each off-diagonal entry acts on both rows
        DO K8 = 1_8, NZ_loc8
          I = IRN_loc(K8)
          J = JCN_loc(K8)
          IF ((I .LE. 0) .OR. (I .GT. N) .OR.
     &        (J .LE. 0) .OR. (J .GT. N)
     &       ) CYCLE
          Y_loc(I) = Y_loc(I) + A_loc(K8) * X(J)
          IF (J.NE.I) THEN
            Y_loc(J) = Y_loc(J) + A_loc(K8) * X(I)
          ENDIF
        ENDDO
      END IF
      RETURN
      END SUBROUTINE ZMUMPS_LOC_MV8
      SUBROUTINE ZMUMPS_MV8( N, NZ8, IRN, ICN, ASPK, X, Y,
     &                       LDLT, MTYPE, MAXTRANS, PERM,
     &                       IFLAG, IERROR )
      IMPLICIT NONE
C
C  Purpose:
C  =======
C
C  Perform matrix-vector product
C     Y <- A   X    if MTYPE = 1
C     Y <- A^T X    if MTYPE = 0
C
C  Note:
C  ====
C
C  MAXTRANS should be set to 1 if a column permutation
C  was applied on A and we still want the matrix vector
C  product wrt the original matrix.
C
C  On allocation failure IFLAG is set to -13 and IERROR to N;
C  NOTE(review): they are left unassigned on success, which
C  follows the MUMPS convention of only setting them on error.
C
C  Arguments:
C  =========
C
      INTEGER N, LDLT, MTYPE, MAXTRANS
      INTEGER(8) :: NZ8
      INTEGER IRN( NZ8 ), ICN( NZ8 )
      INTEGER PERM( N )
      COMPLEX(kind=8) ASPK( NZ8 ), X( N ), Y( N )
      INTEGER, intent(out) :: IFLAG, IERROR
C
C  Local variables
C  ===============
C
      INTEGER I, J
      INTEGER(8) :: K8
C     PX holds the (possibly permuted) copy of X
      COMPLEX(kind=8), DIMENSION(:), ALLOCATABLE :: PX
      COMPLEX(kind=8) ZERO
      INTEGER :: allocok
      PARAMETER( ZERO = (0.0D0,0.0D0) )
      Y = ZERO
      ALLOCATE(PX(N), stat=allocok)
C     The stat= value is positive on allocation failure and
C     zero on success (it is never negative), so test > 0.
      IF (allocok .GT. 0) THEN
        IFLAG  = -13
        IERROR = N
        RETURN
      ENDIF
C
C     --------------------------------------
C     Permute X if A has been permuted
C     with some max-trans column permutation
C     --------------------------------------
      IF ( MAXTRANS .eq. 1 .and. MTYPE .eq. 1) THEN
        DO I = 1, N
          PX(I) = X( PERM( I ) )
        END DO
      ELSE
        PX = X
      END IF
      IF ( LDLT .eq. 0 ) THEN
C
C       Complete unsymmetric matrix was provided (LU facto)
        IF (MTYPE .EQ. 1) THEN
          DO K8 = 1_8, NZ8
            I = IRN(K8)
            J = ICN(K8)
C           Out-of-range entries are skipped
            IF ((I .LE. 0) .OR. (I .GT. N) .OR.
     &          (J .LE. 0) .OR. (J .GT. N)
     &         ) CYCLE
            Y(I) = Y(I) + ASPK(K8) * PX(J)
          ENDDO
        ELSE
          DO K8 = 1_8, NZ8
            I = IRN(K8)
            J = ICN(K8)
            IF ((I .LE. 0) .OR. (I .GT. N) .OR.
     &          (J .LE. 0) .OR. (J .GT. N)
     &         ) CYCLE
            Y(J) = Y(J) + ASPK(K8) * PX(I)
          ENDDO
        ENDIF
C
      ELSE
C
C       Lower (or upper) part of symmetric
C       matrix was provided (LDLT facto)
        DO K8 = 1_8, NZ8
          I = IRN(K8)
          J = ICN(K8)
          IF ((I .LE. 0) .OR. (I .GT. N) .OR.
     &        (J .LE. 0) .OR. (J .GT. N)
     &       ) CYCLE
          Y(I) = Y(I) + ASPK(K8) * PX(J)
          IF (J.NE.I) THEN
            Y(J) = Y(J) + ASPK(K8) * PX(I)
          ENDIF
        ENDDO
      END IF
C     Undo the column permutation on the result of the
C     transposed product
      IF ( MAXTRANS .EQ. 1 .AND. MTYPE .eq. 0 ) THEN
        PX = Y
        DO I = 1, N
          Y( PERM( I ) ) = PX( I )
        END DO
      END IF
      DEALLOCATE(PX)
      RETURN
      END SUBROUTINE ZMUMPS_MV8
C
C
      SUBROUTINE ZMUMPS_LOC_OMEGA1
     &( N, NZ_loc8, IRN_loc, JCN_loc, A_loc, X, Y_loc,
     &  LDLT, MTYPE )
      IMPLICIT NONE
C
C  Purpose:
C  =======
C  Compute
C   * If MTYPE = 1
C        Y_loc(i) = Sum | Aij | | Xj |
C                    j
C   * If MTYPE = 0
C        Y_loc(j) = Sum | Aij | | Xi |
C
C  Notes:
C  =====
C
C  1) assembly of all Y_loc still has to be done.
C  2) X should be available on all processors.
C
C  Arguments:
C  =========
C
      INTEGER N
      INTEGER(8) :: NZ_loc8
      INTEGER IRN_loc( NZ_loc8 ), JCN_loc( NZ_loc8 )
      COMPLEX(kind=8) A_loc( NZ_loc8 ), X( N )
C     Y_loc is real: it accumulates moduli, not complex values
      DOUBLE PRECISION Y_loc( N )
      INTEGER LDLT, MTYPE
C
C  Local variables:
C  ===============
C
      INTEGER I, J
      INTEGER(8) :: K8
      DOUBLE PRECISION, PARAMETER :: RZERO=0.0D0
C
      Y_loc = RZERO
      IF ( LDLT .eq. 0 ) THEN
C       Unsymmetric
        IF ( MTYPE .eq. 1 ) THEN
C         No transpose
          DO K8 = 1_8, NZ_loc8
            I = IRN_loc(K8)
            J = JCN_loc(K8)
C           Out-of-range entries are skipped
            IF ((I .LE. 0) .OR. (I .GT. N) .OR.
     &          (J .LE. 0) .OR. (J .GT. N)
     &         ) CYCLE
            Y_loc(I) = Y_loc(I) + abs( A_loc(K8) * X(J) )
          ENDDO
        ELSE
C         Transpose
          DO K8 = 1_8, NZ_loc8
            I = IRN_loc(K8)
            J = JCN_loc(K8)
            IF ((I .LE. 0) .OR. (I .GT. N)
     &          .OR. (J .LE. 0) .OR. (J .GT. N)
     &         ) CYCLE
            Y_loc(J) = Y_loc(J) + abs( A_loc(K8) * X(I) )
          ENDDO
        END IF
      ELSE
C       Lower (or upper) part of symmetric
C       matrix was provided (LDLT facto)
        DO K8 = 1_8, NZ_loc8
          I = IRN_loc(K8)
          J = JCN_loc(K8)
          IF ((I .LE. 0) .OR. (I .GT. N) .OR.
     &        (J .LE. 0) .OR. (J .GT. N)
     &       ) CYCLE
          Y_loc(I) = Y_loc(I) + abs( A_loc(K8) * X(J) )
          IF (J.NE.I) THEN
            Y_loc(J) = Y_loc(J) + abs( A_loc(K8) * X(I) )
          ENDIF
        ENDDO
      END IF
      RETURN
      END SUBROUTINE ZMUMPS_LOC_OMEGA1