1/* ffpack.inl 2 * Copyright (C) 2014 FFLAS-FFACK group 3 * 4 * Written by Clement Pernet <Clement.Pernet@imag.fr> 5 * Brice Boyer (briceboyer) <boyer.brice@gmail.com> 6 * 7 * 8 * ========LICENCE======== 9 * This file is part of the library FFLAS-FFPACK. 10 * 11 * FFLAS-FFPACK is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public 13 * License as published by the Free Software Foundation; either 14 * version 2.1 of the License, or (at your option) any later version. 15 * 16 * This library is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * Lesser General Public License for more details. 20 * 21 * You should have received a copy of the GNU Lesser General Public 22 * License along with this library; if not, write to the Free Software 23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 24 * ========LICENCE======== 25 *. 26 */ 27 28#ifndef __FFLASFFPACK_ffpack_INL 29#define __FFLASFFPACK_ffpack_INL 30 31namespace FFPACK { 32 33 template <class Field> 34 size_t 35 Rank (const Field& F, const size_t M, const size_t N, 36 typename Field::Element_ptr A, const size_t lda) 37 { 38 size_t R = Rank (F, M, N, A, lda, FFLAS::ParSeqHelper::Sequential()); 39 return R; 40 } 41 42 template <class Field> 43 size_t 44 pRank (const Field& F, const size_t M, const size_t N, 45 typename Field::Element_ptr A, const size_t lda, size_t numthreads) 46 { 47 size_t R; 48 PAR_BLOCK{ 49 size_t nt = numthreads ? numthreads : NUM_THREADS; 50 FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Recursive,FFLAS::StrategyParameter::Threads> parH(nt); 51 R = Rank (F, M, N, A, lda, parH); 52 } 53 return R; 54 } 55 56 template <class Field, class PSHelper> 57 size_t 58 Rank( const Field& F, const size_t M, const size_t N, 59 typename Field::Element_ptr A, const size_t lda, const PSHelper& psH) 60 { 61 if (M == 0 and N == 0) 62 return 0 ; 63 64 size_t *P = FFLAS::fflas_new<size_t>(M); 65 size_t *Q = FFLAS::fflas_new<size_t>(N); 66 size_t R = PLUQ (F, FFLAS::FflasNonUnit, M, N, A, lda, P, Q, psH); 67 FFLAS::fflas_delete( Q); 68 FFLAS::fflas_delete( P); 69 return R; 70 } 71 72 73 template <class Field> 74 bool 75 IsSingular (const Field& F, const size_t M, const size_t N, 76 typename Field::Element_ptr A, const size_t lda) 77 { 78 if ( (M==0) and (N==0) ) return false; 79 if ( (M==0) or (N==0) ) return true; 80 if ( M != N ) return true ; 81 82 83 size_t *P = FFLAS::fflas_new<size_t>(N); 84 size_t *Q = FFLAS::fflas_new<size_t>(M); 85 bool singular = !LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, M, N, A, lda, P, Q, FfpackSingular); 86 87 FFLAS::fflas_delete( P); 88 FFLAS::fflas_delete( Q); 89 return singular; 90 } 91 92 template <class Field> 93 inline typename Field::Element& 94 Det (const Field& F, typename Field::Element& det, const size_t N, 95 typename Field::Element_ptr A, const size_t lda, size_t * P, size_t * Q) 96 { 97 return FFPACK::Det (F, det, N, A, lda, FFLAS::ParSeqHelper::Sequential(), P, Q); 98 } 99 100 template <class Field> 101 inline typename Field::Element& 102 pDet (const Field& F, typename Field::Element& det, const size_t N, 103 typename Field::Element_ptr A, const size_t lda, size_t numthreads, size_t * P, size_t * Q) 104 { 105 //return FFPACK::Det (F, det, N, A, lda, FFLAS::ParSeqHelper::Sequential(), P, Q); 106 PAR_BLOCK{ 107 size_t nt = numthreads ? numthreads : NUM_THREADS; 108 FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Recursive,FFLAS::StrategyParameter::Threads> parH(nt); 109 FFPACK::Det (F, det, N, A, lda, parH, P, Q); 110 } 111 return det; 112 } 113 114 template <class Field, class PSHelper> 115 typename Field::Element& 116 Det (const Field& F, typename Field::Element& det, const size_t N, 117 typename Field::Element_ptr A, const size_t lda, const PSHelper& psH, 118 size_t* P, size_t * Q) 119 { 120 if (N==0) 121 return F.assign(det,F.one) ; 122 bool allocPQ = false; 123 if (P==NULL || Q == NULL) { 124 allocPQ = true; 125 P = FFLAS::fflas_new<size_t>(N); 126 Q = FFLAS::fflas_new<size_t>(N); 127 } 128 size_t R = PLUQ (F,FFLAS::FflasNonUnit,N,N,A,lda,P,Q,psH); 129 130 if (R<N){ 131 if (allocPQ) FFLAS::fflas_delete(P,Q); 132 return F.assign(det,F.zero); 133 } 134 F.assign(det,F.one); 135 typename Field::Element_ptr Ai=A; 136 for (; Ai < A+ N*(lda+1); Ai+=lda+1 ) 137 F.mulin( det, *Ai ); 138 int count=0; 139 for (size_t i=0;i<N;++i){ 140 if (P[i] != i) ++count; 141 if (Q[i] != i) ++count; 142 } 143 144 if (allocPQ) FFLAS::fflas_delete(P,Q); 145 146 if ((count&1) == 1) 147 return F.negin(det); 148 else 149 return det; 150 } 151 152 template <class Field> 153 inline typename Field::Element_ptr 154 Solve (const Field& F, const size_t M, 155 typename Field::Element_ptr A, const size_t lda, 156 typename Field::Element_ptr x, const int incx, 157 typename Field::ConstElement_ptr b, const int incb) { 158 FFLAS::ParSeqHelper::Sequential seqH; 159 return FFPACK::Solve(F, M, A, lda, x, incx, b, incb, seqH); 160 } 161 162 template <class Field, class PSHelper> 163 typename Field::Element_ptr 164 Solve( const Field& F, const size_t M, 165 typename Field::Element_ptr A, const size_t lda, 166 typename Field::Element_ptr x, const int incx, 167 typename Field::ConstElement_ptr b, const int incb, PSHelper& psH) 168 { 169 170 size_t *P = FFLAS::fflas_new<size_t>(M); 171 size_t *rowP = FFLAS::fflas_new<size_t>(M); 172 173 if (PLUQ( F, FFLAS::FflasNonUnit, M, M, A, lda, rowP, P, psH) < M){ 174 175 std::cerr<<"SINGULAR MATRIX"<<std::endl; 176 FFLAS::fflas_delete (P); 177 FFLAS::fflas_delete (rowP); 178 return x; 179 } 180 else{ 181 FFLAS::fassign( F, M, b, incb, x, incx ); 182 183 applyP (F, FFLAS::FflasLeft, FFLAS::FflasNoTrans, 1, 0,(int) M, x, incx, rowP ); 184 ftrsv (F, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, M, A, lda , x, incx); 185 ftrsv (F, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, M, A, lda , x, incx); 186 applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans, 1, 0,(int) M, x, incx, P ); 187 FFLAS::fflas_delete( rowP); 188 FFLAS::fflas_delete( P); 189 190 return x; 191 192 } 193 } 194 195 template <class Field> 196 inline typename Field::Element_ptr 197 pSolve (const Field& F, const size_t M, 198 typename Field::Element_ptr A, const size_t lda, 199 typename Field::Element_ptr x, const int incx, 200 typename Field::ConstElement_ptr b, const int incb, size_t numthreads) { 201 PAR_BLOCK{ 202 size_t nt = numthreads ? numthreads : NUM_THREADS; 203 FFLAS::ParSeqHelper::Parallel<FFLAS::CuttingStrategy::Recursive,FFLAS::StrategyParameter::Threads> parH(nt); 204 FFPACK::Solve(F, M, A, lda, x, incx, b, incb, parH); 205 } 206 return x; 207 } 208 209 template <class Field> 210 void RandomNullSpaceVector (const Field& F, const FFLAS::FFLAS_SIDE Side, 211 const size_t M, const size_t N, 212 typename Field::Element_ptr A, const size_t lda, 213 typename Field::Element_ptr X, const size_t incX) 214 { 215 // Right kernel vector: X s.t. AX == 0 216 if (Side == FFLAS::FflasRight) { 217 size_t* P = FFLAS::fflas_new<size_t>(M); 218 size_t* Q = FFLAS::fflas_new<size_t>(N); 219 220 size_t R = PLUQ (F, FFLAS::FflasNonUnit, M, N, A, lda, P, Q); 221 FFLAS::fflas_delete(P); 222 223 // Nullspace is {0} 224 if (N == R) { 225 FFLAS::fzero(F, N, X, incX); 226 FFLAS::fflas_delete(Q); 227 return; 228 } 229 230 // We create t (into X) not null such that U * t == 0, i.e. U1 * t1 == -U2 * t2 231 232 // Random after rank is passed (t2) 233 typename Field::RandIter g(F); 234 for (size_t i = R; i < N; ++i) 235 g.random(*(X + i * incX)); 236 237 // Nullspace is total, any random vector would do 238 if (R == 0) { 239 FFLAS::fflas_delete(Q); 240 return; 241 } 242 243 // Compute -U2 * t2 (into t1 as temporary) 244 FFLAS::fgemv(F, FFLAS::FflasNoTrans, R, N - R, 245 F.mOne, A + R, lda, X + R * incX, incX, 0u, X, incX); 246 247 // Now get t1 such that U1 * t1 == -U2 * t2 248 FFLAS::ftrsv(F, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, R, 249 A, lda, X, (int)incX); 250 251 applyP(F, FFLAS::FflasLeft, FFLAS::FflasTrans, 1u, 0u, (int) N, X, 1u, Q); 252 253 FFLAS::fflas_delete(Q); 254 } 255 256 // Left kernel vector 257 else { 258 size_t* P = FFLAS::fflas_new<size_t>(M); 259 size_t* Q = FFLAS::fflas_new<size_t>(N); 260 261 size_t R = PLUQ (F, FFLAS::FflasNonUnit, M, N, A, lda, P, Q); 262 FFLAS::fflas_delete(Q); 263 264 // Nullspace is {0} 265 if (M == R) { 266 FFLAS::fzero(F, M, X, incX); 267 FFLAS::fflas_delete(P); 268 return; 269 } 270 271 // We create t (into X) not null such that t * L == 0, i.e. t1 * L1 == -t2 * L2 272 273 // Random after rank is passed (t2) 274 typename Field::RandIter g(F); 275 for (size_t i = R; i < M; ++i) 276 g.random(*(X + i * incX)); 277 278 // Nullspace is total, any random vector would do 279 if (R == 0) { 280 FFLAS::fflas_delete(P); 281 return; 282 } 283 284 // Compute -t2 * L2 (into t1 as temporary) 285 FFLAS::fgemv(F, FFLAS::FflasTrans, M - R, R, 286 F.mOne, A + R * lda, lda, X + R * incX, incX, 0u, X, incX); 287 288 // Now get t1 such that t1 * L1 == -t2 * L2 289 FFLAS::ftrsv(F, FFLAS::FflasLower, FFLAS::FflasTrans, FFLAS::FflasUnit, R, 290 A, lda, X, (int)incX); 291 292 applyP(F, FFLAS::FflasRight, FFLAS::FflasNoTrans, 1u, 0u, (int) M, X, 1u, P); 293 294 FFLAS::fflas_delete(P); 295 } 296 } 297 298 template <class Field> 299 size_t NullSpaceBasis (const Field& F, const FFLAS::FFLAS_SIDE Side, 300 const size_t M, const size_t N, 301 typename Field::Element_ptr A, const size_t lda, 302 typename Field::Element_ptr& NS, size_t& ldn, 303 size_t& NSdim) 304 { 305 size_t* P = FFLAS::fflas_new<size_t>(M); 306 size_t* Q = FFLAS::fflas_new<size_t>(N); 307 308 size_t R = PLUQ (F, FFLAS::FflasNonUnit, M, N, A, lda, P, Q); 309 310 if (Side == FFLAS::FflasRight) { // Right NullSpace 311 FFLAS::fflas_delete(P); 312 313 ldn = N-R; 314 NSdim = ldn; 315 316 if (NSdim == 0) { 317 FFLAS::fflas_delete (Q); 318 NS = NULL ; 319 return NSdim ; 320 } 321 322 NS = FFLAS::fflas_new (F, N, ldn); 323 324 if (R == 0) { 325 FFLAS::fflas_delete(Q); 326 FFLAS::fidentity(F,N,ldn,NS,ldn); 327 return NSdim; 328 } 329 330 FFLAS::fassign (F, R, ldn, A + R, lda, NS , ldn); 331 332 ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, R, ldn, 333 F.mOne, A, lda, NS, ldn); 334 335 FFLAS::fidentity(F,NSdim,NSdim,NS+R*ldn,ldn); 336 337 applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans, NSdim, 0,(int) N, NS, ldn, Q); 338 339 FFLAS::fflas_delete(Q); 340 341 return NSdim; 342 } 343 else { // Left NullSpace 344 FFLAS::fflas_delete(Q); 345 346 ldn = M; 347 NSdim = M-R; 348 349 if (NSdim == 0) { 350 FFLAS::fflas_delete (P); 351 NS = NULL; 352 return NSdim; 353 } 354 355 NS = FFLAS::fflas_new (F, NSdim, ldn); 356 357 if (R == 0) { 358 FFLAS::fflas_delete( P); 359 FFLAS::fidentity(F,NSdim,ldn,NS,ldn); 360 return NSdim; 361 } 362 363 FFLAS::fassign (F, NSdim, R, A + R *lda, lda, NS, ldn); 364 ftrsm (F, FFLAS::FflasRight, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, NSdim, R, F.mOne, A, lda, NS, ldn); 365 366 FFLAS::fidentity(F,NSdim,NSdim,NS+R,ldn); 367 applyP (F, FFLAS::FflasRight, FFLAS::FflasNoTrans, NSdim, 0,(int) M, NS, ldn, P); 368 369 FFLAS::fflas_delete(P); 370 return NSdim; 371 } 372 } 373 374 template<class Field> 375 void 376 solveLB( const Field& F, const FFLAS::FFLAS_SIDE Side, 377 const size_t M, const size_t N, const size_t R, 378 typename Field::Element_ptr L, const size_t ldl, 379 const size_t * Q, 380 typename Field::Element_ptr B, const size_t ldb ) 381 { 382 383 size_t LM = (Side == FFLAS::FflasRight)?N:M; 384 int i = (int)R ; 385 for (; i--; ){ // much faster for 386 if ( Q[i] > (size_t) i){ 387 FFLAS::fassign( F, LM-Q[i]-1, L+(Q[i]+1)*ldl+i, ldl , L+Q[i]*(ldl+1)+ldl,ldl); 388 for ( size_t j=Q[i]*ldl; j<LM*ldl; j+=ldl) 389 F.assign( *(L+i+j), F.zero ); 390 } 391 } 392 ftrsm( F, Side, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, M, N, F.one, L, ldl , B, ldb); 393 // Undo the permutation of L 394 for (size_t ii=0; ii<R; ++ii){ 395 if ( Q[ii] > (size_t) ii){ 396 FFLAS::fassign( F, LM-Q[ii]-1, L+Q[ii]*(ldl+1)+ldl,ldl, L+(Q[ii]+1)*ldl+ii, ldl ); 397 for ( size_t j=Q[ii]*ldl; j<LM*ldl; j+=ldl) 398 F.assign( *(L+Q[ii]+j), F.zero ); 399 } 400 } 401 } 402 403 template<class Field> 404 void 405 solveLB2( const Field& F, const FFLAS::FFLAS_SIDE Side, 406 const size_t M, const size_t N, const size_t R, 407 typename Field::Element_ptr L, const size_t ldl, 408 const size_t * Q, 409 typename Field::Element_ptr B, const size_t ldb ) 410 { 411 typename Field::Element_ptr Lcurr, Rcurr, Bcurr; 412 size_t ib, Ldim; 413 int k; 414 if ( Side == FFLAS::FflasLeft ){ 415 size_t j = 0; 416 while ( j<R ) { 417 ib = Q[j]; 418 k = (int)ib ; 419 while ((j<R) && ( (int) Q[j] == k) ) {k++;j++;} 420 Ldim = (size_t)k-ib; 421 Lcurr = L + j-Ldim + ib*ldl; 422 Bcurr = B + ib*ldb; 423 Rcurr = Lcurr + Ldim*ldl; 424 425 ftrsm( F, Side, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, Ldim, N, F.one, 426 Lcurr, ldl , Bcurr, ldb ); 427 428 fgemm( F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M-(size_t)k, N, Ldim, F.mOne, 429 Rcurr , ldl, Bcurr, ldb, F.one, Bcurr+Ldim*ldb, ldb); 430 } 431 } 432 else{ // Side == FFLAS::FflasRight 433 int j=(int)R-1; 434 while ( j >= 0 ) { 435 ib = Q[j]; 436 k = (int) ib; 437 while ( (j >= 0) && ( (int)Q[j] == k) ) {--k;--j;} 438 Ldim = ib-(size_t)k; 439 Lcurr = L + j+1 + (k+1)*(int)ldl; 440 Bcurr = B + ib+1; 441 Rcurr = Lcurr + Ldim*ldl; 442 443 fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, M, Ldim, N-ib-1, F.mOne, 444 Bcurr, ldb, Rcurr, ldl, F.one, Bcurr-Ldim, ldb); 445 446 ftrsm (F, Side, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, M, Ldim, F.one, 447 Lcurr, ldl , Bcurr-Ldim, ldb ); 448 } 449 } 450 } 451 452} // FFPACK 453 454#endif // __FFLASFFPACK_ffpack_INL 455/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 456// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s 457