1/* fflas-ffpack/ffpack/ffpack_frobenius.inl 2 * Copyright (C) 2007 Clement Pernet 3 * 4 * Written by Clement Pernet <cpernet@uwaterloo.ca> 5 * 6 * 7 * ========LICENCE======== 8 * This file is part of the library FFLAS-FFPACK. 9 * 10 * FFLAS-FFPACK is free software: you can redistribute it and/or modify 11 * it under the terms of the GNU Lesser General Public 12 * License as published by the Free Software Foundation; either 13 * version 2.1 of the License, or (at your option) any later version. 14 * 15 * This library is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Lesser General Public License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public 21 * License along with this library; if not, write to the Free Software 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23 * ========LICENCE======== 24 *. 25 */ 26 27#include <givaro/givranditer.h> 28 29//--------------------------------------------------------------------- 30// CharpolyArithProg: Las Vegas algorithm to compute the Charpoly 31// over a large field (Z/pZ, s.t. p > 2n^2) 32//--------------------------------------------------------------------- 33// 34// 35namespace FFPACK { namespace Protected { 36 template <class Field> 37 inline void CompressRows (Field& F, const size_t M, 38 typename Field::Element_ptr A, const size_t lda, 39 typename Field::Element_ptr tmp, const size_t ldtmp, 40 const size_t * d, const size_t nb_blocs); 41 42 template <class Field> 43 inline void CompressRowsQK (Field& F, const size_t M, 44 typename Field::Element_ptr A, const size_t lda, 45 typename Field::Element_ptr tmp, const size_t ldtmp, 46 const size_t * d,const size_t deg, const size_t nb_blocs); 47 48 template <class Field> 49 inline void DeCompressRows (Field& F, const size_t M, const size_t N, 50 typename Field::Element_ptr A, const size_t lda, 51 typename Field::Element_ptr tmp, const size_t ldtmp, 52 const size_t * d, const size_t nb_blocs); 53 54 template <class Field> 55 inline void DeCompressRowsQK (Field& F, const size_t M, const size_t N, 56 typename Field::Element_ptr A, const size_t lda, 57 typename Field::Element_ptr tmp, const size_t ldtmp, 58 const size_t * d, const size_t deg, const size_t nb_blocs); 59 60 template <class Field> 61 inline void CompressRowsQA (Field& F, const size_t M, 62 typename Field::Element_ptr A, const size_t lda, 63 typename Field::Element_ptr tmp, const size_t ldtmp, 64 const size_t * d, const size_t nb_blocs); 65 66 template <class Field> 67 inline void DeCompressRowsQA (Field& F, const size_t M, const size_t N, 68 typename Field::Element_ptr A, const size_t lda, 69 typename Field::Element_ptr tmp, const size_t ldtmp, 70 const size_t * d, const size_t nb_blocs); 71 72 73 template <class PolRing> 74 inline void 75 RandomKrylovPrecond (const PolRing& PR, std::list<typename PolRing::Element>& completedFactors, const size_t N, 76 typename PolRing::Domain_t::Element_ptr A, const size_t lda, 77 size_t & Nb, typename PolRing::Domain_t::Element_ptr& B, size_t& ldb, 78 typename PolRing::Domain_t::RandIter& g, const size_t degree) 79 { 80 typedef typename PolRing::Domain_t Field; 81 typedef typename PolRing::Element Polynomial; 82 const Field& F = PR.getdomain(); 83 84 FFLASFFPACK_check(degree); 85 size_t noc = (N-1)/degree + 1; 86 // Building the workplace matrix 87 typename Field::Element_ptr K = FFLAS::fflas_new (F, degree*noc, N); 88 typename Field::Element_ptr K2 = FFLAS::fflas_new (F, degree*noc, N); 89 size_t ldk = N; 90// size_t bk_stride = noc*ldk; 91 92 size_t *dA = FFLAS::fflas_new<size_t>(N); //PA 93 size_t *dK = FFLAS::fflas_new<size_t>(noc*degree); 94 for (size_t i=0; i<noc; ++i) 95 dK[i]=0; 96 97#ifdef __FFLASFFPACK_ARITHPROG_PROFILING 98 Givaro::Timer timkrylov, timelim, timsimilar, timrest; 99 timkrylov.start(); 100#endif 101 // Picking a random noc x N block vector U^T 102 Givaro::GeneralRingNonZeroRandIter<Field> nzg (g); 103 RandomMatrix (F, noc, N, K, ldk*degree, g); 104 for (size_t i = 0; i < noc; ++i) 105 nzg.random (*(K + i*(degree*ldk+1))); 106 107 // Computing the bloc Krylov matrix [u1 Au1 .. A^(c-1) u1 u2 Au2 ...]^T 108 for (size_t i = 1; i<degree; ++i){ 109 fgemm( F, FFLAS::FflasNoTrans, FFLAS::FflasTrans, noc, N, N,F.one, 110 K+(i-1)*ldk, degree*ldk, A, lda, F.zero, K+i*ldk, degree*ldk); 111 } 112 // K2 <- K (re-ordering) 113 //! @todo swap to save space ?? 114 //! @todo don't assing K2 c*noc x N but only mas (c,noc) x N and store each one after the other 115 // size_t w_idx = 0; 116 // for (size_t i=0; i<noc; ++i) 117 // for (size_t j=0; j<degree; ++j, w_idx++) 118 // FFLAS::fassign(F, N, (K+(i+j*noc)*ldk), 1, (K2+(w_idx)*ldk), 1); 119 120 // Copying K2 <- K 121// for (size_t i=0; i<noc*degree; ++i) 122 FFLAS::fassign (F, noc*degree, N, K, ldk, K2, ldk); 123 124#ifdef __FFLASFFPACK_ARITHPROG_PROFILING 125 timkrylov.stop(); 126 std::cerr<<"Random Krylov Preconditionning:"<<std::endl 127 <<" Krylov basis computation : "<<timkrylov.usertime()<<std::endl; 128 timelim.start(); 129#endif 130 size_t * Pk = FFLAS::fflas_new<size_t>(N); 131 size_t * Qk = FFLAS::fflas_new<size_t>(N); 132 for (size_t i=0; i<N; ++i) 133 Qk[i] = 0; 134 for (size_t i=0; i<N; ++i) 135 Pk[i] = 0; 136 137 // @todo: replace by PLUQ 138 size_t R = LUdivine(F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, N, N, K, ldk, Pk, Qk); 139 size_t row_idx = 0; 140 size_t ii=0; 141 size_t dold = degree; 142 size_t nb_full_blocks = 0; 143 size_t Mk = 0; 144 // Determining the degree sequence dK 145 for (size_t k = 0; k<noc; ++k){ 146 size_t d = 0; 147 while ( (d<degree) && (row_idx<R) && (Qk[row_idx] == ii)) {ii++; row_idx++; d++;} 148 if (d > dold){ 149 // std::cerr << "FAIL in preconditionning phase:" 150 // << " degree sequence is not monotonically not increasing" 151 // << std::endl; 152 FFLAS::fflas_delete (K, Pk, Qk, dA, dK); 153 throw CharpolyFailed(); 154 } 155 dK[k] = dold = d; 156 Mk++; 157 if (d == degree) 158 nb_full_blocks++; 159 if (row_idx < N) 160 ii = Qk[row_idx]; 161 } 162#ifdef __FFLASFFPACK_ARITHPROG_PROFILING 163 timelim.stop(); 164 std::cerr <<" LU (Krylov) : "<<timelim.usertime()<<std::endl; 165 timsimilar.start(); 166#endif 167 168 // Selection of the last iterate of each block 169 170 typename Field::Element_ptr K3 = FFLAS::fflas_new (F, Mk, N); 171 typename Field::Element_ptr K4 = FFLAS::fflas_new (F, Mk, N); 172 size_t bk_idx = 0; 173 for (size_t i = 0; i < Mk; ++i){ 174 FFLAS::fassign (F, N, (K2 + (bk_idx + dK[i]-1)*ldk), 1, (K3+i*ldk), 1); 175 bk_idx += degree; 176 } 177 FFLAS::fflas_delete (K2); 178 179 // K <- K A^T 180 fgemm( F, FFLAS::FflasNoTrans, FFLAS::FflasTrans, Mk, N, N,F.one, K3, ldk, A, lda, F.zero, K4, ldk); 181 182 // K <- K P^T 183 applyP (F, FFLAS::FflasRight, FFLAS::FflasTrans, Mk, 0,(int) R, K4, ldk, Pk); 184 185 // K <- K U^-1 186 ftrsm (F, FFLAS::FflasRight, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, Mk, R,F.one, K, ldk, K4, ldk); 187 188 // L <- Q^T L 189 applyP(F, FFLAS::FflasLeft, FFLAS::FflasNoTrans, N, 0,(int) R, K, ldk, Qk); 190 191 // K <- K L^-1 192 ftrsm (F, FFLAS::FflasRight, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, Mk, R,F.one, K, ldk, K4, ldk); 193 194 //undoing permutation on L 195 applyP(F, FFLAS::FflasLeft, FFLAS::FflasTrans, N, 0,(int) R, K, ldk, Qk); 196 197 // Recovery of the completed invariant factors 198 size_t Ma = Mk; 199 size_t Ncurr = R; 200 size_t offset = Ncurr-1; 201 for (size_t i=Mk-1; i>=nb_full_blocks+1; --i){ 202 if (dK[i] >= 1){ 203 for (size_t j = offset+1; j<R; ++j) 204 if (!F.isZero(*(K4 + i*ldk + j))){ 205 //std::cerr<<"FAIL C != 0 in preconditionning"<<std::endl; 206 FFLAS::fflas_delete (K,K3,K4,Pk,Qk,dA,dK); 207 throw CharpolyFailed(); 208 } 209 Polynomial P (dK [i]+1); 210 F.assign(P[dK[i]],F.one); 211 for (size_t j=0; j < dK [i]; ++j) 212 F.neg (P [dK [i]-j-1], *(K4 + i*ldk + (offset-j))); 213 completedFactors.push_front(P); 214 offset -= dK [i]; 215 Ncurr -= dK [i]; 216 Ma--; 217 } 218 } 219 Mk = Ma; 220 221#ifdef __FFLASFFPACK_ARITHPROG_PROFILING 222 timsimilar.stop(); 223 std::cerr <<" Similarity) : "<<timsimilar.usertime()<<std::endl; 224 timrest.start(); 225#endif 226 if (R<N){ 227 // The Krylov basis did not span the whole space 228 // Recurse on the complementary subspace 229 if (! FFLAS::fiszero (F, nb_full_blocks+1, N-R, K4+R, ldk)){ 230 231 // for (size_t i=0; i<nb_full_blocks + 1; ++i) 232 // for (size_t j=R; j<N; ++j){ 233 // if (!F.isZero( *(K4+i*ldk+j) )){ 234 FFLAS::fflas_delete (K3, K4, K, Pk, Qk, dA, dK); 235 throw CharpolyFailed(); 236 } 237 238 size_t Nrest = N-R; 239 typename Field::Element_ptr K21 = K + R*ldk; 240 typename Field::Element_ptr K22 = K21 + R; 241 242 // Compute the n-k last rows of A' = P A^T P^T in K2_ 243 // A = A . P^t 244 applyP( F, FFLAS::FflasRight, FFLAS::FflasTrans, 245 N, 0,(int) R, A, lda, Pk); 246 247 // Copy K2_ = (A'_2)^t 248 for (size_t i=0; i<Nrest; i++) 249 FFLAS::fassign (F, N, A+R+i, lda, K21+i*ldk, 1); 250 251 // A = A . P : Undo the permutation on A 252 applyP( F, FFLAS::FflasRight, FFLAS::FflasNoTrans, N, 0,(int) R, A, lda, Pk); 253 254 // K2_ = K2_ . P^t (= ( P A^t P^t )2_ ) 255 applyP( F, FFLAS::FflasRight, FFLAS::FflasTrans, Nrest, 0,(int) R, K21, ldk, Pk); 256 257 // K21 = K21 . S1^-1 258 ftrsm (F, FFLAS::FflasRight,FFLAS::FflasUpper,FFLAS::FflasNoTrans,FFLAS::FflasNonUnit, Nrest, R, F.one, K, ldk, K21, ldk); 259 260 typename Field::Element_ptr Arec = FFLAS::fflas_new (F, Nrest, Nrest); 261 size_t ldarec = Nrest; 262 263 // Creation of the matrix A2 for recursive call 264 FFLAS::fassign (F, Nrest, Nrest, K22, ldk, Arec, ldarec); 265 266 fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, Nrest, Nrest, R,F.mOne, K21, ldk, K+R, ldk,F.one, Arec, ldarec); 267 268 std::list<Polynomial> polyList; 269 polyList.clear(); 270 271 // Recursive call on the complementary subspace 272 CharPoly (PR, polyList, Nrest, Arec, ldarec, g, FfpackArithProgKrylovPrecond); 273 FFLAS::fflas_delete (Arec); 274 completedFactors.merge(polyList); 275 } 276#ifdef __FFLASFFPACK_ARITHPROG_PROFILING 277 timrest.stop(); 278 std::cerr<<" left-over : "<<timrest.usertime()<<std::endl; 279#endif 280 281 FFLAS::fflas_delete( Pk); 282 FFLAS::fflas_delete( Qk); 283 for (size_t i=0; i<Mk; ++i) 284 dA[i] = dK[i]; 285 bk_idx = 0; 286 287 ldb = Ma; 288 Nb = Ncurr; 289 B = FFLAS::fflas_new (F, Ncurr, ldb); 290 291 for (size_t j=0; j<Ma; ++j) 292 FFLAS::fassign(F, Ncurr, K4+j*ldk, 1, B+j, ldb); 293 FFLAS::fflas_delete (K4); 294 295 } 296 297 template <class PolRing> 298 inline std::list<typename PolRing::Element>& 299 ArithProg (const PolRing& PR, std::list<typename PolRing::Element>& frobeniusForm, 300 const size_t N, typename PolRing::Domain_t::Element_ptr A, const size_t lda, 301 const size_t degree) 302 { 303 304 typedef typename PolRing::Domain_t Field; 305 typedef typename PolRing::Element Polynomial; 306 const Field& F = PR.getdomain(); 307 308#ifdef __FFLASFFPACK_ARITHPROG_PROFILING 309 Givaro::Timer tim; 310 tim.start(); 311#endif 312 size_t nb_full_blocks, Mk, Ma; 313 Mk = Ma = nb_full_blocks = (N-1)/degree +1; 314 typename Field::Element_ptr K, K3, Ac; 315 size_t ldk=Ma; 316 size_t ldac = Ma; 317 K = FFLAS::fflas_new(F, N, ldk); 318 Ac = FFLAS::fflas_new(F, N, ldac); 319 320 FFLAS::fassign(F, N, Ma, A, lda, Ac, ldac); 321 FFLAS::fassign(F, N, Ma, Ac, ldac, K, ldk); 322 323 size_t Ncurr=N; 324 325 size_t * dA = FFLAS::fflas_new<size_t>(Ma); 326 size_t * dK = FFLAS::fflas_new<size_t>(Mk); 327 for (size_t i=0; i<Ma; i++){ 328 dK[i] = dA[i] = degree; 329 } 330 size_t rdeg = N % degree; 331 if (rdeg) 332 dK[Mk-1] = dA[Ma-1] = rdeg; 333 334 typename Field::Element_ptr Arp = FFLAS::fflas_new (F, Ncurr, Ma); 335 size_t ldarp = Ncurr; 336 size_t deg = degree+1; 337 size_t * rp=NULL; 338 // Main loop of the arithmetic progession 339 while ((nb_full_blocks >= 1) && (Mk > 1)) { 340 size_t block_idx, it_idx, rp_val; 341 K = FFLAS::fflas_new (F, Ncurr, Ma); 342 K3 = FFLAS::fflas_new (F, Ncurr, Ma); 343 ldk = Ma; 344 345 // Computation of the rank profile 346 for (size_t i=0; i < Ncurr; ++i) 347 for (size_t j=0; j < Ma; ++j) 348 *(Arp + j*ldarp + Ncurr-i-1) = *(Ac + i*ldac + j); 349 rp = FFLAS::fflas_new<size_t>(2*Ncurr); 350 for (size_t i=0; i<2*Ncurr; ++i) 351 rp[i] = 0; 352 size_t RR; 353 try{ 354 RR = SpecRankProfile (F, Ma, Ncurr, Arp, ldarp, deg-1, rp); 355 } catch (CharpolyFailed){ 356 FFLAS::fflas_delete (Arp, Ac, K, K3, rp, dA, dK); 357 throw CharpolyFailed(); 358 } 359 if (RR < Ncurr){ 360 //std::cerr<<"FAIL RR<Ncurr"<<std::endl; 361 FFLAS::fflas_delete (Arp, Ac, K, K3, rp, dA, dK); 362 throw CharpolyFailed(); 363 } 364 365 // Computation of the degree vector dK 366 it_idx = 0; 367 rp_val = 0; 368 size_t gg = 0; 369 size_t dtot=0; 370 block_idx = 0; 371 nb_full_blocks = 0; 372 while (dtot<Ncurr){ 373 do {gg++; rp_val++; it_idx++;} 374 while ( /*(gg<Ncurr ) &&*/ (rp[gg] == rp_val) && (it_idx < deg )); 375 if ((block_idx)&&(it_idx > dK[block_idx-1])){ 376 FFLAS::fflas_delete (Arp, Ac, K, K3, rp, dA, dK); 377 throw CharpolyFailed(); 378 //std::cerr<<"FAIL d non decreasing"<<std::endl; 379 //exit(-1); 380 } 381 dK[block_idx++] = it_idx; 382 dtot += it_idx; 383 if (it_idx == deg) 384 nb_full_blocks ++; 385 it_idx=0; 386 rp_val = rp[gg]; 387 } 388 389 Mk = block_idx; 390 391 // Selection of dense colums of K 392 for (size_t i=0; i < nb_full_blocks; ++i){ 393 FFLAS::fassign (F, Ncurr, Ac+i, ldac, K+i, ldk); 394 } 395 396 // K <- QK K 397 size_t pos = nb_full_blocks*(deg-1); 398 for (size_t i = nb_full_blocks; i < Mk; ++i){ 399 for (size_t j=0; j<Ncurr; ++j) 400 F.assign (*(K + i + j*ldk), F.zero); 401 F.assign (*(K + i + (pos + dK[i]-1)*ldk),F.one); 402 pos += dA[i]; 403 } 404 405 // Copying K3 <- K 406 for (size_t i=0; i<Mk; ++i) 407 FFLAS::fassign (F, Ncurr, K+i, ldk, K3+i, ldk); 408 Protected::CompressRowsQK (F, Mk, K3 + nb_full_blocks*(deg-1)*ldk, ldk, 409 Arp, ldarp, dK+nb_full_blocks, deg, Mk-nb_full_blocks); 410 411 // K <- PA K 412 Protected::CompressRows (F, nb_full_blocks, K, ldk, Arp, ldarp, dA, Ma); 413 414 // A <- newQA^T K (compress) 415 Protected::CompressRowsQA (F, Ma, Ac, ldac, Arp, ldarp, dA, Ma); 416 417 // K <- A K 418 fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, Ncurr-Ma, nb_full_blocks, Ma,F.one, 419 Ac, ldac, K+(Ncurr-Ma)*ldk, ldk,F.one, K, ldk); 420 fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, Ma, nb_full_blocks, Ma,F.one, 421 Ac+(Ncurr-Ma)*ldac, ldac, K+(Ncurr-Ma)*ldk, ldk, F.zero, Arp, ldarp); 422 for (size_t i=0; i< Ma; ++i) 423 FFLAS::fassign(F, nb_full_blocks, Arp+i*ldarp, 1, K+(Ncurr-Ma+i)*ldk, 1); 424 425 // Copying the last rows of A times K 426 size_t offset = (deg-2)*nb_full_blocks; 427 for (size_t i = nb_full_blocks; i < Mk; ++i) { 428 for (size_t j=0; j<Ncurr; ++j) 429 F.assign(*(K+i+j*ldk), F.zero); 430 if (dK[i] == dA[i]) // copy the column of A 431 FFLAS::fassign (F, Ncurr, Ac+i, ldac, K+i, ldk); 432 else{ 433 F.assign (*(K + i + (offset+dK[i]-1)*ldk),F.one); 434 } 435 offset += dA[i]-1; 436 } 437 438 // K <- QA K 439 Protected::DeCompressRowsQA (F, Mk, Ncurr, K, ldk, Arp, ldarp, dA, Ma); 440 441 // K <- QK^T K 442 Protected::CompressRowsQK (F, Mk, K + nb_full_blocks*(deg-1)*ldk, ldk, Arp, ldarp, 443 dK+nb_full_blocks, deg, Mk-nb_full_blocks); 444 445 // K <- K^-1 K 446 size_t *P=FFLAS::fflas_new<size_t>(Mk); 447 size_t *Q=FFLAS::fflas_new<size_t>(Mk); 448 if (LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, Mk, Mk , K3 + (Ncurr-Mk)*ldk, ldk, P, Q) < Mk){ 449 // should never happen (not a LAS VEGAS check) 450 //std::cerr<<"FAIL R2 < MK"<<std::endl; 451 // exit(-1); 452 } 453 ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, Mk, Mk,F.one, 454 K3 + (Ncurr-Mk)*ldk, ldk, K+(Ncurr-Mk)*ldk, ldk); 455 ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, Mk, Mk,F.one, 456 K3+(Ncurr-Mk)*ldk, ldk, K+(Ncurr-Mk)*ldk, ldk); 457 applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans, 458 Mk, 0,(int) Mk, K+(Ncurr-Mk)*ldk,ldk, P); 459 fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, Ncurr-Mk, Mk, Mk,F.mOne, 460 K3, ldk, K+(Ncurr-Mk)*ldk,ldk,F.one, K, ldk); 461 FFLAS::fflas_delete( P); 462 FFLAS::fflas_delete( Q); 463 464 // K <- PK^T K 465 Protected::DeCompressRows (F, Mk, Ncurr, K, ldk, Arp, ldarp, dK, Mk); 466 467 // K <- K PK (dA <- dK) 468 if (nb_full_blocks*deg < Ncurr) 469 Ma = nb_full_blocks+1; 470 else 471 Ma = nb_full_blocks; 472 473 for (size_t i=0; i< Ma; ++i) 474 dA[i] = dK[i]; 475 476 // Recovery of the completed invariant factors 477 offset = Ncurr-1; 478 size_t oldNcurr = Ncurr; 479 for (size_t i=Mk-1; i>=nb_full_blocks+1; --i) 480 if (dK[i] >= 1){ 481 Polynomial PP (dK [i]+1); 482 F.assign(PP[dK[i]],F.one); 483 for (size_t j=0; j < dK[i]; ++j) 484 F.neg( PP[dK[i]-j-1], *(K + i + (offset-j)*ldk)); 485 frobeniusForm.push_front(PP); 486 offset -= dK[i]; 487 Ncurr -= dK[i]; 488 } 489 for (size_t i= offset+1; i<oldNcurr; ++i) 490 for (size_t j=0; j<nb_full_blocks+1; ++j){ 491 if (!F.isZero( *(K+i*ldk+j) )){ 492 //std::cerr<<"FAIL C != 0"<<std::endl; 493 FFLAS::fflas_delete (rp, Arp, Ac, K, K3, dA, dK); 494 throw CharpolyFailed(); 495 } 496 } 497 498 // A <- K 499 FFLAS::fflas_delete (Ac); 500 FFLAS::fflas_delete (Arp); 501 Ac = FFLAS::fflas_new (F, Ncurr, Mk); 502 ldac = Mk; 503 Arp = FFLAS::fflas_new (F, Ncurr, Mk); 504 ldarp=Ncurr; 505 for (size_t i=0; i < Ncurr; ++i ) 506 FFLAS::fassign (F, Mk, K + i*ldk, 1, Ac + i*ldac, 1); 507 508 deg++; 509 FFLAS::fflas_delete (K3, rp); 510 if ((nb_full_blocks > 0) && (Mk > 1)) 511 FFLAS::fflas_delete(K); 512 513 } 514 515 // Recovery of the first invariant factor 516 Polynomial Pl(dK [0]+1); 517 F.assign(Pl[dK[0]],F.one); 518 for (size_t j=0; j < dK[0]; ++j) 519 F.neg( Pl[j], *(K + j*ldk)); 520 frobeniusForm.push_front(Pl); 521 FFLAS::fflas_delete (Arp, Ac, K, dA, dK); 522#ifdef __FFLASFFPACK_ARITHPROG_PROFILING 523 tim.stop(); 524 std::cerr<<"Arith Prog : "<<tim.usertime()<<std::endl; 525#endif 526 return frobeniusForm; 527 } 528 529 template <class Field> 530 void CompressRowsQK (Field& F, const size_t M, 531 typename Field::Element_ptr A, const size_t lda, 532 typename Field::Element_ptr tmp, const size_t ldtmp, 533 const size_t * d, const size_t deg,const size_t nb_blocs) 534 { 535 536 int currtmp = 0; 537 size_t currw = d[0]-1; 538 size_t currr = d[0]-1; 539 for (int i = 0; i< int(nb_blocs)-1; ++i){ 540 // FFLAS::fassign(F,deg-d[i],M,A+currr*lda,lda,tmp+(size_t)currtmp*ldtmp); 541 for (int j = int(d[i]-1); j<int(deg)-1; ++j, ++currr, ++currtmp) 542 FFLAS::fassign(F, M, A + currr*lda, 1, tmp + (size_t)currtmp*ldtmp, 1); 543 // currr += (deg - d[i]); 544 for (int j=0; j < int(d[i+1]) -1; ++j, ++currr, ++currw){ 545 FFLAS::fassign(F, M, A+(currr)*lda, 1, A + (currw)*lda, 1); 546 } 547 } 548 for (int i=0; i < currtmp; ++i, ++currw){ 549 FFLAS::fassign (F, M, tmp + (size_t)i*ldtmp, 1, A + (currw)*lda, 1); 550 } 551 } 552 553 template <class Field> 554 void CompressRows (Field& F, const size_t M, 555 typename Field::Element_ptr A, const size_t lda, 556 typename Field::Element_ptr tmp, const size_t ldtmp, 557 const size_t * d, const size_t nb_blocs) 558 { 559 560 size_t currd = d[0]-1; 561 size_t curri = d[0]-1; 562 for (int i = 0; i< int(nb_blocs)-1; ++i){ 563 FFLAS::fassign(F, M, A + currd*lda, 1, tmp + i*(int)ldtmp, 1); 564 for (int j=0; j < int(d[i+1]) -1; ++j){ 565 FFLAS::fassign(F, M, A+(currd+(size_t)j+1)*lda, 1, A + (curri++)*lda, 1); 566 } 567 currd += d[i+1]; 568 } 569 for (int i=0; i < int(nb_blocs)-1; ++i){ 570 FFLAS::fassign (F, M, tmp + i*(int)ldtmp, 1, A + (curri++)*lda, 1); 571 } 572 } 573 574 template <class Field> 575 void DeCompressRows (Field& F, const size_t M, const size_t N, 576 typename Field::Element_ptr A, const size_t lda, 577 typename Field::Element_ptr tmp, const size_t ldtmp, 578 const size_t * d, const size_t nb_blocs) 579 { 580 581 for (int i=0; i<int(nb_blocs)-1; ++i) 582 FFLAS::fassign(F, M, A + (N-nb_blocs+(size_t)i)*lda, 1, tmp + i*(int)ldtmp, 1); 583 584 size_t w_idx = N - 2; 585 size_t r_idx = N - nb_blocs - 1; 586 int i = int(nb_blocs)-1 ; 587 for (; i--; ){ 588 for (size_t j = 0; j<d[i+1]-1; ++j) 589 FFLAS::fassign (F, M, A + (r_idx--)*lda, 1, A + (w_idx--)*lda, 1); 590 FFLAS::fassign (F, M, tmp + i*(int)ldtmp, 1, A + (w_idx--)*lda, 1); 591 } 592 } 593 594 template <class Field> 595 void DeCompressRowsQK (Field& F, const size_t M, const size_t N, 596 typename Field::Element_ptr A, const size_t lda, 597 typename Field::Element_ptr tmp, const size_t ldtmp, 598 const size_t * d, const size_t deg,const size_t nb_blocs) 599 { 600 601 size_t zeroblockdim = 1; // the last block contributes with 1 602 size_t currtmp = 0; 603 for (int i=0; i<int(nb_blocs)-1; ++i) 604 zeroblockdim += deg - d[i]; 605 for (size_t i=0; i < zeroblockdim - 1; ++i, ++currtmp) 606 FFLAS::fassign(F, M, A + (N - zeroblockdim +i)*lda, 1, tmp + currtmp*ldtmp, 1); 607 currtmp--; 608 size_t w_idx = N - 2; 609 size_t r_idx = N - zeroblockdim - 1; 610 611 int i = int(nb_blocs)-1 ; 612 for (; i--;){ 613 for (size_t j = 0; j < d [i+1] - 1; ++j) 614 FFLAS::fassign (F, M, A + (r_idx--)*lda, 1, A + (w_idx--)*lda, 1); 615 for (size_t j = 0; j < deg - d[i]; ++j) 616 FFLAS::fassign (F, M, tmp + (currtmp--)*ldtmp, 1, A + (w_idx--)*lda, 1); 617 } 618 } 619 620 template <class Field> 621 void CompressRowsQA (Field& F, const size_t M, 622 typename Field::Element_ptr A, const size_t lda, 623 typename Field::Element_ptr tmp, const size_t ldtmp, 624 const size_t * d, const size_t nb_blocs) 625 { 626 627 size_t currd = 0; 628 size_t curri = 0; 629 for (size_t i = 0; i< nb_blocs; ++i){ 630 FFLAS::fassign(F, M, A + currd*lda, 1, tmp + i*ldtmp, 1); 631 for (size_t j=0; j < d[i] -1; ++j) 632 FFLAS::fassign(F, M, A+(currd+j+1)*lda, 1, A + (curri++)*lda, 1); 633 currd += d[i]; 634 } 635 for (size_t i=0; i < nb_blocs; ++i) 636 FFLAS::fassign (F, M, tmp + i*ldtmp, 1, A + (curri++)*lda, 1); 637 } 638 639 template <class Field> 640 void DeCompressRowsQA (Field& F, const size_t M, const size_t N, 641 typename Field::Element_ptr A, const size_t lda, 642 typename Field::Element_ptr tmp, const size_t ldtmp, 643 const size_t * d, const size_t nb_blocs) 644 { 645 646 for (size_t i=0; i<nb_blocs; ++i) 647 FFLAS::fassign(F, M, A + (N-nb_blocs+i)*lda, 1, tmp + i*ldtmp, 1); 648 649 size_t w_idx = N - 1; 650 size_t r_idx = N - nb_blocs - 1; 651 int i = int(nb_blocs) ; 652 for (; i--; ){ 653 for (size_t j = 0; j<d[i]-1; ++j) 654 FFLAS::fassign (F, M, A + (r_idx--)*lda, 1, A + (w_idx--)*lda, 1); 655 FFLAS::fassign (F, M, tmp + i*(int)ldtmp, 1, A + (w_idx--)*lda, 1); 656 } 657 } 658 659} // Protected 660} //FFPACK 661/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 662// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s 663