1/* fflas-ffpack/ffpack/ffpack_frobenius.inl
2 * Copyright (C) 2007 Clement Pernet
3 *
4 * Written by Clement Pernet <cpernet@uwaterloo.ca>
5 *
6 *
7 * ========LICENCE========
8 * This file is part of the library FFLAS-FFPACK.
9 *
10 * FFLAS-FFPACK is free software: you can redistribute it and/or modify
11 * it under the terms of the  GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
23 * ========LICENCE========
24 *.
25 */
26
27#include <givaro/givranditer.h>
28
29//---------------------------------------------------------------------
30// CharpolyArithProg: Las Vegas algorithm to compute the Charpoly
31// over a large field (Z/pZ, s.t.  p > 2n^2)
32//---------------------------------------------------------------------
33//
34//
35namespace FFPACK { namespace Protected {
36    template <class Field>
37    inline void CompressRows (Field& F, const size_t M,
38                       typename Field::Element_ptr A, const size_t lda,
39                       typename Field::Element_ptr tmp, const size_t ldtmp,
40                       const size_t * d, const size_t nb_blocs);
41
42    template <class Field>
43    inline void CompressRowsQK (Field& F, const size_t M,
44                         typename Field::Element_ptr A, const size_t lda,
45                         typename Field::Element_ptr tmp, const size_t ldtmp,
46                         const size_t * d,const size_t deg, const size_t nb_blocs);
47
48    template <class Field>
49    inline void DeCompressRows (Field& F, const size_t M, const size_t N,
50                         typename Field::Element_ptr A, const size_t lda,
51                         typename Field::Element_ptr tmp, const size_t ldtmp,
52                         const size_t * d, const size_t nb_blocs);
53
54    template <class Field>
55    inline void DeCompressRowsQK (Field& F, const size_t M, const size_t N,
56                           typename Field::Element_ptr A, const size_t lda,
57                           typename Field::Element_ptr tmp, const size_t ldtmp,
58                           const size_t * d, const size_t deg, const size_t nb_blocs);
59
60    template <class Field>
61    inline void CompressRowsQA (Field& F, const size_t M,
62                         typename Field::Element_ptr A, const size_t lda,
63                         typename Field::Element_ptr tmp, const size_t ldtmp,
64                         const size_t * d, const size_t nb_blocs);
65
66    template <class Field>
67    inline void DeCompressRowsQA (Field& F, const size_t M, const size_t N,
68                           typename Field::Element_ptr A, const size_t lda,
69                           typename Field::Element_ptr tmp, const size_t ldtmp,
70                           const size_t * d, const size_t nb_blocs);
71
72
73    template <class PolRing>
74    inline void
75    RandomKrylovPrecond (const PolRing& PR, std::list<typename PolRing::Element>& completedFactors, const size_t N,
76                         typename PolRing::Domain_t::Element_ptr A, const size_t lda,
77                         size_t & Nb, typename PolRing::Domain_t::Element_ptr& B, size_t& ldb,
78                         typename PolRing::Domain_t::RandIter& g, const size_t degree)
79    {
80        typedef typename PolRing::Domain_t Field;
81        typedef typename PolRing::Element Polynomial;
82        const Field& F = PR.getdomain();
83
84        FFLASFFPACK_check(degree);
85        size_t noc = (N-1)/degree + 1;
86        // Building the workplace matrix
87        typename Field::Element_ptr K  = FFLAS::fflas_new (F, degree*noc, N);
88        typename Field::Element_ptr K2 = FFLAS::fflas_new (F, degree*noc, N);
89        size_t ldk = N;
90//        size_t bk_stride = noc*ldk;
91
92        size_t *dA = FFLAS::fflas_new<size_t>(N); //PA
93        size_t *dK = FFLAS::fflas_new<size_t>(noc*degree);
94        for (size_t i=0; i<noc; ++i)
95            dK[i]=0;
96
97#ifdef __FFLASFFPACK_ARITHPROG_PROFILING
98        Givaro::Timer timkrylov, timelim, timsimilar, timrest;
99        timkrylov.start();
100#endif
101            // Picking a random noc x N block vector U^T
102        Givaro::GeneralRingNonZeroRandIter<Field> nzg (g);
103        RandomMatrix (F, noc, N, K, ldk*degree, g);
104        for (size_t i = 0; i < noc; ++i)
105            nzg.random (*(K + i*(degree*ldk+1)));
106
107        // Computing the bloc Krylov matrix [u1 Au1 .. A^(c-1) u1 u2 Au2 ...]^T
108        for (size_t i = 1; i<degree; ++i){
109            fgemm( F, FFLAS::FflasNoTrans, FFLAS::FflasTrans,  noc, N, N,F.one,
110                   K+(i-1)*ldk, degree*ldk, A, lda, F.zero, K+i*ldk, degree*ldk);
111        }
112        // K2 <- K (re-ordering)
113        //! @todo swap to save space ??
114        //! @todo don't assing K2 c*noc x N but only mas (c,noc) x N and store each one after the other
115        // size_t w_idx = 0;
116        // for (size_t i=0; i<noc; ++i)
117        //     for (size_t j=0; j<degree; ++j, w_idx++)
118        //         FFLAS::fassign(F, N, (K+(i+j*noc)*ldk), 1, (K2+(w_idx)*ldk), 1);
119
120        // Copying K2 <- K
121//        for (size_t i=0; i<noc*degree; ++i)
122        FFLAS::fassign (F, noc*degree, N, K, ldk, K2, ldk);
123
124#ifdef __FFLASFFPACK_ARITHPROG_PROFILING
125        timkrylov.stop();
126         std::cerr<<"Random Krylov Preconditionning:"<<std::endl
127                 <<"  Krylov basis computation : "<<timkrylov.usertime()<<std::endl;
128        timelim.start();
129#endif
130        size_t * Pk = FFLAS::fflas_new<size_t>(N);
131        size_t * Qk = FFLAS::fflas_new<size_t>(N);
132        for (size_t i=0; i<N; ++i)
133            Qk[i] = 0;
134        for (size_t i=0; i<N; ++i)
135            Pk[i] = 0;
136
137            // @todo: replace by PLUQ
138        size_t R = LUdivine(F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, N, N, K, ldk, Pk, Qk);
139        size_t row_idx = 0;
140        size_t ii=0;
141        size_t dold = degree;
142        size_t nb_full_blocks = 0;
143        size_t Mk = 0;
144        // Determining the degree sequence dK
145        for (size_t k = 0; k<noc; ++k){
146            size_t d = 0;
147            while ( (d<degree) && (row_idx<R) && (Qk[row_idx] == ii)) {ii++; row_idx++; d++;}
148            if (d > dold){
149                // std::cerr << "FAIL in preconditionning phase:"
150                //           << " degree sequence is not monotonically not increasing"
151                // 	     << std::endl;
152                FFLAS::fflas_delete (K, Pk, Qk, dA, dK);
153                throw CharpolyFailed();
154            }
155            dK[k] = dold = d;
156            Mk++;
157            if (d == degree)
158                nb_full_blocks++;
159            if (row_idx < N)
160                ii = Qk[row_idx];
161        }
162#ifdef __FFLASFFPACK_ARITHPROG_PROFILING
163        timelim.stop();
164        std::cerr <<"  LU (Krylov)              : "<<timelim.usertime()<<std::endl;
165        timsimilar.start();
166#endif
167
168        // Selection of the last iterate of each block
169
170        typename Field::Element_ptr K3 = FFLAS::fflas_new (F, Mk, N);
171        typename Field::Element_ptr K4 = FFLAS::fflas_new (F, Mk, N);
172        size_t bk_idx = 0;
173        for (size_t i = 0; i < Mk; ++i){
174            FFLAS::fassign (F, N, (K2 + (bk_idx + dK[i]-1)*ldk), 1, (K3+i*ldk), 1);
175            bk_idx += degree;
176        }
177        FFLAS::fflas_delete (K2);
178
179        // K <- K A^T
180        fgemm( F, FFLAS::FflasNoTrans, FFLAS::FflasTrans, Mk, N, N,F.one,  K3, ldk, A, lda, F.zero, K4, ldk);
181
182        // K <- K P^T
183        applyP (F, FFLAS::FflasRight, FFLAS::FflasTrans, Mk, 0,(int) R, K4, ldk, Pk);
184
185        // K <- K U^-1
186        ftrsm (F, FFLAS::FflasRight, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, Mk, R,F.one, K, ldk, K4, ldk);
187
188        // L <-  Q^T L
189        applyP(F, FFLAS::FflasLeft, FFLAS::FflasNoTrans, N, 0,(int) R, K, ldk, Qk);
190
191        // K <- K L^-1
192        ftrsm (F, FFLAS::FflasRight, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, Mk, R,F.one, K, ldk, K4, ldk);
193
194        //undoing permutation on L
195        applyP(F, FFLAS::FflasLeft, FFLAS::FflasTrans, N, 0,(int) R, K, ldk, Qk);
196
197        // Recovery of the completed invariant factors
198        size_t Ma = Mk;
199        size_t Ncurr = R;
200        size_t offset = Ncurr-1;
201        for (size_t i=Mk-1; i>=nb_full_blocks+1;  --i){
202            if (dK[i] >= 1){
203                for (size_t j = offset+1; j<R; ++j)
204                    if (!F.isZero(*(K4 + i*ldk + j))){
205                        //std::cerr<<"FAIL C != 0 in preconditionning"<<std::endl;
206                        FFLAS::fflas_delete (K,K3,K4,Pk,Qk,dA,dK);
207                        throw CharpolyFailed();
208                    }
209                Polynomial P (dK [i]+1);
210                F.assign(P[dK[i]],F.one);
211                for (size_t j=0; j < dK [i]; ++j)
212                    F.neg (P [dK [i]-j-1], *(K4 + i*ldk + (offset-j)));
213                completedFactors.push_front(P);
214                offset -= dK [i];
215                Ncurr -= dK [i];
216                Ma--;
217            }
218        }
219        Mk = Ma;
220
221#ifdef __FFLASFFPACK_ARITHPROG_PROFILING
222        timsimilar.stop();
223        std::cerr <<"  Similarity)              : "<<timsimilar.usertime()<<std::endl;
224        timrest.start();
225#endif
226        if (R<N){
227                // The Krylov basis did not span the whole space
228                // Recurse on the complementary subspace
229            if (! FFLAS::fiszero (F, nb_full_blocks+1, N-R, K4+R, ldk)){
230
231            // for (size_t i=0; i<nb_full_blocks + 1; ++i)
232            //     for (size_t j=R; j<N; ++j){
233            //         if (!F.isZero( *(K4+i*ldk+j) )){
234                        FFLAS::fflas_delete (K3, K4, K, Pk, Qk, dA, dK);
235                        throw CharpolyFailed();
236            }
237
238            size_t Nrest = N-R;
239            typename Field::Element_ptr K21 = K + R*ldk;
240            typename Field::Element_ptr K22 = K21 + R;
241
242            //  Compute the n-k last rows of A' = P A^T P^T in K2_
243            // A = A . P^t
244            applyP( F, FFLAS::FflasRight, FFLAS::FflasTrans,
245                    N, 0,(int) R, A, lda, Pk);
246
247            // Copy K2_ = (A'_2)^t
248            for (size_t i=0; i<Nrest; i++)
249                FFLAS::fassign (F, N, A+R+i, lda, K21+i*ldk, 1);
250
251            // A = A . P : Undo the permutation on A
252            applyP( F, FFLAS::FflasRight, FFLAS::FflasNoTrans, N, 0,(int) R, A, lda, Pk);
253
254            // K2_ = K2_ . P^t (=  ( P A^t P^t )2_ )
255            applyP( F, FFLAS::FflasRight, FFLAS::FflasTrans, Nrest, 0,(int) R, K21, ldk, Pk);
256
257            // K21 = K21 . S1^-1
258            ftrsm (F, FFLAS::FflasRight,FFLAS::FflasUpper,FFLAS::FflasNoTrans,FFLAS::FflasNonUnit, Nrest, R, F.one, K, ldk, K21, ldk);
259
260            typename Field::Element_ptr Arec = FFLAS::fflas_new (F, Nrest, Nrest);
261            size_t ldarec = Nrest;
262
263            // Creation of the matrix A2 for recursive call
264            FFLAS::fassign (F, Nrest, Nrest, K22, ldk, Arec, ldarec);
265
266            fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, Nrest, Nrest, R,F.mOne, K21, ldk, K+R, ldk,F.one, Arec, ldarec);
267
268            std::list<Polynomial> polyList;
269            polyList.clear();
270
271            // Recursive call on the complementary subspace
272            CharPoly (PR, polyList, Nrest, Arec, ldarec, g, FfpackArithProgKrylovPrecond);
273            FFLAS::fflas_delete (Arec);
274            completedFactors.merge(polyList);
275        }
276#ifdef __FFLASFFPACK_ARITHPROG_PROFILING
277            timrest.stop();
278            std::cerr<<"  left-over                : "<<timrest.usertime()<<std::endl;
279#endif
280
281        FFLAS::fflas_delete( Pk);
282        FFLAS::fflas_delete( Qk);
283        for (size_t i=0; i<Mk; ++i)
284            dA[i] = dK[i];
285        bk_idx = 0;
286
287        ldb = Ma;
288        Nb = Ncurr;
289        B = FFLAS::fflas_new (F, Ncurr, ldb);
290
291        for (size_t j=0; j<Ma; ++j)
292            FFLAS::fassign(F, Ncurr, K4+j*ldk, 1, B+j, ldb);
293        FFLAS::fflas_delete (K4);
294
295    }
296
297    template <class PolRing>
298    inline std::list<typename PolRing::Element>&
299    ArithProg (const PolRing& PR, std::list<typename PolRing::Element>& frobeniusForm,
300               const size_t N, typename PolRing::Domain_t::Element_ptr A, const size_t lda,
301               const size_t degree)
302    {
303
304        typedef typename PolRing::Domain_t Field;
305        typedef typename PolRing::Element Polynomial;
306        const Field& F = PR.getdomain();
307
308#ifdef __FFLASFFPACK_ARITHPROG_PROFILING
309        Givaro::Timer tim;
310        tim.start();
311#endif
312	size_t nb_full_blocks, Mk, Ma;
313	Mk = Ma = nb_full_blocks = (N-1)/degree +1;
314	typename Field::Element_ptr K, K3, Ac;
315	size_t ldk=Ma;
316	size_t ldac = Ma;
317	K = FFLAS::fflas_new(F, N, ldk);
318	Ac = FFLAS::fflas_new(F, N, ldac);
319
320	FFLAS::fassign(F, N, Ma, A, lda, Ac, ldac);
321	FFLAS::fassign(F, N, Ma, Ac, ldac, K, ldk);
322
323        size_t Ncurr=N;
324
325        size_t * dA = FFLAS::fflas_new<size_t>(Ma);
326	size_t * dK = FFLAS::fflas_new<size_t>(Mk);
327	for (size_t i=0; i<Ma; i++){
328		dK[i] = dA[i] = degree;
329	}
330	size_t rdeg = N % degree;
331	if (rdeg)
332		dK[Mk-1] = dA[Ma-1] = rdeg;
333
334        typename Field::Element_ptr Arp = FFLAS::fflas_new (F, Ncurr, Ma);
335        size_t ldarp = Ncurr;
336        size_t deg = degree+1;
337        size_t * rp=NULL;
338            // Main loop of the arithmetic progession
339        while ((nb_full_blocks >= 1) && (Mk > 1)) {
340            size_t block_idx, it_idx, rp_val;
341            K = FFLAS::fflas_new (F, Ncurr, Ma);
342            K3 = FFLAS::fflas_new (F, Ncurr, Ma);
343            ldk = Ma;
344
345            // Computation of the rank profile
346            for (size_t i=0; i < Ncurr; ++i)
347                for (size_t j=0; j < Ma; ++j)
348                    *(Arp + j*ldarp + Ncurr-i-1) = *(Ac + i*ldac + j);
349            rp = FFLAS::fflas_new<size_t>(2*Ncurr);
350            for (size_t i=0; i<2*Ncurr; ++i)
351                rp[i] = 0;
352            size_t RR;
353            try{
354                RR = SpecRankProfile (F, Ma, Ncurr, Arp, ldarp, deg-1, rp);
355            } catch (CharpolyFailed){
356                FFLAS::fflas_delete (Arp, Ac, K, K3, rp, dA, dK);
357                throw CharpolyFailed();
358            }
359            if (RR < Ncurr){
360                //std::cerr<<"FAIL RR<Ncurr"<<std::endl;
361                FFLAS::fflas_delete (Arp, Ac, K, K3, rp, dA, dK);
362                throw CharpolyFailed();
363            }
364
365            // Computation of the degree vector dK
366            it_idx = 0;
367            rp_val = 0;
368            size_t gg = 0;
369            size_t dtot=0;
370            block_idx = 0;
371            nb_full_blocks = 0;
372            while (dtot<Ncurr){
373                do {gg++; rp_val++; it_idx++;}
374                while ( /*(gg<Ncurr ) &&*/ (rp[gg] == rp_val) && (it_idx < deg ));
375                if ((block_idx)&&(it_idx > dK[block_idx-1])){
376                    FFLAS::fflas_delete (Arp, Ac, K, K3, rp, dA, dK);
377                    throw CharpolyFailed();
378                    //std::cerr<<"FAIL d non decreasing"<<std::endl;
379                    //exit(-1);
380                }
381                dK[block_idx++] = it_idx;
382                dtot += it_idx;
383                if (it_idx == deg)
384                    nb_full_blocks ++;
385                it_idx=0;
386                rp_val = rp[gg];
387            }
388
389            Mk = block_idx;
390
391            // Selection of dense colums of K
392            for (size_t i=0; i < nb_full_blocks; ++i){
393                FFLAS::fassign (F, Ncurr, Ac+i, ldac, K+i, ldk);
394            }
395
396            // K <- QK K
397            size_t pos = nb_full_blocks*(deg-1);
398            for (size_t i = nb_full_blocks; i < Mk; ++i){
399                for (size_t j=0; j<Ncurr; ++j)
400                    F.assign (*(K + i + j*ldk), F.zero);
401                F.assign (*(K + i + (pos + dK[i]-1)*ldk),F.one);
402                pos += dA[i];
403            }
404
405            // Copying K3 <- K
406            for (size_t i=0; i<Mk; ++i)
407                FFLAS::fassign (F, Ncurr, K+i, ldk, K3+i, ldk);
408            Protected::CompressRowsQK (F, Mk, K3 + nb_full_blocks*(deg-1)*ldk, ldk,
409                                       Arp, ldarp, dK+nb_full_blocks, deg, Mk-nb_full_blocks);
410
411            // K <- PA K
412            Protected::CompressRows (F, nb_full_blocks, K, ldk, Arp, ldarp, dA, Ma);
413
414            // A <- newQA^T K (compress)
415            Protected::CompressRowsQA (F, Ma, Ac, ldac, Arp, ldarp, dA, Ma);
416
417            // K <- A K
418            fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, Ncurr-Ma, nb_full_blocks, Ma,F.one,
419                   Ac, ldac, K+(Ncurr-Ma)*ldk, ldk,F.one, K, ldk);
420            fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, Ma, nb_full_blocks, Ma,F.one,
421                   Ac+(Ncurr-Ma)*ldac, ldac, K+(Ncurr-Ma)*ldk, ldk, F.zero, Arp, ldarp);
422            for (size_t i=0; i< Ma; ++i)
423                FFLAS::fassign(F, nb_full_blocks, Arp+i*ldarp, 1, K+(Ncurr-Ma+i)*ldk, 1);
424
425            // Copying the last rows of A times K
426            size_t offset = (deg-2)*nb_full_blocks;
427            for (size_t i = nb_full_blocks; i < Mk; ++i) {
428                for (size_t j=0; j<Ncurr; ++j)
429                    F.assign(*(K+i+j*ldk), F.zero);
430                if (dK[i] == dA[i]) // copy the column of A
431                    FFLAS::fassign (F, Ncurr, Ac+i, ldac, K+i, ldk);
432                else{
433                    F.assign (*(K + i + (offset+dK[i]-1)*ldk),F.one);
434                }
435                offset += dA[i]-1;
436            }
437
438            // K <- QA K
439            Protected::DeCompressRowsQA (F, Mk, Ncurr, K, ldk, Arp, ldarp, dA, Ma);
440
441            // K <- QK^T K
442            Protected::CompressRowsQK (F, Mk, K + nb_full_blocks*(deg-1)*ldk, ldk, Arp, ldarp,
443                                       dK+nb_full_blocks, deg, Mk-nb_full_blocks);
444
445            // K <- K^-1 K
446            size_t *P=FFLAS::fflas_new<size_t>(Mk);
447            size_t *Q=FFLAS::fflas_new<size_t>(Mk);
448            if (LUdivine (F, FFLAS::FflasNonUnit, FFLAS::FflasNoTrans, Mk, Mk , K3 + (Ncurr-Mk)*ldk, ldk, P, Q) < Mk){
449                // should never happen (not a LAS VEGAS check)
450                //std::cerr<<"FAIL R2 < MK"<<std::endl;
451                //			exit(-1);
452            }
453            ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasLower, FFLAS::FflasNoTrans, FFLAS::FflasUnit, Mk, Mk,F.one,
454                   K3 + (Ncurr-Mk)*ldk, ldk, K+(Ncurr-Mk)*ldk, ldk);
455            ftrsm (F, FFLAS::FflasLeft, FFLAS::FflasUpper, FFLAS::FflasNoTrans, FFLAS::FflasNonUnit, Mk, Mk,F.one,
456                   K3+(Ncurr-Mk)*ldk, ldk, K+(Ncurr-Mk)*ldk, ldk);
457            applyP (F, FFLAS::FflasLeft, FFLAS::FflasTrans,
458                    Mk, 0,(int) Mk, K+(Ncurr-Mk)*ldk,ldk, P);
459            fgemm (F, FFLAS::FflasNoTrans, FFLAS::FflasNoTrans, Ncurr-Mk, Mk, Mk,F.mOne,
460                   K3, ldk, K+(Ncurr-Mk)*ldk,ldk,F.one, K, ldk);
461            FFLAS::fflas_delete( P);
462            FFLAS::fflas_delete( Q);
463
464            // K <- PK^T K
465            Protected::DeCompressRows (F, Mk, Ncurr, K, ldk, Arp, ldarp, dK, Mk);
466
467            // K <- K PK (dA <- dK)
468            if (nb_full_blocks*deg < Ncurr)
469                Ma = nb_full_blocks+1;
470            else
471                Ma = nb_full_blocks;
472
473            for (size_t i=0; i< Ma; ++i)
474                dA[i] = dK[i];
475
476            // Recovery of the completed invariant factors
477            offset = Ncurr-1;
478            size_t oldNcurr = Ncurr;
479            for (size_t i=Mk-1; i>=nb_full_blocks+1;  --i)
480                if (dK[i] >= 1){
481                    Polynomial  PP (dK [i]+1);
482                    F.assign(PP[dK[i]],F.one);
483                    for (size_t j=0; j < dK[i]; ++j)
484                        F.neg( PP[dK[i]-j-1], *(K + i + (offset-j)*ldk));
485                    frobeniusForm.push_front(PP);
486                    offset -= dK[i];
487                    Ncurr -= dK[i];
488                }
489            for (size_t i= offset+1; i<oldNcurr; ++i)
490                for (size_t j=0; j<nb_full_blocks+1; ++j){
491                    if (!F.isZero( *(K+i*ldk+j) )){
492                        //std::cerr<<"FAIL C != 0"<<std::endl;
493                        FFLAS::fflas_delete (rp, Arp, Ac, K, K3, dA, dK);
494                        throw CharpolyFailed();
495                    }
496                }
497
498            // A <- K
499            FFLAS::fflas_delete (Ac);
500            FFLAS::fflas_delete (Arp);
501            Ac = FFLAS::fflas_new (F, Ncurr, Mk);
502            ldac = Mk;
503            Arp = FFLAS::fflas_new (F, Ncurr, Mk);
504            ldarp=Ncurr;
505            for (size_t i=0; i < Ncurr; ++i )
506                FFLAS::fassign (F, Mk, K + i*ldk, 1, Ac + i*ldac, 1);
507
508            deg++;
509            FFLAS::fflas_delete (K3, rp);
510            if ((nb_full_blocks > 0) && (Mk > 1))
511                FFLAS::fflas_delete(K);
512
513        }
514
515        // Recovery of the first invariant factor
516        Polynomial Pl(dK [0]+1);
517        F.assign(Pl[dK[0]],F.one);
518        for (size_t j=0; j < dK[0]; ++j)
519            F.neg( Pl[j], *(K  + j*ldk));
520        frobeniusForm.push_front(Pl);
521        FFLAS::fflas_delete (Arp, Ac, K, dA, dK);
522#ifdef __FFLASFFPACK_ARITHPROG_PROFILING
523        tim.stop();
524        std::cerr<<"Arith Prog                 : "<<tim.usertime()<<std::endl;
525#endif
526        return frobeniusForm;
527    }
528
529    template <class Field>
530    void CompressRowsQK (Field& F, const size_t M,
531                         typename Field::Element_ptr A, const size_t lda,
532                         typename Field::Element_ptr tmp, const size_t ldtmp,
533                         const size_t * d, const size_t deg,const size_t nb_blocs)
534    {
535
536        int currtmp = 0;
537        size_t currw = d[0]-1;
538        size_t currr = d[0]-1;
539        for (int i = 0; i< int(nb_blocs)-1; ++i){
540            // FFLAS::fassign(F,deg-d[i],M,A+currr*lda,lda,tmp+(size_t)currtmp*ldtmp);
541            for (int j = int(d[i]-1); j<int(deg)-1; ++j, ++currr, ++currtmp)
542                FFLAS::fassign(F, M,  A + currr*lda, 1, tmp + (size_t)currtmp*ldtmp, 1);
543            // currr += (deg - d[i]);
544            for (int j=0; j < int(d[i+1]) -1; ++j, ++currr, ++currw){
545                FFLAS::fassign(F, M, A+(currr)*lda, 1, A + (currw)*lda, 1);
546            }
547        }
548        for (int i=0; i < currtmp; ++i, ++currw){
549            FFLAS::fassign (F, M, tmp + (size_t)i*ldtmp, 1, A + (currw)*lda, 1);
550        }
551    }
552
553    template <class Field>
554    void CompressRows (Field& F, const size_t M,
555                       typename Field::Element_ptr A, const size_t lda,
556                       typename Field::Element_ptr tmp, const size_t ldtmp,
557                       const size_t * d, const size_t nb_blocs)
558    {
559
560        size_t currd = d[0]-1;
561        size_t curri = d[0]-1;
562        for (int i = 0; i< int(nb_blocs)-1; ++i){
563            FFLAS::fassign(F, M,  A + currd*lda, 1, tmp + i*(int)ldtmp, 1);
564            for (int j=0; j < int(d[i+1]) -1; ++j){
565                FFLAS::fassign(F, M, A+(currd+(size_t)j+1)*lda, 1, A + (curri++)*lda, 1);
566            }
567            currd += d[i+1];
568        }
569        for (int i=0; i < int(nb_blocs)-1; ++i){
570            FFLAS::fassign (F, M, tmp + i*(int)ldtmp, 1, A + (curri++)*lda, 1);
571        }
572    }
573
574    template <class Field>
575    void DeCompressRows (Field& F, const size_t M, const size_t N,
576                         typename Field::Element_ptr A, const size_t lda,
577                         typename Field::Element_ptr tmp, const size_t ldtmp,
578                         const size_t * d, const size_t nb_blocs)
579    {
580
581        for (int i=0; i<int(nb_blocs)-1; ++i)
582            FFLAS::fassign(F, M, A + (N-nb_blocs+(size_t)i)*lda, 1, tmp + i*(int)ldtmp, 1);
583
584        size_t w_idx = N - 2;
585        size_t r_idx = N - nb_blocs - 1;
586        int i = int(nb_blocs)-1 ;
587        for (; i--; ){
588            for (size_t j = 0; j<d[i+1]-1; ++j)
589                FFLAS::fassign (F, M, A + (r_idx--)*lda, 1, A + (w_idx--)*lda, 1);
590            FFLAS::fassign (F, M, tmp + i*(int)ldtmp, 1, A + (w_idx--)*lda, 1);
591        }
592    }
593
594    template <class Field>
595    void DeCompressRowsQK (Field& F, const size_t M, const size_t N,
596                           typename Field::Element_ptr A, const size_t lda,
597                           typename Field::Element_ptr tmp, const size_t ldtmp,
598                           const size_t * d, const size_t deg,const size_t nb_blocs)
599    {
600
601        size_t zeroblockdim = 1; // the last block contributes with 1
602        size_t currtmp = 0;
603        for (int i=0; i<int(nb_blocs)-1; ++i)
604            zeroblockdim += deg - d[i];
605        for (size_t i=0; i < zeroblockdim - 1; ++i, ++currtmp)
606            FFLAS::fassign(F, M,  A + (N - zeroblockdim +i)*lda, 1, tmp + currtmp*ldtmp, 1);
607        currtmp--;
608        size_t w_idx = N - 2;
609        size_t r_idx = N - zeroblockdim - 1;
610
611        int i = int(nb_blocs)-1 ;
612        for (; i--;){
613            for (size_t j = 0; j < d [i+1] - 1; ++j)
614                FFLAS::fassign (F, M, A + (r_idx--)*lda, 1, A + (w_idx--)*lda, 1);
615            for (size_t j = 0; j < deg - d[i]; ++j)
616                FFLAS::fassign (F, M, tmp + (currtmp--)*ldtmp, 1, A + (w_idx--)*lda, 1);
617        }
618    }
619
620    template <class Field>
621    void CompressRowsQA (Field& F, const size_t M,
622                         typename Field::Element_ptr A, const size_t lda,
623                         typename Field::Element_ptr tmp, const size_t ldtmp,
624                         const size_t * d, const size_t nb_blocs)
625    {
626
627        size_t currd = 0;
628        size_t curri = 0;
629        for (size_t i = 0; i< nb_blocs; ++i){
630            FFLAS::fassign(F, M,  A + currd*lda, 1, tmp + i*ldtmp, 1);
631            for (size_t j=0; j < d[i] -1; ++j)
632                FFLAS::fassign(F, M, A+(currd+j+1)*lda, 1, A + (curri++)*lda, 1);
633            currd += d[i];
634        }
635        for (size_t i=0; i < nb_blocs; ++i)
636            FFLAS::fassign (F, M, tmp + i*ldtmp, 1, A + (curri++)*lda, 1);
637    }
638
639    template <class Field>
640    void DeCompressRowsQA (Field& F, const size_t M, const size_t N,
641                           typename Field::Element_ptr A, const size_t lda,
642                           typename Field::Element_ptr tmp, const size_t ldtmp,
643                           const size_t * d, const size_t nb_blocs)
644    {
645
646        for (size_t i=0; i<nb_blocs; ++i)
647            FFLAS::fassign(F, M, A + (N-nb_blocs+i)*lda, 1, tmp + i*ldtmp, 1);
648
649        size_t w_idx = N - 1;
650        size_t r_idx = N - nb_blocs - 1;
651        int i = int(nb_blocs) ;
652        for (; i--; ){
653            for (size_t j = 0; j<d[i]-1; ++j)
654                FFLAS::fassign (F, M, A + (r_idx--)*lda, 1, A + (w_idx--)*lda, 1);
655            FFLAS::fassign (F, M, tmp + i*(int)ldtmp, 1, A + (w_idx--)*lda, 1);
656        }
657    }
658
659} // Protected
660} //FFPACK
661/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
662// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
663