1 /*  QRreduce.c  */
2 
3 #include "../A2.h"
4 
5 /*--------------------------------------------------------------------*/
6 static int copyIntoVec1 ( A2 *mtxA, double H0[],
7    int msglvl, FILE *msgFile ) ;
8 static double getHouseholderVector1 ( int type, int n, double H0[],
9    double *pbeta0, int msglvl, FILE *msgFile ) ;
10 static double computeW1 ( A2 *mtxA, double H0[], double W0[],
11    int msglvl, FILE *msgFile ) ;
12 static double updateA1 ( A2 *mtxA, double H0[], double beta0,
13    double W0[], int msglvl, FILE *msgFile ) ;
14 /*--------------------------------------------------------------------*/
15 /*
16    --------------------------------------------------------------
17    purpose -- compute A = QR, where Q is a product of householder
18               vectors, (I - beta_j v_j v_j^T). on return, v_j is
19               found in the lower triangle of A, v_j(j) = 1.0.
20 
21    return value -- # of floating point operations
22 
23    created -- 98may25, cca
24    --------------------------------------------------------------
25 */
26 double
A2_QRreduce(A2 * mtxA,DV * workDV,int msglvl,FILE * msgFile)27 A2_QRreduce (
28    A2       *mtxA,
29    DV       *workDV,
30    int      msglvl,
31    FILE     *msgFile
32 ) {
33 A2       tempA ;
34 double   nops ;
35 double   beta0 ;
36 double   *colA, *H0, *W0 ;
37 int      inc1, inc2, jcol, lastrow, length, ncolA, nrowA, nstep ;
38 /*
39    ---------------
40    check the input
41    ---------------
42 */
43 if (   mtxA == NULL || workDV == NULL
44     || (msglvl > 0 && msgFile == NULL) ) {
45    fprintf(stderr, "\n fatal error in A2_QRreduce()"
46            "\n bad input\n") ;
47    exit(-1) ;
48 }
49 if ( ! (A2_IS_REAL(mtxA) || A2_IS_COMPLEX(mtxA)) ) {
50    fprintf(stderr, "\n fatal error in A2_QRreduce()"
51            "\n matrix must be real or complex\n") ;
52    exit(-1) ;
53 }
54 nrowA = A2_nrow(mtxA) ;
55 ncolA = A2_ncol(mtxA) ;
56 inc1  = A2_inc1(mtxA) ;
57 inc2  = A2_inc2(mtxA) ;
58 if ( A2_IS_REAL(mtxA) ) {
59    DV_setSize(workDV, nrowA + ncolA) ;
60    H0 = DV_entries(workDV) ;
61    W0 = H0 + nrowA ;
62 } else if ( A2_IS_COMPLEX(mtxA) ) {
63    DV_setSize(workDV, 2*(nrowA + ncolA)) ;
64    H0 = DV_entries(workDV) ;
65    W0 = H0 + 2*nrowA ;
66 }
67 /*
68    -------------------------------------------------
69    determine the number of steps = min(ncolA, nrowA)
70    -------------------------------------------------
71 */
72 nstep = (ncolA <= nrowA) ? ncolA : nrowA ;
73 /*
74    -------------------
75    loop over the steps
76    -------------------
77 */
78 nops = 0.0 ;
79 for ( jcol = 0 ; jcol < nstep ; jcol++ ) {
80    if ( msglvl > 3 ) {
81       fprintf(msgFile, "\n %% jcol = %d", jcol) ;
82    }
83 /*
84    ----------------------------------
85    copy the column of A into a vector
86    and find the last nonzero element
87    ----------------------------------
88 */
89    A2_subA2(&tempA, mtxA, jcol, nrowA-1, jcol, ncolA-1) ;
90    length = 1 + copyIntoVec1(&tempA, H0, msglvl, msgFile) ;
91    lastrow = jcol + length - 1 ;
92    if ( msglvl > 5 ) {
93       fprintf(msgFile,
94             "\n %% return from copyIntoVec1, length = %d, lastrow = %d",
95             length, lastrow) ;
96    }
97 /*
98    ------------------------------
99    compute the Householder vector
100    and place into the column of A
101    ------------------------------
102 */
103    colA = A2_column(mtxA, jcol) ;
104    if ( A2_IS_REAL(mtxA) ) {
105       nops += getHouseholderVector1(SPOOLES_REAL, length, H0,
106                                     &beta0, msglvl, msgFile) ;
107       A2_subA2(&tempA, mtxA, jcol, lastrow, jcol, jcol) ;
108       A2_setColumn(&tempA, H0, 0) ;
109       H0[0] = 1.0 ;
110    } else if ( A2_IS_COMPLEX(mtxA) ) {
111       nops += getHouseholderVector1(SPOOLES_COMPLEX, length, H0,
112                                     &beta0, msglvl, msgFile) ;
113       A2_subA2(&tempA, mtxA, jcol, lastrow, jcol, jcol) ;
114       A2_setColumn(&tempA, H0, 0) ;
115       H0[0] = 1.0 ; H0[1] = 0.0 ;
116    }
117    if ( msglvl > 5 && jcol == 0 ) {
118       fprintf(msgFile, "\n %% beta0 = %12.4e;", beta0) ;
119    }
120    if ( beta0 != 0.0 && jcol + 1 < ncolA ) {
121       A2_subA2(&tempA, mtxA, jcol, lastrow, jcol+1, ncolA-1) ;
122 /*
123       ------------------------------------------------
124       compute w = v^T * A(jcol:lastrow,jcol+1:nrowA-1)
125       ------------------------------------------------
126 */
127       if ( msglvl > 3 ) {
128          fprintf(msgFile, "\n %% compute w") ;
129       }
130       nops += computeW1(&tempA, H0, W0, msglvl, msgFile) ;
131 /*
132       -------------------------------------------------
133       update A(jcol:lastrow,jcol+1:nrowA-1) -= beta*v*w
134       -------------------------------------------------
135 */
136       if ( msglvl > 3 ) {
137          fprintf(msgFile, "\n %% update A") ;
138       }
139       nops += updateA1(&tempA, H0, beta0, W0, msglvl, msgFile) ;
140    }
141 }
142 return(nops) ; }
143 
144 /*--------------------------------------------------------------------*/
145 /*
146    --------------------------------------------------
147    copy the first column of mtxA into the vector H0[]
148 
149    created -- 98may30, cca
150    --------------------------------------------------
151 */
152 static int
copyIntoVec1(A2 * mtxA,double H0[],int msglvl,FILE * msgFile)153 copyIntoVec1 (
154    A2       *mtxA,
155    double   H0[],
156    int      msglvl,
157    FILE     *msgFile
158 ) {
159 double   ival, rval ;
160 double   *colA ;
161 int      ii, inc1, irow, jj, lastrow, ncolA, nrowA ;
162 /*
163    ----------------------------------
164    copy the column of A into a vector
165    and find the last nonzero element
166    ----------------------------------
167 */
168 nrowA   = mtxA->n1 ;
169 ncolA   = mtxA->n2 ;
170 inc1    = mtxA->inc1 ;
171 lastrow = -1 ;
172 colA    = A2_column(mtxA, 0) ;
173 if ( A2_IS_REAL(mtxA) ) {
174    for ( irow = ii = jj = 0 ;
175          irow < nrowA ;
176          irow++, ii += inc1, jj++ ) {
177       rval = colA[ii] ;
178       if ( rval != 0.0 ) {
179          H0[jj] = rval ;
180          lastrow = irow ;
181       }
182    }
183 } else if ( A2_IS_COMPLEX(mtxA) ) {
184    for ( irow = ii = jj = 0 ;
185          irow < nrowA ;
186          irow++, ii += 2*inc1, jj += 2 ) {
187       rval = colA[ii] ; ival = colA[ii+1] ;
188       if ( rval != 0.0 || ival != 0.0 ) {
189          H0[jj] = rval ; H0[jj+1] = ival ;
190          lastrow = irow ;
191       }
192    }
193 }
194 return(lastrow) ; }
195 
196 /*--------------------------------------------------------------------*/
197 /*
198    ----------------------------------------------------------------
199    compute a householder transformation
200     (I - beta*v*v^H)x = alpha*e_1
201    where v[0] = 1.0
202    on input,
203       H0 contains x,
204    on output,
205       beta0 contains beta
206       H0[0] = alpha
207       H0[1:n-1] = v[1:n] ;
208 
209    created -- 98may30, cca
210    ----------------------------------------------------------------
211 */
212 static double
getHouseholderVector1(int type,int n,double H0[],double * pbeta0,int msglvl,FILE * msgFile)213 getHouseholderVector1 (
214    int      type,
215    int      n,
216    double   H0[],
217    double   *pbeta0,
218    int      msglvl,
219    FILE     *msgFile
220 ) {
221 double   beta0, ifac, ival, nops, normx, rfac, rval, sigma,
222          sum, v0imag, v0real, y0imag, y0real ;
223 int      ii, jj ;
224 /*
225    --------------------------------------------
226    compute ||H0(1:n-1)||_2^2 and the
227    row that contains the last nonzero entry
228    --------------------------------------------
229 */
230 sigma   = 0.0 ;
231 beta0   = 0.0 ;
232 nops    = 0.0 ;
233 if ( type == SPOOLES_REAL ) {
234    for ( ii = 1 ; ii < n ; ii++ ) {
235       rval = H0[ii] ;
236       sigma   += rval*rval ;
237    }
238    nops += 2*(n-1) ;
239 } else if ( type == SPOOLES_COMPLEX ) {
240    for ( ii = 1, jj = 2 ; ii < n ; ii++, jj += 2 ) {
241       rval = H0[jj] ; ival = H0[jj+1] ;
242       sigma   += rval*rval + ival*ival ;
243    }
244    nops += 4*(n-1) ;
245 }
246 if ( msglvl > 3 ) {
247    fprintf(msgFile, "\n %% sigma = %12.4e", sigma) ;
248 }
249 if ( sigma != 0.0 ) {
250 /*
251    --------------------------------------------
252    there are nonzero entries below the diagonal
253    --------------------------------------------
254 */
255    if ( type == SPOOLES_REAL ) {
256       rval = H0[0] ;
257       if ( rval == 0.0 ) {
258          normx  = sqrt(sigma) ;
259          v0real =   normx ;
260          y0real = - normx ;
261          nops++ ;
262       } else {
263          normx  = sqrt(sigma + rval*rval) ;
264          rfac   = normx/fabs(rval) ;
265          v0real = rval*(1 + rfac) ;
266          y0real = -rfac*rval ;
267          nops += 7 ;
268       }
269    } else if ( type == SPOOLES_COMPLEX ) {
270       rval = H0[0] ; ival = H0[1] ;
271       if ( rval == 0.0 && ival == 0.0 ) {
272          normx  = sqrt(sigma) ;
273          v0real =   normx ; v0imag = 0.0 ;
274          y0real = - normx ; y0imag = 0.0 ;
275          nops += 2 ;
276       } else {
277          normx  = sqrt(sigma + rval*rval + ival*ival) ;
278          rfac   = normx/Zabs(rval, ival) ;
279          v0real = rval + rfac*rval ; v0imag = ival + rfac*ival ;
280          y0real = -rfac*rval ;       y0imag = -rfac*ival ;
281          nops += 16 ;
282       }
283    }
284    if ( msglvl > 3 ) {
285       fprintf(msgFile, "\n %% normx = %12.4e", normx) ;
286    }
287 /*
288    -------------------------------------
289    scale u so u1 = 1.0 and compute beta0
290    -------------------------------------
291 */
292    if ( type == SPOOLES_REAL ) {
293       rfac = 1./v0real ;
294       for ( ii = 1 ; ii < n ; ii++ ) {
295          H0[ii] *= rfac ;
296       }
297       sum = 1.0 ;
298       for ( ii = 1 ; ii < n ; ii++ ) {
299          rval = H0[ii] ;
300          sum += rval*rval ;
301       }
302       nops += 3*(n-1) ;
303       beta0 = 2./sum ;
304 /*
305       rfac = 1./v0real ;
306       sum = 1.0 ;
307       for ( ii = 1 ; ii < n ; ii++ ) {
308          rval = H0[ii] = rfac*H0[ii] ;
309          sum += rval*rval ;
310       }
311       nops += 3*(n-1) ;
312       beta0 = 2./sum ;
313 */
314    } else if ( type == SPOOLES_COMPLEX ) {
315       Zrecip(v0real, v0imag, &rfac, &ifac) ;
316       sum = 1.0 ;
317       for ( ii = 1, jj = 2 ; ii < n ; ii++, jj += 2 ) {
318          rval = H0[jj] ; ival = H0[jj+1] ;
319          H0[jj]   = rfac*rval - ifac*ival ;
320          H0[jj+1] = rfac*ival + ifac*rval ;
321          rval = H0[jj] ; ival = H0[jj+1] ;
322          sum += rval*rval + ival*ival ;
323       }
324       nops += 10*(n-1) + 5 ;
325       beta0 = 2./sum ;
326    }
327    if ( msglvl > 2 ) {
328       fprintf(msgFile, "\n %% sum = %12.4e, beta0 = %12.4e",
329               sum, beta0) ;
330    }
331 /*
332    ---------------------------------------------
333    set the first entry of the transformed column
334    ---------------------------------------------
335 */
336    if ( type == SPOOLES_REAL ) {
337       H0[0] = y0real ;
338    } else if ( type == SPOOLES_COMPLEX ) {
339       H0[0] = y0real ; H0[1] = y0imag ;
340    }
341 }
342 *pbeta0 = beta0 ;
343 /*
344 fprintf(msgFile, "\n H0") ;
345 DVfprintf(msgFile, n, H0) ;
346 */
347 
348 return(nops) ; }
349 
350 /*--------------------------------------------------------------------*/
351 /*
352    -----------------------
353    compute W0 = v^H * A
354 
355    created -- 98may30, cca
356    -----------------------
357 */
358 static double
computeW1(A2 * mtxA,double H0[],double W0[],int msglvl,FILE * msgFile)359 computeW1 (
360    A2       *mtxA,
361    double   H0[],
362    double   W0[],
363    int      msglvl,
364    FILE     *msgFile
365 ) {
366 double   nops ;
367 int      inc1, inc2, ncolA, nrowA ;
368 
369 if ( msglvl > 5 ) {
370    fprintf(msgFile, "\n %% inside computeW1, nrow %d, ncol %d",
371            mtxA->n1, mtxA->n2) ;
372 }
373 
374 nrowA = mtxA->n1 ;
375 ncolA = mtxA->n2 ;
376 inc1  = mtxA->inc1 ;
377 inc2  = mtxA->inc2 ;
378 if ( inc1 != 1 && inc2 != 1 ) {
379    fprintf(stderr, "\n error in computeW1"
380            "\n inc1 = %d, inc2 = %d\n", inc1, inc2) ;
381    exit(-1) ;
382 }
383 nops  = 0.0 ;
384 if ( A2_IS_REAL(mtxA) ) {
385    int      irow, jcol ;
386 
387    if ( inc1 == 1 ) {
388       double   sums[3] ;
389       double   *colA0, *colA1, *colA2 ;
390 /*
391       ----------------------------
392       A is column major,
393       compute W(j) = H0^T * A(*,j)
394       ----------------------------
395 */
396       for ( jcol = 0 ; jcol < ncolA - 2 ; jcol += 3 ) {
397          colA0 = A2_column(mtxA, jcol)   ;
398          colA1 = A2_column(mtxA, jcol+1) ;
399          colA2 = A2_column(mtxA, jcol+2) ;
400          DVdot13(nrowA, H0, colA0, colA1, colA2, sums) ;
401          W0[jcol]   = sums[0] ;
402          W0[jcol+1] = sums[1] ;
403          W0[jcol+2] = sums[2] ;
404          nops += 6*nrowA ;
405       }
406       if ( jcol == ncolA - 2 ) {
407          colA0 = A2_column(mtxA, jcol)   ;
408          colA1 = A2_column(mtxA, jcol+1) ;
409          DVdot12(nrowA, H0, colA0, colA1, sums) ;
410          W0[jcol]   = sums[0] ;
411          W0[jcol+1] = sums[1] ;
412          nops += 4*nrowA ;
413       } else if ( jcol == ncolA - 1 ) {
414          colA0 = A2_column(mtxA, jcol)   ;
415          DVdot11(nrowA, H0, colA0, sums) ;
416          W0[jcol] = sums[0] ;
417          nops += 2*nrowA ;
418       }
419    } else {
420       double   alpha[3] ;
421       double   *rowA0, *rowA1, *rowA2 ;
422 /*
423       -------------------------------
424       A is row major
425       compute W := W + H0(j) * A(j,*)
426       -------------------------------
427 */
428       DVzero(ncolA, W0) ;
429       for ( irow = 0 ; irow < nrowA - 2 ; irow += 3 ) {
430          rowA0 = A2_row(mtxA, irow) ;
431          rowA1 = A2_row(mtxA, irow+1) ;
432          rowA2 = A2_row(mtxA, irow+2) ;
433          alpha[0] = H0[irow]   ;
434          alpha[1] = H0[irow+1] ;
435          alpha[2] = H0[irow+2] ;
436          DVaxpy13(ncolA, W0, alpha, rowA0, rowA1, rowA2) ;
437          nops += 6*ncolA ;
438       }
439       if ( irow == nrowA - 2 ) {
440          rowA0 = A2_row(mtxA, irow) ;
441          rowA1 = A2_row(mtxA, irow+1) ;
442          alpha[0] = H0[irow]   ;
443          alpha[1] = H0[irow+1] ;
444          DVaxpy12(ncolA, W0, alpha, rowA0, rowA1) ;
445          nops += 4*ncolA ;
446       } else if ( irow == nrowA - 1 ) {
447          rowA0 = A2_row(mtxA, irow) ;
448          alpha[0] = H0[irow]   ;
449          DVaxpy11(ncolA, W0, alpha, rowA0) ;
450          nops += 2*ncolA ;
451       }
452    }
453 } else if ( A2_IS_COMPLEX(mtxA) ) {
454    int      irow, jcol ;
455 
456    if ( inc1 == 1 ) {
457       double   sums[6] ;
458       double   *colA0, *colA1, *colA2 ;
459 /*
460       ----------------------------
461       A is column major
462       compute W(j) = H0^H * A(*,j)
463       ----------------------------
464 */
465       for ( jcol = 0 ; jcol < ncolA - 2 ; jcol += 3 ) {
466          colA0 = A2_column(mtxA, jcol)   ;
467          colA1 = A2_column(mtxA, jcol+1) ;
468          colA2 = A2_column(mtxA, jcol+2) ;
469          ZVdotC13(nrowA, H0, colA0, colA1, colA2, sums) ;
470          W0[2*jcol]     = sums[0] ; W0[2*jcol+1]     = sums[1] ;
471          W0[2*(jcol+1)] = sums[2] ; W0[2*(jcol+1)+1] = sums[3] ;
472          W0[2*(jcol+2)] = sums[4] ; W0[2*(jcol+2)+1] = sums[5] ;
473          nops += 24*nrowA ;
474       }
475       if ( jcol == ncolA - 2 ) {
476          colA0 = A2_column(mtxA, jcol)   ;
477          colA1 = A2_column(mtxA, jcol+1) ;
478          ZVdotC12(nrowA, H0, colA0, colA1, sums) ;
479          W0[2*jcol]     = sums[0] ; W0[2*jcol+1]     = sums[1] ;
480          W0[2*(jcol+1)] = sums[2] ; W0[2*(jcol+1)+1] = sums[3] ;
481          nops += 16*nrowA ;
482       } else if ( jcol == ncolA - 1 ) {
483          colA0 = A2_column(mtxA, jcol)   ;
484          ZVdotC11(nrowA, H0, colA0, sums) ;
485          W0[2*jcol]     = sums[0] ; W0[2*jcol+1]     = sums[1] ;
486          nops += 8*nrowA ;
487       }
488    } else {
489       double   alpha[6] ;
490       double   *rowA0, *rowA1, *rowA2 ;
491 /*
492       ---------------------------------
493       A is row major
494       compute W := W + H0(j)^H * A(j,*)
495       ---------------------------------
496 */
497       DVzero(2*ncolA, W0) ;
498       for ( irow = 0 ; irow < nrowA - 2 ; irow += 3 ) {
499          rowA0 = A2_row(mtxA, irow) ;
500          rowA1 = A2_row(mtxA, irow+1) ;
501          rowA2 = A2_row(mtxA, irow+2) ;
502          alpha[0] = H0[2*irow]     ; alpha[1] = -H0[2*irow+1]   ;
503          alpha[2] = H0[2*(irow+1)] ; alpha[3] = -H0[2*(irow+1)+1] ;
504          alpha[4] = H0[2*(irow+2)] ; alpha[5] = -H0[2*(irow+2)+1] ;
505          ZVaxpy13(ncolA, W0, alpha, rowA0, rowA1, rowA2) ;
506          nops += 24*ncolA ;
507       }
508       if ( irow == nrowA - 2 ) {
509          rowA0 = A2_row(mtxA, irow) ;
510          rowA1 = A2_row(mtxA, irow+1) ;
511          alpha[0] = H0[2*irow]     ; alpha[1] = -H0[2*irow+1]   ;
512          alpha[2] = H0[2*(irow+1)] ; alpha[3] = -H0[2*(irow+1)+1] ;
513          ZVaxpy12(ncolA, W0, alpha, rowA0, rowA1) ;
514          nops += 16*ncolA ;
515       } else if ( irow == nrowA - 1 ) {
516          rowA0 = A2_row(mtxA, irow) ;
517          alpha[0] = H0[2*irow] ; alpha[1] = -H0[2*irow+1] ;
518          ZVaxpy11(ncolA, W0, alpha, rowA0) ;
519          nops += 8*ncolA ;
520       }
521    }
522 }
523 return(nops) ; }
524 
525 /*--------------------------------------------------------------------*/
526 /*
527    ---------------------------------
528    compute A := A - H0 * beta0 * W0
529 
530    created -- 98may30, cca
531    ---------------------------------
532 */
533 static double
updateA1(A2 * mtxA,double H0[],double beta0,double W0[],int msglvl,FILE * msgFile)534 updateA1 (
535    A2       *mtxA,
536    double   H0[],
537    double   beta0,
538    double   W0[],
539    int      msglvl,
540    FILE     *msgFile
541 ) {
542 double   nops ;
543 int      inc1, inc2, ncolA, nrowA ;
544 
545 if ( msglvl > 5 ) {
546    fprintf(msgFile, "\n %% inside updateA1, nrow %d, ncol %d",
547            mtxA->n1, mtxA->n2) ;
548 }
549 
550 nrowA = mtxA->n1 ;
551 ncolA = mtxA->n2 ;
552 inc1  = mtxA->inc1 ;
553 inc2  = mtxA->inc2 ;
554 nops  = 0.0 ;
555 if ( A2_IS_REAL(mtxA) ) {
556    int      irow, jcol ;
557 
558    if ( inc1 == 1 ) {
559       double   alpha[3] ;
560       double   *colA0, *colA1, *colA2 ;
561 /*
562       -----------------------------------------
563       A is column major
564       compute A(:,jcol) -= beta * W0(jcol) * H0
565       -----------------------------------------
566 */
567       for ( jcol = 0 ; jcol < ncolA - 2 ; jcol += 3 ) {
568          colA0 = A2_column(mtxA, jcol) ;
569          colA1 = A2_column(mtxA, jcol+1) ;
570          colA2 = A2_column(mtxA, jcol+2) ;
571          alpha[0] = -beta0 * W0[jcol] ;
572          alpha[1] = -beta0 * W0[jcol+1] ;
573          alpha[2] = -beta0 * W0[jcol+2] ;
574          DVaxpy31(nrowA, colA0, colA1, colA2, alpha, H0) ;
575          nops += 6*nrowA ;
576       }
577       if ( jcol == ncolA - 2 ) {
578          colA0 = A2_column(mtxA, jcol) ;
579          colA1 = A2_column(mtxA, jcol+1) ;
580          alpha[0] = -beta0 * W0[jcol] ;
581          alpha[1] = -beta0 * W0[jcol+1] ;
582          DVaxpy21(nrowA, colA0, colA1, alpha, H0) ;
583          nops += 4*nrowA ;
584       } else if ( jcol == ncolA - 1 ) {
585          colA0 = A2_column(mtxA, jcol) ;
586          alpha[0] = -beta0 * W0[jcol] ;
587          DVaxpy11(nrowA, colA0, alpha, H0) ;
588          nops += 2*nrowA ;
589       }
590    } else {
591       double   alpha[3] ;
592       double   *rowA0, *rowA1, *rowA2 ;
593 /*
594       -----------------------------------------
595       A is row major
596       compute A(irow,:) -= H0[irow]*beta0*W0(:)
597       -----------------------------------------
598 */
599       for ( irow = 0 ; irow < nrowA - 2 ; irow += 3 ) {
600          rowA0 = A2_row(mtxA, irow) ;
601          rowA1 = A2_row(mtxA, irow+1) ;
602          rowA2 = A2_row(mtxA, irow+2) ;
603          alpha[0] = -beta0 * H0[irow] ;
604          alpha[1] = -beta0 * H0[irow+1] ;
605          alpha[2] = -beta0 * H0[irow+2] ;
606          DVaxpy31(ncolA, rowA0, rowA1, rowA2, alpha, W0) ;
607          nops += 6*ncolA + 3 ;
608       }
609       if ( irow == nrowA - 2 ) {
610          rowA0 = A2_row(mtxA, irow) ;
611          rowA1 = A2_row(mtxA, irow+1) ;
612          alpha[0] = -beta0 * H0[irow] ;
613          alpha[1] = -beta0 * H0[irow+1] ;
614          DVaxpy21(ncolA, rowA0, rowA1, alpha, W0) ;
615          nops += 4*ncolA + 2 ;
616       } else if ( irow == nrowA - 1 ) {
617          rowA0 = A2_row(mtxA, irow) ;
618          alpha[0] = -beta0 * H0[irow] ;
619          DVaxpy11(ncolA, rowA0, alpha, W0) ;
620          nops += 2*ncolA + 1 ;
621       }
622    }
623 } else if ( A2_IS_COMPLEX(mtxA) ) {
624    int      irow, jcol ;
625 
626    if ( inc1 == 1 ) {
627       double   alpha[6] ;
628       double   *colA0, *colA1, *colA2 ;
629 /*
630       -----------------------------------------
631       A is column major
632       compute A(:,jcol) -= beta * W0(jcol) * H0
633       -----------------------------------------
634 */
635       for ( jcol = 0 ; jcol < ncolA - 2 ; jcol += 3 ) {
636          colA0 = A2_column(mtxA, jcol) ;
637          colA1 = A2_column(mtxA, jcol+1) ;
638          colA2 = A2_column(mtxA, jcol+2) ;
639          alpha[0] = -beta0 * W0[2*jcol] ;
640          alpha[1] = -beta0 * W0[2*jcol+1] ;
641          alpha[2] = -beta0 * W0[2*(jcol+1)] ;
642          alpha[3] = -beta0 * W0[2*(jcol+1)+1] ;
643          alpha[4] = -beta0 * W0[2*(jcol+2)] ;
644          alpha[5] = -beta0 * W0[2*(jcol+2)+1] ;
645          ZVaxpy31(nrowA, colA0, colA1, colA2, alpha, H0) ;
646          nops += 24*nrowA ;
647       }
648       if ( jcol == ncolA - 2 ) {
649          colA0 = A2_column(mtxA, jcol) ;
650          colA1 = A2_column(mtxA, jcol+1) ;
651          alpha[0] = -beta0 * W0[2*jcol] ;
652          alpha[1] = -beta0 * W0[2*jcol+1] ;
653          alpha[2] = -beta0 * W0[2*(jcol+1)] ;
654          alpha[3] = -beta0 * W0[2*(jcol+1)+1] ;
655          ZVaxpy21(nrowA, colA0, colA1, alpha, H0) ;
656          nops += 16*nrowA ;
657       } else if ( jcol == ncolA - 1 ) {
658          colA0 = A2_column(mtxA, jcol) ;
659          alpha[0] = -beta0 * W0[2*jcol] ;
660          alpha[1] = -beta0 * W0[2*jcol+1] ;
661          ZVaxpy11(nrowA, colA0, alpha, H0) ;
662          nops += 8*nrowA ;
663       }
664    } else {
665       double   alpha[6] ;
666       double   *rowA0, *rowA1, *rowA2 ;
667 /*
668       -----------------------------------------
669       A is row major
670       compute A(irow,:) -= H0[irow]*beta0*W0(:)
671       -----------------------------------------
672 */
673       for ( irow = 0 ; irow < nrowA - 2 ; irow += 3 ) {
674          rowA0 = A2_row(mtxA, irow) ;
675          rowA1 = A2_row(mtxA, irow+1) ;
676          rowA2 = A2_row(mtxA, irow+2) ;
677          alpha[0] = -beta0 * H0[2*irow] ;
678          alpha[1] = -beta0 * H0[2*irow+1] ;
679          alpha[2] = -beta0 * H0[2*(irow+1)] ;
680          alpha[3] = -beta0 * H0[2*(irow+1)+1] ;
681          alpha[4] = -beta0 * H0[2*(irow+2)] ;
682          alpha[5] = -beta0 * H0[2*(irow+2)+1] ;
683          ZVaxpy31(ncolA, rowA0, rowA1, rowA2, alpha, W0) ;
684          nops += 24*ncolA + 12 ;
685       }
686       if( irow == nrowA - 2 ) {
687          rowA0 = A2_row(mtxA, irow) ;
688          rowA1 = A2_row(mtxA, irow+1) ;
689          alpha[0] = -beta0 * H0[2*irow] ;
690          alpha[1] = -beta0 * H0[2*irow+1] ;
691          alpha[2] = -beta0 * H0[2*(irow+1)] ;
692          alpha[3] = -beta0 * H0[2*(irow+1)+1] ;
693          ZVaxpy21(ncolA, rowA0, rowA1, alpha, W0) ;
694          nops += 16*ncolA + 8 ;
695       } else if( irow == nrowA - 1 ) {
696          rowA0 = A2_row(mtxA, irow) ;
697          alpha[0] = -beta0 * H0[2*irow] ;
698          alpha[1] = -beta0 * H0[2*irow+1] ;
699          ZVaxpy11(ncolA, rowA0, alpha, W0) ;
700          nops += 8*ncolA + 4 ;
701       }
702    }
703 }
704 return(nops) ; }
705 
706 /*--------------------------------------------------------------------*/
707 /*
708    -----------------------------------------------------------
709    A contains the following data from the A = QR factorization
710 
711    A(1:ncolA,1:ncolA) = R
712    A(j+1:nrowA,j) is v_j, the j-th householder vector,
713        where v_j[j] = 1.0
714 
715    NOTE: A and Q must be column major
716 
717    created -- 98dec10, cca
718    -----------------------------------------------------------
719 */
720 void
A2_computeQ(A2 * Q,A2 * A,DV * workDV,int msglvl,FILE * msgFile)721 A2_computeQ (
722    A2     *Q,
723    A2     *A,
724    DV     *workDV,
725    int    msglvl,
726    FILE   *msgFile
727 ) {
728 double   *betas ;
729 int      irowA, jcolA, ncolA, nrowA ;
730 /*
731    ---------------
732    check the input
733    ---------------
734 */
735 if ( Q == NULL ) {
736    fprintf(stderr, "\n fatal error in A2_computeQ()"
737            "\n Q is NULL\n") ;
738    exit(-1) ;
739 }
740 if ( A == NULL ) {
741    fprintf(stderr, "\n fatal error in A2_computeQ()"
742            "\n A is NULL\n") ;
743    exit(-1) ;
744 }
745 if ( workDV == NULL ) {
746    fprintf(stderr, "\n fatal error in A2_computeQ()"
747            "\n workDV is NULL\n") ;
748    exit(-1) ;
749 }
750 if ( msglvl > 0 && msgFile == NULL ) {
751    fprintf(stderr, "\n fatal error in A2_computeQ()"
752            "\n msglvl > 0 and msgFile is NULL\n") ;
753    exit(-1) ;
754 }
755 nrowA = A2_nrow(A) ;
756 ncolA = A2_ncol(A) ;
757 if ( nrowA <= 0 ) {
758    fprintf(stderr, "\n fatal error in A2_computeQ()"
759            "\n nrowA = %d\n", nrowA) ;
760    exit(-1) ;
761 }
762 if ( ncolA <= 0 ) {
763    fprintf(stderr, "\n fatal error in A2_computeQ()"
764            "\n ncolA = %d\n", nrowA) ;
765    exit(-1) ;
766 }
767 if ( nrowA != A2_nrow(Q) ) {
768    fprintf(stderr, "\n fatal error in A2_computeQ()"
769            "\n nrowA = %d, nrowQ = %d\n", nrowA, A2_nrow(Q)) ;
770    exit(-1) ;
771 }
772 if ( ncolA != A2_ncol(Q) ) {
773    fprintf(stderr, "\n fatal error in A2_computeQ()"
774            "\n ncolA = %d, ncolQ = %d\n", ncolA, A2_ncol(Q)) ;
775    exit(-1) ;
776 }
777 switch ( A->type ) {
778 case SPOOLES_REAL :
779 case SPOOLES_COMPLEX :
780    break ;
781 default :
782    fprintf(stderr, "\n fatal error in A2_computeQ()"
783            "\n invalid type for A\n") ;
784    exit(-1) ;
785 }
786 if ( A->type != Q->type ) {
787    fprintf(stderr, "\n fatal error in A2_computeQ()"
788            "\n A->type = %d, Q->type = %d\n", A->type, Q->type) ;
789    exit(-1) ;
790 }
791 if ( A2_inc1(A) != 1 ) {
792    fprintf(stderr, "\n fatal error in A2_computeQ()"
793            "\n A->inc1 = %d \n", A2_inc1(A)) ;
794    exit(-1) ;
795 }
796 if ( A2_inc1(Q) != 1 ) {
797    fprintf(stderr, "\n fatal error in A2_computeQ()"
798            "\n Q->inc1 = %d, \n", A2_inc1(Q)) ;
799    exit(-1) ;
800 }
801 /*
802    --------------------------------------------------
803    compute the beta values, beta_j = 2./(V_j^H * V_j)
804    --------------------------------------------------
805 */
806 DV_setSize(workDV, ncolA) ;
807 betas = DV_entries(workDV) ;
808 if ( A2_IS_REAL(A) ) {
809    int   irowA, jcolA ;
810    double   sum ;
811    double   *colA ;
812 
813    for ( jcolA = 0 ; jcolA < ncolA ; jcolA++ ) {
814       sum = 1.0 ;
815       colA = A2_column(A, jcolA) ;
816       for ( irowA = jcolA + 1 ; irowA < nrowA ; irowA++ ) {
817          sum += colA[irowA] * colA[irowA] ;
818       }
819       betas[jcolA] = 2./sum ;
820    }
821 } else {
822    double   ival, rval, sum ;
823    double   *colA ;
824 
825    for ( jcolA = 0 ; jcolA < ncolA ; jcolA++ ) {
826       sum = 1.0 ;
827       colA = A2_column(A, jcolA) ;
828       for ( irowA = jcolA + 1 ; irowA < nrowA ; irowA++ ) {
829          rval = colA[2*irowA] ; ival = colA[2*irowA+1] ;
830          sum += rval*rval + ival*ival ;
831       }
832       betas[jcolA] = 2./sum ;
833    }
834 }
835 /*
836    -------------------------------------------
837    loop over the number of householder vectors
838    -------------------------------------------
839 */
840 for ( jcolA = 0 ; jcolA < ncolA ; jcolA++ ) {
841    double   *V, *X ;
842    int      jcolV ;
843    if ( msglvl > 2 ) {
844       fprintf(msgFile, "\n\n %% jcolA = %d", jcolA) ;
845       fflush(msgFile) ;
846    }
847 /*
848    ------------------
849    set X[] to e_jcolA
850    ------------------
851 */
852    X = A2_column(Q, jcolA) ;
853    if ( A2_IS_REAL(Q) ) {
854       DVzero(nrowA, X) ;
855       X[jcolA] = 1.0 ;
856    } else {
857       DVzero(2*nrowA, X) ;
858       X[2*jcolA] = 1.0 ;
859    }
860    for ( jcolV = jcolA ; jcolV >= 0 ; jcolV-- ) {
861       double   beta ;
862       if ( msglvl > 2 ) {
863          fprintf(msgFile, "\n   %% jcolV = %d", jcolV) ;
864          fflush(msgFile) ;
865       }
866 /*
867       -----------------------------------------------------
868       update X = (I - beta_jcolV * V_jcolV * V_jcolV^T)X
869                = X - beta_jcolV * V_jcolV * V_jcolV^T * X
870                = X - (beta_jcolV * V_jcolV^T * X) * V_jcolV
871       -----------------------------------------------------
872 */
873       V = A2_column(A, jcolV) ;
874       beta = betas[jcolV] ;
875       if ( msglvl > 2 ) {
876          fprintf(msgFile, "\n   %% beta = %12.4e", beta) ;
877          fflush(msgFile) ;
878       }
879       if ( A2_IS_REAL(Q) ) {
880          double   fac, sum = X[jcolV] ;
881          int      irow ;
882          for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) {
883             if ( msglvl > 2 ) {
884                fprintf(msgFile,
885                        "\n      %% V[%d] = %12.4e, X[%d] = %12.4e",
886                        irow, V[irow], irow, X[irow]) ;
887                fflush(msgFile) ;
888             }
889             sum += V[irow] * X[irow] ;
890          }
891          if ( msglvl > 2 ) {
892             fprintf(msgFile, "\n   %% sum = %12.4e", sum) ;
893             fflush(msgFile) ;
894          }
895          fac = beta * sum ;
896          if ( msglvl > 2 ) {
897             fprintf(msgFile, "\n   %% fac = %12.4e", fac) ;
898             fflush(msgFile) ;
899          }
900          X[jcolV] -= fac ;
901          for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) {
902             X[irow] -= fac * V[irow] ;
903          }
904       } else {
905          double   rfac, ifac, rsum = X[2*jcolV], isum = X[2*jcolV+1] ;
906          int      irow ;
907          for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) {
908             double   Vi, Vr, Xi, Xr ;
909             Vr = V[2*irow] ; Vi = V[2*irow+1] ;
910             Xr = X[2*irow] ; Xi = X[2*irow+1] ;
911             rsum += Vr*Xr + Vi*Xi ;
912             isum += Vr*Xi - Vi*Xr ;
913          }
914          rfac = beta * rsum ;
915          ifac = beta * isum ;
916          X[2*jcolV]   -= rfac ;
917          X[2*jcolV+1] -= ifac ;
918          for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) {
919             double   Vi, Vr ;
920             Vr = V[2*irow] ; Vi = V[2*irow+1] ;
921             X[2*irow]   -= rfac*Vr - ifac*Vi ;
922             X[2*irow+1] -= rfac*Vi + ifac*Vr ;
923          }
924       }
925    }
926 }
927 return ; }
928 
929 /*--------------------------------------------------------------------*/
930 /*
931    -----------------------------------------------------------
932    A contains the following data from the A = QR factorization
933 
934    A(1:ncolA,1:ncolA) = R
935    A(j+1:nrowA,j) is v_j, the j-th householder vector,
936        where v_j[j] = 1.0
937 
938    we compute Y = Q^T X when A is real
939           and Y = Q^H X when A is complex
940 
941    NOTE: A, Y and X must be column major.
942    NOTE: Y and X can be the same object,
943          in which case X is overwritten with Y
944 
945    created -- 98dec10, cca
946    -----------------------------------------------------------
947 */
948 void
A2_applyQT(A2 * Y,A2 * A,A2 * X,DV * workDV,int msglvl,FILE * msgFile)949 A2_applyQT (
950    A2     *Y,
951    A2     *A,
952    A2     *X,
953    DV     *workDV,
954    int    msglvl,
955    FILE   *msgFile
956 ) {
957 double   *betas ;
958 int      irowA, jcolA, jcolX, ncolA, ncolX, nrowA ;
959 /*
960    ---------------
961    check the input
962    ---------------
963 */
964 if ( A == NULL ) {
965    fprintf(stderr, "\n fatal error in A2_applyQT()"
966            "\n A is NULL\n") ;
967    exit(-1) ;
968 }
969 if ( X == NULL ) {
970    fprintf(stderr, "\n fatal error in A2_applyQT()"
971            "\n X is NULL\n") ;
972    exit(-1) ;
973 }
974 if ( workDV == NULL ) {
975    fprintf(stderr, "\n fatal error in A2_applyQT()"
976            "\n workDV is NULL\n") ;
977    exit(-1) ;
978 }
979 if ( msglvl > 0 && msgFile == NULL ) {
980    fprintf(stderr, "\n fatal error in A2_applyQT()"
981            "\n msglvl > 0 and msgFile is NULL\n") ;
982    exit(-1) ;
983 }
984 nrowA = A2_nrow(A) ;
985 ncolA = A2_ncol(A) ;
986 ncolX = A2_ncol(X) ;
987 if ( nrowA <= 0 ) {
988    fprintf(stderr, "\n fatal error in A2_applyQT()"
989            "\n nrowA = %d\n", nrowA) ;
990    exit(-1) ;
991 }
992 if ( ncolA <= 0 ) {
993    fprintf(stderr, "\n fatal error in A2_applyQT()"
994            "\n ncolA = %d\n", nrowA) ;
995    exit(-1) ;
996 }
997 if ( nrowA != A2_nrow(X) ) {
998    fprintf(stderr, "\n fatal error in A2_applyQT()"
999            "\n nrowA = %d, nrowX = %d\n", nrowA, A2_nrow(X)) ;
1000    exit(-1) ;
1001 }
1002 switch ( A->type ) {
1003 case SPOOLES_REAL :
1004 case SPOOLES_COMPLEX :
1005    break ;
1006 default :
1007    fprintf(stderr, "\n fatal error in A2_applyQT()"
1008            "\n invalid type for A\n") ;
1009    exit(-1) ;
1010 }
1011 if ( A->type != X->type ) {
1012    fprintf(stderr, "\n fatal error in A2_applyQT()"
1013            "\n A->type = %d, X->type = %d\n", A->type, X->type) ;
1014    exit(-1) ;
1015 }
1016 if ( A2_inc1(A) != 1 ) {
1017    fprintf(stderr, "\n fatal error in A2_applyQT()"
1018            "\n A->inc1 = %d \n", A2_inc1(A)) ;
1019    exit(-1) ;
1020 }
1021 if ( A2_inc1(X) != 1 ) {
1022    fprintf(stderr, "\n fatal error in A2_applyQT()"
1023            "\n X->inc1 = %d, \n", A2_inc1(X)) ;
1024    exit(-1) ;
1025 }
1026 /*
1027    --------------------------------------------------
1028    compute the beta values, beta_j = 2./(V_j^H * V_j)
1029    --------------------------------------------------
1030 */
1031 DV_setSize(workDV, ncolA) ;
1032 betas = DV_entries(workDV) ;
1033 if ( A2_IS_REAL(A) ) {
1034    int   irowA, jcolA ;
1035    double   sum ;
1036    double   *colA ;
1037 
1038    for ( jcolA = 0 ; jcolA < ncolA ; jcolA++ ) {
1039       sum = 1.0 ;
1040       colA = A2_column(A, jcolA) ;
1041       for ( irowA = jcolA + 1 ; irowA < nrowA ; irowA++ ) {
1042          sum += colA[irowA] * colA[irowA] ;
1043       }
1044       betas[jcolA] = 2./sum ;
1045    }
1046 } else {
1047    double   ival, rval, sum ;
1048    double   *colA ;
1049 
1050    for ( jcolA = 0 ; jcolA < ncolA ; jcolA++ ) {
1051       sum = 1.0 ;
1052       colA = A2_column(A, jcolA) ;
1053       for ( irowA = jcolA + 1 ; irowA < nrowA ; irowA++ ) {
1054          rval = colA[2*irowA] ; ival = colA[2*irowA+1] ;
1055          sum += rval*rval + ival*ival ;
1056       }
1057       betas[jcolA] = 2./sum ;
1058    }
1059 }
1060 /*
1061    ------------------------------------------
1062    loop over the number of columns in X and Y
1063    ------------------------------------------
1064 */
1065 for ( jcolX = 0 ; jcolX < ncolX ; jcolX++ ) {
1066    double   *V, *colX, *colY ;
1067    int      jcolV ;
1068    if ( msglvl > 2 ) {
1069       fprintf(msgFile, "\n\n %% jcolX = %d", jcolX) ;
1070       fflush(msgFile) ;
1071    }
1072 /*
1073    -------------------------------
1074    copy X(:,jcolX) into Y(:,jcolX)
1075    -------------------------------
1076 */
1077    colY = A2_column(Y, jcolX) ;
1078    colX = A2_column(X, jcolX) ;
1079    if ( A2_IS_REAL(A) ) {
1080       DVcopy(nrowA, colY, colX) ;
1081    } else {
1082       DVcopy(2*nrowA, colY, colX) ;
1083    }
1084    for ( jcolV = 0 ; jcolV < ncolA ; jcolV++ ) {
1085       double   beta ;
1086       if ( msglvl > 2 ) {
1087          fprintf(msgFile, "\n   %% jcolV = %d", jcolV) ;
1088          fflush(msgFile) ;
1089       }
1090 /*
1091       ------------------------------------------------------------
1092       update colY = (I - beta_jcolV * V_jcolV * V_jcolV^T)colY
1093                   = colY - beta_jcolV * V_jcolV * V_jcolV^T * colY
1094                   = colY - (beta_jcolV * V_jcolV^T * Y) * V_jcolV
1095       ------------------------------------------------------------
1096 */
1097       V = A2_column(A, jcolV) ;
1098       beta = betas[jcolV] ;
1099       if ( msglvl > 2 ) {
1100          fprintf(msgFile, "\n   %% beta = %12.4e", beta) ;
1101          fflush(msgFile) ;
1102       }
1103       if ( A2_IS_REAL(A) ) {
1104          double   fac, sum = colY[jcolV] ;
1105          int      irow ;
1106          for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) {
1107             if ( msglvl > 2 ) {
1108                fprintf(msgFile,
1109                        "\n      %% V[%d] = %12.4e, X[%d] = %12.4e",
1110                        irow, V[irow], irow, colY[irow]) ;
1111                fflush(msgFile) ;
1112             }
1113             sum += V[irow] * colY[irow] ;
1114          }
1115          if ( msglvl > 2 ) {
1116             fprintf(msgFile, "\n   %% sum = %12.4e", sum) ;
1117             fflush(msgFile) ;
1118          }
1119          fac = beta * sum ;
1120          if ( msglvl > 2 ) {
1121             fprintf(msgFile, "\n   %% fac = %12.4e", fac) ;
1122             fflush(msgFile) ;
1123          }
1124          colY[jcolV] -= fac ;
1125          for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) {
1126             colY[irow] -= fac * V[irow] ;
1127          }
1128       } else {
1129          double   rfac, ifac,
1130                   rsum = colY[2*jcolV], isum = colY[2*jcolV+1] ;
1131          int      irow ;
1132          for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) {
1133             double   Vi, Vr, Yi, Yr ;
1134             Vr = V[2*irow] ; Vi = V[2*irow+1] ;
1135             Yr = colY[2*irow] ; Yi = colY[2*irow+1] ;
1136             rsum += Vr*Yr + Vi*Yi ;
1137             isum += Vr*Yi - Vi*Yr ;
1138          }
1139          rfac = beta * rsum ;
1140          ifac = beta * isum ;
1141          colY[2*jcolV]   -= rfac ;
1142          colY[2*jcolV+1] -= ifac ;
1143          for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) {
1144             double   Vi, Vr ;
1145             Vr = V[2*irow] ; Vi = V[2*irow+1] ;
1146             colY[2*irow]   -= rfac*Vr - ifac*Vi ;
1147             colY[2*irow+1] -= rfac*Vi + ifac*Vr ;
1148          }
1149       }
1150    }
1151 }
1152 return ; }
1153 
1154 /*--------------------------------------------------------------------*/
1155