1 /* QRreduce.c */
2
3 #include "../A2.h"
4
5 /*--------------------------------------------------------------------*/
6 static int copyIntoVec1 ( A2 *mtxA, double H0[],
7 int msglvl, FILE *msgFile ) ;
8 static double getHouseholderVector1 ( int type, int n, double H0[],
9 double *pbeta0, int msglvl, FILE *msgFile ) ;
10 static double computeW1 ( A2 *mtxA, double H0[], double W0[],
11 int msglvl, FILE *msgFile ) ;
12 static double updateA1 ( A2 *mtxA, double H0[], double beta0,
13 double W0[], int msglvl, FILE *msgFile ) ;
14 /*--------------------------------------------------------------------*/
15 /*
16 --------------------------------------------------------------
17 purpose -- compute A = QR, where Q is a product of householder
18 vectors, (I - beta_j v_j v_j^T). on return, v_j is
19 found in the lower triangle of A, v_j(j) = 1.0.
20
21 return value -- # of floating point operations
22
23 created -- 98may25, cca
24 --------------------------------------------------------------
25 */
26 double
A2_QRreduce(A2 * mtxA,DV * workDV,int msglvl,FILE * msgFile)27 A2_QRreduce (
28 A2 *mtxA,
29 DV *workDV,
30 int msglvl,
31 FILE *msgFile
32 ) {
33 A2 tempA ;
34 double nops ;
35 double beta0 ;
36 double *colA, *H0, *W0 ;
37 int inc1, inc2, jcol, lastrow, length, ncolA, nrowA, nstep ;
38 /*
39 ---------------
40 check the input
41 ---------------
42 */
43 if ( mtxA == NULL || workDV == NULL
44 || (msglvl > 0 && msgFile == NULL) ) {
45 fprintf(stderr, "\n fatal error in A2_QRreduce()"
46 "\n bad input\n") ;
47 exit(-1) ;
48 }
49 if ( ! (A2_IS_REAL(mtxA) || A2_IS_COMPLEX(mtxA)) ) {
50 fprintf(stderr, "\n fatal error in A2_QRreduce()"
51 "\n matrix must be real or complex\n") ;
52 exit(-1) ;
53 }
54 nrowA = A2_nrow(mtxA) ;
55 ncolA = A2_ncol(mtxA) ;
56 inc1 = A2_inc1(mtxA) ;
57 inc2 = A2_inc2(mtxA) ;
58 if ( A2_IS_REAL(mtxA) ) {
59 DV_setSize(workDV, nrowA + ncolA) ;
60 H0 = DV_entries(workDV) ;
61 W0 = H0 + nrowA ;
62 } else if ( A2_IS_COMPLEX(mtxA) ) {
63 DV_setSize(workDV, 2*(nrowA + ncolA)) ;
64 H0 = DV_entries(workDV) ;
65 W0 = H0 + 2*nrowA ;
66 }
67 /*
68 -------------------------------------------------
69 determine the number of steps = min(ncolA, nrowA)
70 -------------------------------------------------
71 */
72 nstep = (ncolA <= nrowA) ? ncolA : nrowA ;
73 /*
74 -------------------
75 loop over the steps
76 -------------------
77 */
78 nops = 0.0 ;
79 for ( jcol = 0 ; jcol < nstep ; jcol++ ) {
80 if ( msglvl > 3 ) {
81 fprintf(msgFile, "\n %% jcol = %d", jcol) ;
82 }
83 /*
84 ----------------------------------
85 copy the column of A into a vector
86 and find the last nonzero element
87 ----------------------------------
88 */
89 A2_subA2(&tempA, mtxA, jcol, nrowA-1, jcol, ncolA-1) ;
90 length = 1 + copyIntoVec1(&tempA, H0, msglvl, msgFile) ;
91 lastrow = jcol + length - 1 ;
92 if ( msglvl > 5 ) {
93 fprintf(msgFile,
94 "\n %% return from copyIntoVec1, length = %d, lastrow = %d",
95 length, lastrow) ;
96 }
97 /*
98 ------------------------------
99 compute the Householder vector
100 and place into the column of A
101 ------------------------------
102 */
103 colA = A2_column(mtxA, jcol) ;
104 if ( A2_IS_REAL(mtxA) ) {
105 nops += getHouseholderVector1(SPOOLES_REAL, length, H0,
106 &beta0, msglvl, msgFile) ;
107 A2_subA2(&tempA, mtxA, jcol, lastrow, jcol, jcol) ;
108 A2_setColumn(&tempA, H0, 0) ;
109 H0[0] = 1.0 ;
110 } else if ( A2_IS_COMPLEX(mtxA) ) {
111 nops += getHouseholderVector1(SPOOLES_COMPLEX, length, H0,
112 &beta0, msglvl, msgFile) ;
113 A2_subA2(&tempA, mtxA, jcol, lastrow, jcol, jcol) ;
114 A2_setColumn(&tempA, H0, 0) ;
115 H0[0] = 1.0 ; H0[1] = 0.0 ;
116 }
117 if ( msglvl > 5 && jcol == 0 ) {
118 fprintf(msgFile, "\n %% beta0 = %12.4e;", beta0) ;
119 }
120 if ( beta0 != 0.0 && jcol + 1 < ncolA ) {
121 A2_subA2(&tempA, mtxA, jcol, lastrow, jcol+1, ncolA-1) ;
122 /*
123 ------------------------------------------------
124 compute w = v^T * A(jcol:lastrow,jcol+1:nrowA-1)
125 ------------------------------------------------
126 */
127 if ( msglvl > 3 ) {
128 fprintf(msgFile, "\n %% compute w") ;
129 }
130 nops += computeW1(&tempA, H0, W0, msglvl, msgFile) ;
131 /*
132 -------------------------------------------------
133 update A(jcol:lastrow,jcol+1:nrowA-1) -= beta*v*w
134 -------------------------------------------------
135 */
136 if ( msglvl > 3 ) {
137 fprintf(msgFile, "\n %% update A") ;
138 }
139 nops += updateA1(&tempA, H0, beta0, W0, msglvl, msgFile) ;
140 }
141 }
142 return(nops) ; }
143
144 /*--------------------------------------------------------------------*/
145 /*
146 --------------------------------------------------
147 copy the first column of mtxA into the vector H0[]
148
149 created -- 98may30, cca
150 --------------------------------------------------
151 */
152 static int
copyIntoVec1(A2 * mtxA,double H0[],int msglvl,FILE * msgFile)153 copyIntoVec1 (
154 A2 *mtxA,
155 double H0[],
156 int msglvl,
157 FILE *msgFile
158 ) {
159 double ival, rval ;
160 double *colA ;
161 int ii, inc1, irow, jj, lastrow, ncolA, nrowA ;
162 /*
163 ----------------------------------
164 copy the column of A into a vector
165 and find the last nonzero element
166 ----------------------------------
167 */
168 nrowA = mtxA->n1 ;
169 ncolA = mtxA->n2 ;
170 inc1 = mtxA->inc1 ;
171 lastrow = -1 ;
172 colA = A2_column(mtxA, 0) ;
173 if ( A2_IS_REAL(mtxA) ) {
174 for ( irow = ii = jj = 0 ;
175 irow < nrowA ;
176 irow++, ii += inc1, jj++ ) {
177 rval = colA[ii] ;
178 if ( rval != 0.0 ) {
179 H0[jj] = rval ;
180 lastrow = irow ;
181 }
182 }
183 } else if ( A2_IS_COMPLEX(mtxA) ) {
184 for ( irow = ii = jj = 0 ;
185 irow < nrowA ;
186 irow++, ii += 2*inc1, jj += 2 ) {
187 rval = colA[ii] ; ival = colA[ii+1] ;
188 if ( rval != 0.0 || ival != 0.0 ) {
189 H0[jj] = rval ; H0[jj+1] = ival ;
190 lastrow = irow ;
191 }
192 }
193 }
194 return(lastrow) ; }
195
196 /*--------------------------------------------------------------------*/
197 /*
198 ----------------------------------------------------------------
199 compute a householder transformation
200 (I - beta*v*v^H)x = alpha*e_1
201 where v[0] = 1.0
202 on input,
203 H0 contains x,
204 on output,
205 beta0 contains beta
206 H0[0] = alpha
207 H0[1:n-1] = v[1:n] ;
208
209 created -- 98may30, cca
210 ----------------------------------------------------------------
211 */
212 static double
getHouseholderVector1(int type,int n,double H0[],double * pbeta0,int msglvl,FILE * msgFile)213 getHouseholderVector1 (
214 int type,
215 int n,
216 double H0[],
217 double *pbeta0,
218 int msglvl,
219 FILE *msgFile
220 ) {
221 double beta0, ifac, ival, nops, normx, rfac, rval, sigma,
222 sum, v0imag, v0real, y0imag, y0real ;
223 int ii, jj ;
224 /*
225 --------------------------------------------
226 compute ||H0(1:n-1)||_2^2 and the
227 row that contains the last nonzero entry
228 --------------------------------------------
229 */
230 sigma = 0.0 ;
231 beta0 = 0.0 ;
232 nops = 0.0 ;
233 if ( type == SPOOLES_REAL ) {
234 for ( ii = 1 ; ii < n ; ii++ ) {
235 rval = H0[ii] ;
236 sigma += rval*rval ;
237 }
238 nops += 2*(n-1) ;
239 } else if ( type == SPOOLES_COMPLEX ) {
240 for ( ii = 1, jj = 2 ; ii < n ; ii++, jj += 2 ) {
241 rval = H0[jj] ; ival = H0[jj+1] ;
242 sigma += rval*rval + ival*ival ;
243 }
244 nops += 4*(n-1) ;
245 }
246 if ( msglvl > 3 ) {
247 fprintf(msgFile, "\n %% sigma = %12.4e", sigma) ;
248 }
249 if ( sigma != 0.0 ) {
250 /*
251 --------------------------------------------
252 there are nonzero entries below the diagonal
253 --------------------------------------------
254 */
255 if ( type == SPOOLES_REAL ) {
256 rval = H0[0] ;
257 if ( rval == 0.0 ) {
258 normx = sqrt(sigma) ;
259 v0real = normx ;
260 y0real = - normx ;
261 nops++ ;
262 } else {
263 normx = sqrt(sigma + rval*rval) ;
264 rfac = normx/fabs(rval) ;
265 v0real = rval*(1 + rfac) ;
266 y0real = -rfac*rval ;
267 nops += 7 ;
268 }
269 } else if ( type == SPOOLES_COMPLEX ) {
270 rval = H0[0] ; ival = H0[1] ;
271 if ( rval == 0.0 && ival == 0.0 ) {
272 normx = sqrt(sigma) ;
273 v0real = normx ; v0imag = 0.0 ;
274 y0real = - normx ; y0imag = 0.0 ;
275 nops += 2 ;
276 } else {
277 normx = sqrt(sigma + rval*rval + ival*ival) ;
278 rfac = normx/Zabs(rval, ival) ;
279 v0real = rval + rfac*rval ; v0imag = ival + rfac*ival ;
280 y0real = -rfac*rval ; y0imag = -rfac*ival ;
281 nops += 16 ;
282 }
283 }
284 if ( msglvl > 3 ) {
285 fprintf(msgFile, "\n %% normx = %12.4e", normx) ;
286 }
287 /*
288 -------------------------------------
289 scale u so u1 = 1.0 and compute beta0
290 -------------------------------------
291 */
292 if ( type == SPOOLES_REAL ) {
293 rfac = 1./v0real ;
294 for ( ii = 1 ; ii < n ; ii++ ) {
295 H0[ii] *= rfac ;
296 }
297 sum = 1.0 ;
298 for ( ii = 1 ; ii < n ; ii++ ) {
299 rval = H0[ii] ;
300 sum += rval*rval ;
301 }
302 nops += 3*(n-1) ;
303 beta0 = 2./sum ;
304 /*
305 rfac = 1./v0real ;
306 sum = 1.0 ;
307 for ( ii = 1 ; ii < n ; ii++ ) {
308 rval = H0[ii] = rfac*H0[ii] ;
309 sum += rval*rval ;
310 }
311 nops += 3*(n-1) ;
312 beta0 = 2./sum ;
313 */
314 } else if ( type == SPOOLES_COMPLEX ) {
315 Zrecip(v0real, v0imag, &rfac, &ifac) ;
316 sum = 1.0 ;
317 for ( ii = 1, jj = 2 ; ii < n ; ii++, jj += 2 ) {
318 rval = H0[jj] ; ival = H0[jj+1] ;
319 H0[jj] = rfac*rval - ifac*ival ;
320 H0[jj+1] = rfac*ival + ifac*rval ;
321 rval = H0[jj] ; ival = H0[jj+1] ;
322 sum += rval*rval + ival*ival ;
323 }
324 nops += 10*(n-1) + 5 ;
325 beta0 = 2./sum ;
326 }
327 if ( msglvl > 2 ) {
328 fprintf(msgFile, "\n %% sum = %12.4e, beta0 = %12.4e",
329 sum, beta0) ;
330 }
331 /*
332 ---------------------------------------------
333 set the first entry of the transformed column
334 ---------------------------------------------
335 */
336 if ( type == SPOOLES_REAL ) {
337 H0[0] = y0real ;
338 } else if ( type == SPOOLES_COMPLEX ) {
339 H0[0] = y0real ; H0[1] = y0imag ;
340 }
341 }
342 *pbeta0 = beta0 ;
343 /*
344 fprintf(msgFile, "\n H0") ;
345 DVfprintf(msgFile, n, H0) ;
346 */
347
348 return(nops) ; }
349
350 /*--------------------------------------------------------------------*/
351 /*
352 -----------------------
353 compute W0 = v^H * A
354
355 created -- 98may30, cca
356 -----------------------
357 */
358 static double
computeW1(A2 * mtxA,double H0[],double W0[],int msglvl,FILE * msgFile)359 computeW1 (
360 A2 *mtxA,
361 double H0[],
362 double W0[],
363 int msglvl,
364 FILE *msgFile
365 ) {
366 double nops ;
367 int inc1, inc2, ncolA, nrowA ;
368
369 if ( msglvl > 5 ) {
370 fprintf(msgFile, "\n %% inside computeW1, nrow %d, ncol %d",
371 mtxA->n1, mtxA->n2) ;
372 }
373
374 nrowA = mtxA->n1 ;
375 ncolA = mtxA->n2 ;
376 inc1 = mtxA->inc1 ;
377 inc2 = mtxA->inc2 ;
378 if ( inc1 != 1 && inc2 != 1 ) {
379 fprintf(stderr, "\n error in computeW1"
380 "\n inc1 = %d, inc2 = %d\n", inc1, inc2) ;
381 exit(-1) ;
382 }
383 nops = 0.0 ;
384 if ( A2_IS_REAL(mtxA) ) {
385 int irow, jcol ;
386
387 if ( inc1 == 1 ) {
388 double sums[3] ;
389 double *colA0, *colA1, *colA2 ;
390 /*
391 ----------------------------
392 A is column major,
393 compute W(j) = H0^T * A(*,j)
394 ----------------------------
395 */
396 for ( jcol = 0 ; jcol < ncolA - 2 ; jcol += 3 ) {
397 colA0 = A2_column(mtxA, jcol) ;
398 colA1 = A2_column(mtxA, jcol+1) ;
399 colA2 = A2_column(mtxA, jcol+2) ;
400 DVdot13(nrowA, H0, colA0, colA1, colA2, sums) ;
401 W0[jcol] = sums[0] ;
402 W0[jcol+1] = sums[1] ;
403 W0[jcol+2] = sums[2] ;
404 nops += 6*nrowA ;
405 }
406 if ( jcol == ncolA - 2 ) {
407 colA0 = A2_column(mtxA, jcol) ;
408 colA1 = A2_column(mtxA, jcol+1) ;
409 DVdot12(nrowA, H0, colA0, colA1, sums) ;
410 W0[jcol] = sums[0] ;
411 W0[jcol+1] = sums[1] ;
412 nops += 4*nrowA ;
413 } else if ( jcol == ncolA - 1 ) {
414 colA0 = A2_column(mtxA, jcol) ;
415 DVdot11(nrowA, H0, colA0, sums) ;
416 W0[jcol] = sums[0] ;
417 nops += 2*nrowA ;
418 }
419 } else {
420 double alpha[3] ;
421 double *rowA0, *rowA1, *rowA2 ;
422 /*
423 -------------------------------
424 A is row major
425 compute W := W + H0(j) * A(j,*)
426 -------------------------------
427 */
428 DVzero(ncolA, W0) ;
429 for ( irow = 0 ; irow < nrowA - 2 ; irow += 3 ) {
430 rowA0 = A2_row(mtxA, irow) ;
431 rowA1 = A2_row(mtxA, irow+1) ;
432 rowA2 = A2_row(mtxA, irow+2) ;
433 alpha[0] = H0[irow] ;
434 alpha[1] = H0[irow+1] ;
435 alpha[2] = H0[irow+2] ;
436 DVaxpy13(ncolA, W0, alpha, rowA0, rowA1, rowA2) ;
437 nops += 6*ncolA ;
438 }
439 if ( irow == nrowA - 2 ) {
440 rowA0 = A2_row(mtxA, irow) ;
441 rowA1 = A2_row(mtxA, irow+1) ;
442 alpha[0] = H0[irow] ;
443 alpha[1] = H0[irow+1] ;
444 DVaxpy12(ncolA, W0, alpha, rowA0, rowA1) ;
445 nops += 4*ncolA ;
446 } else if ( irow == nrowA - 1 ) {
447 rowA0 = A2_row(mtxA, irow) ;
448 alpha[0] = H0[irow] ;
449 DVaxpy11(ncolA, W0, alpha, rowA0) ;
450 nops += 2*ncolA ;
451 }
452 }
453 } else if ( A2_IS_COMPLEX(mtxA) ) {
454 int irow, jcol ;
455
456 if ( inc1 == 1 ) {
457 double sums[6] ;
458 double *colA0, *colA1, *colA2 ;
459 /*
460 ----------------------------
461 A is column major
462 compute W(j) = H0^H * A(*,j)
463 ----------------------------
464 */
465 for ( jcol = 0 ; jcol < ncolA - 2 ; jcol += 3 ) {
466 colA0 = A2_column(mtxA, jcol) ;
467 colA1 = A2_column(mtxA, jcol+1) ;
468 colA2 = A2_column(mtxA, jcol+2) ;
469 ZVdotC13(nrowA, H0, colA0, colA1, colA2, sums) ;
470 W0[2*jcol] = sums[0] ; W0[2*jcol+1] = sums[1] ;
471 W0[2*(jcol+1)] = sums[2] ; W0[2*(jcol+1)+1] = sums[3] ;
472 W0[2*(jcol+2)] = sums[4] ; W0[2*(jcol+2)+1] = sums[5] ;
473 nops += 24*nrowA ;
474 }
475 if ( jcol == ncolA - 2 ) {
476 colA0 = A2_column(mtxA, jcol) ;
477 colA1 = A2_column(mtxA, jcol+1) ;
478 ZVdotC12(nrowA, H0, colA0, colA1, sums) ;
479 W0[2*jcol] = sums[0] ; W0[2*jcol+1] = sums[1] ;
480 W0[2*(jcol+1)] = sums[2] ; W0[2*(jcol+1)+1] = sums[3] ;
481 nops += 16*nrowA ;
482 } else if ( jcol == ncolA - 1 ) {
483 colA0 = A2_column(mtxA, jcol) ;
484 ZVdotC11(nrowA, H0, colA0, sums) ;
485 W0[2*jcol] = sums[0] ; W0[2*jcol+1] = sums[1] ;
486 nops += 8*nrowA ;
487 }
488 } else {
489 double alpha[6] ;
490 double *rowA0, *rowA1, *rowA2 ;
491 /*
492 ---------------------------------
493 A is row major
494 compute W := W + H0(j)^H * A(j,*)
495 ---------------------------------
496 */
497 DVzero(2*ncolA, W0) ;
498 for ( irow = 0 ; irow < nrowA - 2 ; irow += 3 ) {
499 rowA0 = A2_row(mtxA, irow) ;
500 rowA1 = A2_row(mtxA, irow+1) ;
501 rowA2 = A2_row(mtxA, irow+2) ;
502 alpha[0] = H0[2*irow] ; alpha[1] = -H0[2*irow+1] ;
503 alpha[2] = H0[2*(irow+1)] ; alpha[3] = -H0[2*(irow+1)+1] ;
504 alpha[4] = H0[2*(irow+2)] ; alpha[5] = -H0[2*(irow+2)+1] ;
505 ZVaxpy13(ncolA, W0, alpha, rowA0, rowA1, rowA2) ;
506 nops += 24*ncolA ;
507 }
508 if ( irow == nrowA - 2 ) {
509 rowA0 = A2_row(mtxA, irow) ;
510 rowA1 = A2_row(mtxA, irow+1) ;
511 alpha[0] = H0[2*irow] ; alpha[1] = -H0[2*irow+1] ;
512 alpha[2] = H0[2*(irow+1)] ; alpha[3] = -H0[2*(irow+1)+1] ;
513 ZVaxpy12(ncolA, W0, alpha, rowA0, rowA1) ;
514 nops += 16*ncolA ;
515 } else if ( irow == nrowA - 1 ) {
516 rowA0 = A2_row(mtxA, irow) ;
517 alpha[0] = H0[2*irow] ; alpha[1] = -H0[2*irow+1] ;
518 ZVaxpy11(ncolA, W0, alpha, rowA0) ;
519 nops += 8*ncolA ;
520 }
521 }
522 }
523 return(nops) ; }
524
525 /*--------------------------------------------------------------------*/
526 /*
527 ---------------------------------
528 compute A := A - H0 * beta0 * W0
529
530 created -- 98may30, cca
531 ---------------------------------
532 */
533 static double
updateA1(A2 * mtxA,double H0[],double beta0,double W0[],int msglvl,FILE * msgFile)534 updateA1 (
535 A2 *mtxA,
536 double H0[],
537 double beta0,
538 double W0[],
539 int msglvl,
540 FILE *msgFile
541 ) {
542 double nops ;
543 int inc1, inc2, ncolA, nrowA ;
544
545 if ( msglvl > 5 ) {
546 fprintf(msgFile, "\n %% inside updateA1, nrow %d, ncol %d",
547 mtxA->n1, mtxA->n2) ;
548 }
549
550 nrowA = mtxA->n1 ;
551 ncolA = mtxA->n2 ;
552 inc1 = mtxA->inc1 ;
553 inc2 = mtxA->inc2 ;
554 nops = 0.0 ;
555 if ( A2_IS_REAL(mtxA) ) {
556 int irow, jcol ;
557
558 if ( inc1 == 1 ) {
559 double alpha[3] ;
560 double *colA0, *colA1, *colA2 ;
561 /*
562 -----------------------------------------
563 A is column major
564 compute A(:,jcol) -= beta * W0(jcol) * H0
565 -----------------------------------------
566 */
567 for ( jcol = 0 ; jcol < ncolA - 2 ; jcol += 3 ) {
568 colA0 = A2_column(mtxA, jcol) ;
569 colA1 = A2_column(mtxA, jcol+1) ;
570 colA2 = A2_column(mtxA, jcol+2) ;
571 alpha[0] = -beta0 * W0[jcol] ;
572 alpha[1] = -beta0 * W0[jcol+1] ;
573 alpha[2] = -beta0 * W0[jcol+2] ;
574 DVaxpy31(nrowA, colA0, colA1, colA2, alpha, H0) ;
575 nops += 6*nrowA ;
576 }
577 if ( jcol == ncolA - 2 ) {
578 colA0 = A2_column(mtxA, jcol) ;
579 colA1 = A2_column(mtxA, jcol+1) ;
580 alpha[0] = -beta0 * W0[jcol] ;
581 alpha[1] = -beta0 * W0[jcol+1] ;
582 DVaxpy21(nrowA, colA0, colA1, alpha, H0) ;
583 nops += 4*nrowA ;
584 } else if ( jcol == ncolA - 1 ) {
585 colA0 = A2_column(mtxA, jcol) ;
586 alpha[0] = -beta0 * W0[jcol] ;
587 DVaxpy11(nrowA, colA0, alpha, H0) ;
588 nops += 2*nrowA ;
589 }
590 } else {
591 double alpha[3] ;
592 double *rowA0, *rowA1, *rowA2 ;
593 /*
594 -----------------------------------------
595 A is row major
596 compute A(irow,:) -= H0[irow]*beta0*W0(:)
597 -----------------------------------------
598 */
599 for ( irow = 0 ; irow < nrowA - 2 ; irow += 3 ) {
600 rowA0 = A2_row(mtxA, irow) ;
601 rowA1 = A2_row(mtxA, irow+1) ;
602 rowA2 = A2_row(mtxA, irow+2) ;
603 alpha[0] = -beta0 * H0[irow] ;
604 alpha[1] = -beta0 * H0[irow+1] ;
605 alpha[2] = -beta0 * H0[irow+2] ;
606 DVaxpy31(ncolA, rowA0, rowA1, rowA2, alpha, W0) ;
607 nops += 6*ncolA + 3 ;
608 }
609 if ( irow == nrowA - 2 ) {
610 rowA0 = A2_row(mtxA, irow) ;
611 rowA1 = A2_row(mtxA, irow+1) ;
612 alpha[0] = -beta0 * H0[irow] ;
613 alpha[1] = -beta0 * H0[irow+1] ;
614 DVaxpy21(ncolA, rowA0, rowA1, alpha, W0) ;
615 nops += 4*ncolA + 2 ;
616 } else if ( irow == nrowA - 1 ) {
617 rowA0 = A2_row(mtxA, irow) ;
618 alpha[0] = -beta0 * H0[irow] ;
619 DVaxpy11(ncolA, rowA0, alpha, W0) ;
620 nops += 2*ncolA + 1 ;
621 }
622 }
623 } else if ( A2_IS_COMPLEX(mtxA) ) {
624 int irow, jcol ;
625
626 if ( inc1 == 1 ) {
627 double alpha[6] ;
628 double *colA0, *colA1, *colA2 ;
629 /*
630 -----------------------------------------
631 A is column major
632 compute A(:,jcol) -= beta * W0(jcol) * H0
633 -----------------------------------------
634 */
635 for ( jcol = 0 ; jcol < ncolA - 2 ; jcol += 3 ) {
636 colA0 = A2_column(mtxA, jcol) ;
637 colA1 = A2_column(mtxA, jcol+1) ;
638 colA2 = A2_column(mtxA, jcol+2) ;
639 alpha[0] = -beta0 * W0[2*jcol] ;
640 alpha[1] = -beta0 * W0[2*jcol+1] ;
641 alpha[2] = -beta0 * W0[2*(jcol+1)] ;
642 alpha[3] = -beta0 * W0[2*(jcol+1)+1] ;
643 alpha[4] = -beta0 * W0[2*(jcol+2)] ;
644 alpha[5] = -beta0 * W0[2*(jcol+2)+1] ;
645 ZVaxpy31(nrowA, colA0, colA1, colA2, alpha, H0) ;
646 nops += 24*nrowA ;
647 }
648 if ( jcol == ncolA - 2 ) {
649 colA0 = A2_column(mtxA, jcol) ;
650 colA1 = A2_column(mtxA, jcol+1) ;
651 alpha[0] = -beta0 * W0[2*jcol] ;
652 alpha[1] = -beta0 * W0[2*jcol+1] ;
653 alpha[2] = -beta0 * W0[2*(jcol+1)] ;
654 alpha[3] = -beta0 * W0[2*(jcol+1)+1] ;
655 ZVaxpy21(nrowA, colA0, colA1, alpha, H0) ;
656 nops += 16*nrowA ;
657 } else if ( jcol == ncolA - 1 ) {
658 colA0 = A2_column(mtxA, jcol) ;
659 alpha[0] = -beta0 * W0[2*jcol] ;
660 alpha[1] = -beta0 * W0[2*jcol+1] ;
661 ZVaxpy11(nrowA, colA0, alpha, H0) ;
662 nops += 8*nrowA ;
663 }
664 } else {
665 double alpha[6] ;
666 double *rowA0, *rowA1, *rowA2 ;
667 /*
668 -----------------------------------------
669 A is row major
670 compute A(irow,:) -= H0[irow]*beta0*W0(:)
671 -----------------------------------------
672 */
673 for ( irow = 0 ; irow < nrowA - 2 ; irow += 3 ) {
674 rowA0 = A2_row(mtxA, irow) ;
675 rowA1 = A2_row(mtxA, irow+1) ;
676 rowA2 = A2_row(mtxA, irow+2) ;
677 alpha[0] = -beta0 * H0[2*irow] ;
678 alpha[1] = -beta0 * H0[2*irow+1] ;
679 alpha[2] = -beta0 * H0[2*(irow+1)] ;
680 alpha[3] = -beta0 * H0[2*(irow+1)+1] ;
681 alpha[4] = -beta0 * H0[2*(irow+2)] ;
682 alpha[5] = -beta0 * H0[2*(irow+2)+1] ;
683 ZVaxpy31(ncolA, rowA0, rowA1, rowA2, alpha, W0) ;
684 nops += 24*ncolA + 12 ;
685 }
686 if( irow == nrowA - 2 ) {
687 rowA0 = A2_row(mtxA, irow) ;
688 rowA1 = A2_row(mtxA, irow+1) ;
689 alpha[0] = -beta0 * H0[2*irow] ;
690 alpha[1] = -beta0 * H0[2*irow+1] ;
691 alpha[2] = -beta0 * H0[2*(irow+1)] ;
692 alpha[3] = -beta0 * H0[2*(irow+1)+1] ;
693 ZVaxpy21(ncolA, rowA0, rowA1, alpha, W0) ;
694 nops += 16*ncolA + 8 ;
695 } else if( irow == nrowA - 1 ) {
696 rowA0 = A2_row(mtxA, irow) ;
697 alpha[0] = -beta0 * H0[2*irow] ;
698 alpha[1] = -beta0 * H0[2*irow+1] ;
699 ZVaxpy11(ncolA, rowA0, alpha, W0) ;
700 nops += 8*ncolA + 4 ;
701 }
702 }
703 }
704 return(nops) ; }
705
706 /*--------------------------------------------------------------------*/
707 /*
708 -----------------------------------------------------------
709 A contains the following data from the A = QR factorization
710
711 A(1:ncolA,1:ncolA) = R
712 A(j+1:nrowA,j) is v_j, the j-th householder vector,
713 where v_j[j] = 1.0
714
715 NOTE: A and Q must be column major
716
717 created -- 98dec10, cca
718 -----------------------------------------------------------
719 */
720 void
A2_computeQ(A2 * Q,A2 * A,DV * workDV,int msglvl,FILE * msgFile)721 A2_computeQ (
722 A2 *Q,
723 A2 *A,
724 DV *workDV,
725 int msglvl,
726 FILE *msgFile
727 ) {
728 double *betas ;
729 int irowA, jcolA, ncolA, nrowA ;
730 /*
731 ---------------
732 check the input
733 ---------------
734 */
735 if ( Q == NULL ) {
736 fprintf(stderr, "\n fatal error in A2_computeQ()"
737 "\n Q is NULL\n") ;
738 exit(-1) ;
739 }
740 if ( A == NULL ) {
741 fprintf(stderr, "\n fatal error in A2_computeQ()"
742 "\n A is NULL\n") ;
743 exit(-1) ;
744 }
745 if ( workDV == NULL ) {
746 fprintf(stderr, "\n fatal error in A2_computeQ()"
747 "\n workDV is NULL\n") ;
748 exit(-1) ;
749 }
750 if ( msglvl > 0 && msgFile == NULL ) {
751 fprintf(stderr, "\n fatal error in A2_computeQ()"
752 "\n msglvl > 0 and msgFile is NULL\n") ;
753 exit(-1) ;
754 }
755 nrowA = A2_nrow(A) ;
756 ncolA = A2_ncol(A) ;
757 if ( nrowA <= 0 ) {
758 fprintf(stderr, "\n fatal error in A2_computeQ()"
759 "\n nrowA = %d\n", nrowA) ;
760 exit(-1) ;
761 }
762 if ( ncolA <= 0 ) {
763 fprintf(stderr, "\n fatal error in A2_computeQ()"
764 "\n ncolA = %d\n", nrowA) ;
765 exit(-1) ;
766 }
767 if ( nrowA != A2_nrow(Q) ) {
768 fprintf(stderr, "\n fatal error in A2_computeQ()"
769 "\n nrowA = %d, nrowQ = %d\n", nrowA, A2_nrow(Q)) ;
770 exit(-1) ;
771 }
772 if ( ncolA != A2_ncol(Q) ) {
773 fprintf(stderr, "\n fatal error in A2_computeQ()"
774 "\n ncolA = %d, ncolQ = %d\n", ncolA, A2_ncol(Q)) ;
775 exit(-1) ;
776 }
777 switch ( A->type ) {
778 case SPOOLES_REAL :
779 case SPOOLES_COMPLEX :
780 break ;
781 default :
782 fprintf(stderr, "\n fatal error in A2_computeQ()"
783 "\n invalid type for A\n") ;
784 exit(-1) ;
785 }
786 if ( A->type != Q->type ) {
787 fprintf(stderr, "\n fatal error in A2_computeQ()"
788 "\n A->type = %d, Q->type = %d\n", A->type, Q->type) ;
789 exit(-1) ;
790 }
791 if ( A2_inc1(A) != 1 ) {
792 fprintf(stderr, "\n fatal error in A2_computeQ()"
793 "\n A->inc1 = %d \n", A2_inc1(A)) ;
794 exit(-1) ;
795 }
796 if ( A2_inc1(Q) != 1 ) {
797 fprintf(stderr, "\n fatal error in A2_computeQ()"
798 "\n Q->inc1 = %d, \n", A2_inc1(Q)) ;
799 exit(-1) ;
800 }
801 /*
802 --------------------------------------------------
803 compute the beta values, beta_j = 2./(V_j^H * V_j)
804 --------------------------------------------------
805 */
806 DV_setSize(workDV, ncolA) ;
807 betas = DV_entries(workDV) ;
808 if ( A2_IS_REAL(A) ) {
809 int irowA, jcolA ;
810 double sum ;
811 double *colA ;
812
813 for ( jcolA = 0 ; jcolA < ncolA ; jcolA++ ) {
814 sum = 1.0 ;
815 colA = A2_column(A, jcolA) ;
816 for ( irowA = jcolA + 1 ; irowA < nrowA ; irowA++ ) {
817 sum += colA[irowA] * colA[irowA] ;
818 }
819 betas[jcolA] = 2./sum ;
820 }
821 } else {
822 double ival, rval, sum ;
823 double *colA ;
824
825 for ( jcolA = 0 ; jcolA < ncolA ; jcolA++ ) {
826 sum = 1.0 ;
827 colA = A2_column(A, jcolA) ;
828 for ( irowA = jcolA + 1 ; irowA < nrowA ; irowA++ ) {
829 rval = colA[2*irowA] ; ival = colA[2*irowA+1] ;
830 sum += rval*rval + ival*ival ;
831 }
832 betas[jcolA] = 2./sum ;
833 }
834 }
835 /*
836 -------------------------------------------
837 loop over the number of householder vectors
838 -------------------------------------------
839 */
840 for ( jcolA = 0 ; jcolA < ncolA ; jcolA++ ) {
841 double *V, *X ;
842 int jcolV ;
843 if ( msglvl > 2 ) {
844 fprintf(msgFile, "\n\n %% jcolA = %d", jcolA) ;
845 fflush(msgFile) ;
846 }
847 /*
848 ------------------
849 set X[] to e_jcolA
850 ------------------
851 */
852 X = A2_column(Q, jcolA) ;
853 if ( A2_IS_REAL(Q) ) {
854 DVzero(nrowA, X) ;
855 X[jcolA] = 1.0 ;
856 } else {
857 DVzero(2*nrowA, X) ;
858 X[2*jcolA] = 1.0 ;
859 }
860 for ( jcolV = jcolA ; jcolV >= 0 ; jcolV-- ) {
861 double beta ;
862 if ( msglvl > 2 ) {
863 fprintf(msgFile, "\n %% jcolV = %d", jcolV) ;
864 fflush(msgFile) ;
865 }
866 /*
867 -----------------------------------------------------
868 update X = (I - beta_jcolV * V_jcolV * V_jcolV^T)X
869 = X - beta_jcolV * V_jcolV * V_jcolV^T * X
870 = X - (beta_jcolV * V_jcolV^T * X) * V_jcolV
871 -----------------------------------------------------
872 */
873 V = A2_column(A, jcolV) ;
874 beta = betas[jcolV] ;
875 if ( msglvl > 2 ) {
876 fprintf(msgFile, "\n %% beta = %12.4e", beta) ;
877 fflush(msgFile) ;
878 }
879 if ( A2_IS_REAL(Q) ) {
880 double fac, sum = X[jcolV] ;
881 int irow ;
882 for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) {
883 if ( msglvl > 2 ) {
884 fprintf(msgFile,
885 "\n %% V[%d] = %12.4e, X[%d] = %12.4e",
886 irow, V[irow], irow, X[irow]) ;
887 fflush(msgFile) ;
888 }
889 sum += V[irow] * X[irow] ;
890 }
891 if ( msglvl > 2 ) {
892 fprintf(msgFile, "\n %% sum = %12.4e", sum) ;
893 fflush(msgFile) ;
894 }
895 fac = beta * sum ;
896 if ( msglvl > 2 ) {
897 fprintf(msgFile, "\n %% fac = %12.4e", fac) ;
898 fflush(msgFile) ;
899 }
900 X[jcolV] -= fac ;
901 for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) {
902 X[irow] -= fac * V[irow] ;
903 }
904 } else {
905 double rfac, ifac, rsum = X[2*jcolV], isum = X[2*jcolV+1] ;
906 int irow ;
907 for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) {
908 double Vi, Vr, Xi, Xr ;
909 Vr = V[2*irow] ; Vi = V[2*irow+1] ;
910 Xr = X[2*irow] ; Xi = X[2*irow+1] ;
911 rsum += Vr*Xr + Vi*Xi ;
912 isum += Vr*Xi - Vi*Xr ;
913 }
914 rfac = beta * rsum ;
915 ifac = beta * isum ;
916 X[2*jcolV] -= rfac ;
917 X[2*jcolV+1] -= ifac ;
918 for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) {
919 double Vi, Vr ;
920 Vr = V[2*irow] ; Vi = V[2*irow+1] ;
921 X[2*irow] -= rfac*Vr - ifac*Vi ;
922 X[2*irow+1] -= rfac*Vi + ifac*Vr ;
923 }
924 }
925 }
926 }
927 return ; }
928
929 /*--------------------------------------------------------------------*/
930 /*
931 -----------------------------------------------------------
932 A contains the following data from the A = QR factorization
933
934 A(1:ncolA,1:ncolA) = R
935 A(j+1:nrowA,j) is v_j, the j-th householder vector,
936 where v_j[j] = 1.0
937
938 we compute Y = Q^T X when A is real
939 and Y = Q^H X when A is complex
940
941 NOTE: A, Y and X must be column major.
942 NOTE: Y and X can be the same object,
943 in which case X is overwritten with Y
944
945 created -- 98dec10, cca
946 -----------------------------------------------------------
947 */
948 void
A2_applyQT(A2 * Y,A2 * A,A2 * X,DV * workDV,int msglvl,FILE * msgFile)949 A2_applyQT (
950 A2 *Y,
951 A2 *A,
952 A2 *X,
953 DV *workDV,
954 int msglvl,
955 FILE *msgFile
956 ) {
957 double *betas ;
958 int irowA, jcolA, jcolX, ncolA, ncolX, nrowA ;
959 /*
960 ---------------
961 check the input
962 ---------------
963 */
964 if ( A == NULL ) {
965 fprintf(stderr, "\n fatal error in A2_applyQT()"
966 "\n A is NULL\n") ;
967 exit(-1) ;
968 }
969 if ( X == NULL ) {
970 fprintf(stderr, "\n fatal error in A2_applyQT()"
971 "\n X is NULL\n") ;
972 exit(-1) ;
973 }
974 if ( workDV == NULL ) {
975 fprintf(stderr, "\n fatal error in A2_applyQT()"
976 "\n workDV is NULL\n") ;
977 exit(-1) ;
978 }
979 if ( msglvl > 0 && msgFile == NULL ) {
980 fprintf(stderr, "\n fatal error in A2_applyQT()"
981 "\n msglvl > 0 and msgFile is NULL\n") ;
982 exit(-1) ;
983 }
984 nrowA = A2_nrow(A) ;
985 ncolA = A2_ncol(A) ;
986 ncolX = A2_ncol(X) ;
987 if ( nrowA <= 0 ) {
988 fprintf(stderr, "\n fatal error in A2_applyQT()"
989 "\n nrowA = %d\n", nrowA) ;
990 exit(-1) ;
991 }
992 if ( ncolA <= 0 ) {
993 fprintf(stderr, "\n fatal error in A2_applyQT()"
994 "\n ncolA = %d\n", nrowA) ;
995 exit(-1) ;
996 }
997 if ( nrowA != A2_nrow(X) ) {
998 fprintf(stderr, "\n fatal error in A2_applyQT()"
999 "\n nrowA = %d, nrowX = %d\n", nrowA, A2_nrow(X)) ;
1000 exit(-1) ;
1001 }
1002 switch ( A->type ) {
1003 case SPOOLES_REAL :
1004 case SPOOLES_COMPLEX :
1005 break ;
1006 default :
1007 fprintf(stderr, "\n fatal error in A2_applyQT()"
1008 "\n invalid type for A\n") ;
1009 exit(-1) ;
1010 }
1011 if ( A->type != X->type ) {
1012 fprintf(stderr, "\n fatal error in A2_applyQT()"
1013 "\n A->type = %d, X->type = %d\n", A->type, X->type) ;
1014 exit(-1) ;
1015 }
1016 if ( A2_inc1(A) != 1 ) {
1017 fprintf(stderr, "\n fatal error in A2_applyQT()"
1018 "\n A->inc1 = %d \n", A2_inc1(A)) ;
1019 exit(-1) ;
1020 }
1021 if ( A2_inc1(X) != 1 ) {
1022 fprintf(stderr, "\n fatal error in A2_applyQT()"
1023 "\n X->inc1 = %d, \n", A2_inc1(X)) ;
1024 exit(-1) ;
1025 }
1026 /*
1027 --------------------------------------------------
1028 compute the beta values, beta_j = 2./(V_j^H * V_j)
1029 --------------------------------------------------
1030 */
1031 DV_setSize(workDV, ncolA) ;
1032 betas = DV_entries(workDV) ;
1033 if ( A2_IS_REAL(A) ) {
1034 int irowA, jcolA ;
1035 double sum ;
1036 double *colA ;
1037
1038 for ( jcolA = 0 ; jcolA < ncolA ; jcolA++ ) {
1039 sum = 1.0 ;
1040 colA = A2_column(A, jcolA) ;
1041 for ( irowA = jcolA + 1 ; irowA < nrowA ; irowA++ ) {
1042 sum += colA[irowA] * colA[irowA] ;
1043 }
1044 betas[jcolA] = 2./sum ;
1045 }
1046 } else {
1047 double ival, rval, sum ;
1048 double *colA ;
1049
1050 for ( jcolA = 0 ; jcolA < ncolA ; jcolA++ ) {
1051 sum = 1.0 ;
1052 colA = A2_column(A, jcolA) ;
1053 for ( irowA = jcolA + 1 ; irowA < nrowA ; irowA++ ) {
1054 rval = colA[2*irowA] ; ival = colA[2*irowA+1] ;
1055 sum += rval*rval + ival*ival ;
1056 }
1057 betas[jcolA] = 2./sum ;
1058 }
1059 }
1060 /*
1061 ------------------------------------------
1062 loop over the number of columns in X and Y
1063 ------------------------------------------
1064 */
1065 for ( jcolX = 0 ; jcolX < ncolX ; jcolX++ ) {
1066 double *V, *colX, *colY ;
1067 int jcolV ;
1068 if ( msglvl > 2 ) {
1069 fprintf(msgFile, "\n\n %% jcolX = %d", jcolX) ;
1070 fflush(msgFile) ;
1071 }
1072 /*
1073 -------------------------------
1074 copy X(:,jcolX) into Y(:,jcolX)
1075 -------------------------------
1076 */
1077 colY = A2_column(Y, jcolX) ;
1078 colX = A2_column(X, jcolX) ;
1079 if ( A2_IS_REAL(A) ) {
1080 DVcopy(nrowA, colY, colX) ;
1081 } else {
1082 DVcopy(2*nrowA, colY, colX) ;
1083 }
1084 for ( jcolV = 0 ; jcolV < ncolA ; jcolV++ ) {
1085 double beta ;
1086 if ( msglvl > 2 ) {
1087 fprintf(msgFile, "\n %% jcolV = %d", jcolV) ;
1088 fflush(msgFile) ;
1089 }
1090 /*
1091 ------------------------------------------------------------
1092 update colY = (I - beta_jcolV * V_jcolV * V_jcolV^T)colY
1093 = colY - beta_jcolV * V_jcolV * V_jcolV^T * colY
1094 = colY - (beta_jcolV * V_jcolV^T * Y) * V_jcolV
1095 ------------------------------------------------------------
1096 */
1097 V = A2_column(A, jcolV) ;
1098 beta = betas[jcolV] ;
1099 if ( msglvl > 2 ) {
1100 fprintf(msgFile, "\n %% beta = %12.4e", beta) ;
1101 fflush(msgFile) ;
1102 }
1103 if ( A2_IS_REAL(A) ) {
1104 double fac, sum = colY[jcolV] ;
1105 int irow ;
1106 for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) {
1107 if ( msglvl > 2 ) {
1108 fprintf(msgFile,
1109 "\n %% V[%d] = %12.4e, X[%d] = %12.4e",
1110 irow, V[irow], irow, colY[irow]) ;
1111 fflush(msgFile) ;
1112 }
1113 sum += V[irow] * colY[irow] ;
1114 }
1115 if ( msglvl > 2 ) {
1116 fprintf(msgFile, "\n %% sum = %12.4e", sum) ;
1117 fflush(msgFile) ;
1118 }
1119 fac = beta * sum ;
1120 if ( msglvl > 2 ) {
1121 fprintf(msgFile, "\n %% fac = %12.4e", fac) ;
1122 fflush(msgFile) ;
1123 }
1124 colY[jcolV] -= fac ;
1125 for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) {
1126 colY[irow] -= fac * V[irow] ;
1127 }
1128 } else {
1129 double rfac, ifac,
1130 rsum = colY[2*jcolV], isum = colY[2*jcolV+1] ;
1131 int irow ;
1132 for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) {
1133 double Vi, Vr, Yi, Yr ;
1134 Vr = V[2*irow] ; Vi = V[2*irow+1] ;
1135 Yr = colY[2*irow] ; Yi = colY[2*irow+1] ;
1136 rsum += Vr*Yr + Vi*Yi ;
1137 isum += Vr*Yi - Vi*Yr ;
1138 }
1139 rfac = beta * rsum ;
1140 ifac = beta * isum ;
1141 colY[2*jcolV] -= rfac ;
1142 colY[2*jcolV+1] -= ifac ;
1143 for ( irow = jcolV + 1 ; irow < nrowA ; irow++ ) {
1144 double Vi, Vr ;
1145 Vr = V[2*irow] ; Vi = V[2*irow+1] ;
1146 colY[2*irow] -= rfac*Vr - ifac*Vi ;
1147 colY[2*irow+1] -= rfac*Vi + ifac*Vr ;
1148 }
1149 }
1150 }
1151 }
1152 return ; }
1153
1154 /*--------------------------------------------------------------------*/
1155