1 // =============================================================================
2 // === spqr_factorize ==========================================================
3 // =============================================================================
4
5 // Given an m-by-n sparse matrix A and its symbolic analsys, compute its
6 // numeric QR factorization.
7 //
8 // Note that the first part of the code allocates workspace, and the resulting
9 // QR factors. It then does all its work in that amount of space, with no
10 // reallocations. The size of the memory blocks that are allocated are known
11 // a priori, from the symbolic analysis, and do not depend upon the specific
12 // numerical values. When done, it frees its workspace.
13 //
14 // Furthermore, if rank detection is disabled (with tol < 0), the actual flops
15 // become perfectly repeatable; the code does the same thing regardless of
16 // the actual numerical values.
17 //
18 // These characteristics make this QR factorization a candidate for a
19 // hardware-in-the-loop solution, for control applications (for example).
20 // However, the Stacks must not be shrunk when the factorization is done.
21 //
22 // FUTURE: split this function into three parts:
23 // (a) allocate workspace and resulting QR factors
24 // (b) factorize (no malloc/free)
25 // (c) free workspace and optionally shrink the Stacks
26 // Then part (b) could be called many times with different values but same
27 // nonzero pattern.
28
29 // =============================================================================
30 // === macros ==================================================================
31 // =============================================================================
32
33 #include "spqr.hpp"
34
35 #define FCHUNK 32 // FUTURE: make a parameter; Householder block size
36
37 // Free Sx, A, and all of Work except the Work [0:ns-1] array itself
38 #define FREE_WORK_PART1 \
39 { \
40 free_Work <Entry> (Work, ns, n, maxfn, wtsize, cc) ; \
41 if (freeA) cholmod_l_free_sparse (Ahandle, cc) ; \
42 cholmod_l_free (anz, sizeof (Entry), Sx, cc) ; \
43 Sx = NULL ; \
44 }
45
46 // Free Cblock and the Work array itself
47 #define FREE_WORK_PART2 \
48 { \
49 cholmod_l_free (ns, sizeof (spqr_work <Entry>), Work, cc) ; \
50 Work = NULL ; \
51 cholmod_l_free (nf+1, sizeof (Entry *), Cblock, cc) ; \
52 Cblock = NULL ; \
53 }
54
55 // Free all workspace
56 #define FREE_WORK \
57 { \
58 FREE_WORK_PART1 ; \
59 FREE_WORK_PART2 ; \
60 }
61
62
63 // =============================================================================
64 // === get_Work ================================================================
65 // =============================================================================
66
67 // Allocate the Work object, which is an array of structs. The entry Work [s]
68 // contains workspace for the Stack s, for each Stack 0 to ns-1.
69
get_Work(Long ns,Long n,Long maxfn,Long keepH,Long fchunk,Long * p_wtsize,cholmod_common * cc)70 template <typename Entry> spqr_work <Entry> *get_Work
71 (
72 Long ns, // number of stacks
73 Long n, // number of columns of A
74 Long maxfn, // largest number of columns in any front
75 Long keepH, // if true, H is kept
76 Long fchunk,
77 Long *p_wtsize, // size of WTwork for each
78 cholmod_common *cc
79 )
80 {
81 int ok = TRUE ;
82 spqr_work <Entry> *Work ;
83 Long wtsize ;
84 *p_wtsize = 0 ;
85
86 // wtsize = (fchunk + (keepH ? 0:1)) * maxfn ;
87 wtsize = spqr_mult (fchunk + (keepH ? 0:1), maxfn, &ok) ;
88
89 Work = (spqr_work <Entry> *)
90 cholmod_l_malloc (ns, sizeof (spqr_work <Entry>), cc) ;
91
92 if (!ok || cc->status < CHOLMOD_OK)
93 {
94 // out of memory or Long overflow
95 cholmod_l_free (ns, sizeof (spqr_work <Entry>), Work, cc) ;
96 ERROR (CHOLMOD_OUT_OF_MEMORY, "out of memory") ;
97 return (NULL) ;
98 }
99
100 for (Long stack = 0 ; stack < ns ; stack++)
101 {
102 Work [stack].Fmap = (Long *) cholmod_l_malloc (n, sizeof (Long), cc) ;
103 Work [stack].Cmap = (Long *) cholmod_l_malloc (maxfn, sizeof(Long), cc);
104 if (keepH)
105 {
106 // Staircase is a permanent part of H
107 Work [stack].Stair1 = NULL ;
108 }
109 else
110 {
111 // Staircase workspace reused for each front
112 Work [stack].Stair1 =
113 (Long *) cholmod_l_malloc (maxfn, sizeof (Long), cc) ;
114 }
115 Work [stack].WTwork =
116 (Entry *) cholmod_l_malloc (wtsize, sizeof (Entry), cc) ;
117 Work [stack].sumfrank = 0 ;
118 Work [stack].maxfrank = 0 ;
119
120 Work [stack].wscale = 0 ;
121 Work [stack].wssq = 0 ;
122 }
123
124 *p_wtsize = wtsize ;
125 return (Work) ;
126 }
127
128
129 // =============================================================================
130 // === free_Work ===============================================================
131 // =============================================================================
132
133 // Free the contents of Work, but not the Work array itself
134
free_Work(spqr_work<Entry> * Work,Long ns,Long n,Long maxfn,Long wtsize,cholmod_common * cc)135 template <typename Entry> void free_Work
136 (
137 spqr_work <Entry> *Work,
138 Long ns, // number of stacks
139 Long n, // number of columns of A
140 Long maxfn, // largest number of columns in any front
141 Long wtsize, // size of WTwork array for each Stack
142 cholmod_common *cc
143 )
144 {
145 if (Work != NULL)
146 {
147 for (Long stack = 0 ; stack < ns ; stack++)
148 {
149 cholmod_l_free (n, sizeof (Long), Work [stack].Fmap, cc) ;
150 cholmod_l_free (maxfn, sizeof (Long), Work [stack].Cmap, cc) ;
151 cholmod_l_free (maxfn, sizeof (Long), Work [stack].Stair1, cc) ;
152 cholmod_l_free (wtsize, sizeof (Entry), Work [stack].WTwork, cc) ;
153 Work [stack].Fmap = NULL ;
154 Work [stack].Cmap = NULL ;
155 Work [stack].Stair1 = NULL ;
156 Work [stack].WTwork = NULL ;
157 }
158 }
159 }
160
161
162 // =============================================================================
163 // === spqr_factorize ==========================================================
164 // =============================================================================
165
spqr_factorize(cholmod_sparse ** Ahandle,Long freeA,double tol,Long ntol,spqr_symbolic * QRsym,cholmod_common * cc)166 template <typename Entry> spqr_numeric <Entry> *spqr_factorize
167 (
168 // input, optionally freed on output
169 cholmod_sparse **Ahandle,
170
171 // inputs, not modified
172 Long freeA, // if TRUE, free A on output
173 double tol, // for rank detection
174 Long ntol, // apply tol only to first ntol columns
175 spqr_symbolic *QRsym,
176
177 // workspace and parameters
178 cholmod_common *cc
179 )
180 {
181 Long *Wi, *Qfill, *PLinv, *Cm, *Sp, *Stack_size,
182 *TaskFront, *TaskFrontp, *TaskStack, *Stack_maxstack ;
183 Entry *Sx, **Rblock, **Cblock, **Stacks ;
184 spqr_numeric <Entry> *QRnum ;
185 Long nf, m, n, anz, fchunk, maxfn, rank, maxfrank, rjsize, rank1,
186 maxstack,j, wtsize, stack, ns, ntasks, keepH, hisize ;
187 char *Rdead ;
188 cholmod_sparse *A ;
189 spqr_work <Entry> *Work ;
190
191 // -------------------------------------------------------------------------
192 // get inputs and contents of symbolic object
193 // -------------------------------------------------------------------------
194
195 if (QRsym == NULL)
196 {
197 // out of memory in caller
198 if (freeA)
199 {
200 // if freeA is true, A must always be freed, even on error
201 cholmod_l_free_sparse (Ahandle, cc) ;
202 }
203 return (NULL) ;
204 }
205
206 A = *Ahandle ;
207
208 nf = QRsym->nf ; // number of frontal matrices
209 m = QRsym->m ; // A is m-by-n
210 n = QRsym->n ;
211 anz = QRsym->anz ; // nnz (A)
212
213 keepH = QRsym->keepH ;
214
215 rjsize = QRsym->rjsize ;
216
217 Sp = QRsym->Sp ; // size m+1, row pointers for S
218 Qfill = QRsym->Qfill ; // fill-reducing ordering
219 PLinv = QRsym->PLinv ; // size m, leftmost column sort
220
221 ns = QRsym->ns ; // number of stacks
222 ntasks = QRsym->ntasks ; // number of tasks
223
224 // FUTURE: compute a unique maxfn for each stack. Current maxfn is OK, but
225 // it's a global max of the fn of all fronts, and need only be max fn of
226 // the fronts in any given stack.
227
228 maxfn = QRsym->maxfn ; // max # of columns in any front
229 ASSERT (maxfn <= n) ;
230 hisize = QRsym->hisize ; // # of integers in Hii, Householder vectors
231
232 TaskFrontp = QRsym->TaskFrontp ;
233 TaskFront = QRsym->TaskFront ;
234 TaskStack = QRsym->TaskStack ;
235
236 maxstack = QRsym->maxstack ;
237 Stack_maxstack = QRsym->Stack_maxstack ;
238
239 if (!(QRsym->do_rank_detection))
240 {
241 // disable rank detection if not accounted for in analysis
242 tol = -1 ;
243 }
244
245 // If there is one task, there is only one stack, and visa versa
246 ASSERT ((ns == 1) == (ntasks == 1)) ;
247
248 PR (("factorize with ns %ld ntasks %ld\n", ns, ntasks)) ;
249
250 // -------------------------------------------------------------------------
251 // allocate workspace
252 // -------------------------------------------------------------------------
253
254 cholmod_l_allocate_work (0, MAX (m,nf), 0, cc) ;
255
256 // shared Long workspace
257 Wi = (Long *) cc->Iwork ; // size m, aliased with the rest of Iwork
258 Cm = Wi ; // size nf
259
260 // Cblock is workspace shared by all threads
261 Cblock = (Entry **) cholmod_l_malloc (nf+1, sizeof (Entry *), cc) ;
262
263 Work = NULL ; // Work and its contents not yet allocated
264 fchunk = MIN (m, FCHUNK) ;
265 wtsize = 0 ;
266
267 // -------------------------------------------------------------------------
268 // create S
269 // -------------------------------------------------------------------------
270
271 // create numeric values of S = A(p,q) in row-form in Sx
272 Sx = (Entry *) cholmod_l_malloc (anz, sizeof (Entry), cc) ;
273
274 if (cc->status == CHOLMOD_OK)
275 {
276 // use Wi as workspace (Iwork (0:m-1)) [
277 spqr_stranspose2 (A, Qfill, Sp, PLinv, Sx, Wi) ;
278 // Wi no longer needed ]
279 }
280
281 PR (("in spqr_factorize, status after creating Sx: %d\n", cc->status)) ;
282
283 // -------------------------------------------------------------------------
284 // input matrix A no longer needed; free it if the user doesn't need it
285 // -------------------------------------------------------------------------
286
287 if (freeA)
288 {
289 // this is done even if out of memory, above
290 cholmod_l_free_sparse (Ahandle, cc) ;
291 ASSERT (*Ahandle == NULL) ;
292 }
293 PR (("in spqr_factorize, freed A, status %d\n", cc->status)) ;
294
295 if (cc->status < CHOLMOD_OK)
296 {
297 PR (("in spqr_factorize, failure %d\n", cc->status)) ;
298 // out of memory
299 FREE_WORK ;
300 return (NULL) ;
301 }
302
303 // -------------------------------------------------------------------------
304 // allocate numeric object
305 // -------------------------------------------------------------------------
306
307 QRnum = (spqr_numeric<Entry> *)
308 cholmod_l_malloc (1, sizeof (spqr_numeric<Entry>), cc) ;
309 PR (("after allocating numeric object header, status %d\n", cc->status)) ;
310
311 if (cc->status < CHOLMOD_OK)
312 {
313 // out of memory
314 FREE_WORK ;
315 return (NULL) ;
316 }
317
318 Rblock = (Entry **) cholmod_l_malloc (nf, sizeof (Entry *), cc) ;
319 Rdead = (char *) cholmod_l_calloc (n, sizeof (char), cc) ;
320
321 // these may be revised (with ns=1) if we run out of memory
322 Stacks = (Entry **) cholmod_l_calloc (ns, sizeof (Entry *), cc) ;
323 Stack_size = (Long *) cholmod_l_calloc (ns, sizeof (Long), cc) ;
324
325 QRnum->Rblock = Rblock ;
326 QRnum->Rdead = Rdead ;
327 QRnum->Stacks = Stacks ;
328 QRnum->Stack_size = Stack_size ;
329
330 if (keepH)
331 {
332 // allocate permanent space for Stair, Tau, Hii for each front
333 QRnum->HStair= (Long *) cholmod_l_malloc (rjsize, sizeof (Long), cc) ;
334 QRnum->HTau = (Entry *) cholmod_l_malloc (rjsize, sizeof (Entry), cc) ;
335 QRnum->Hii = (Long *) cholmod_l_malloc (hisize, sizeof (Long), cc) ;
336 QRnum->Hm = (Long *) cholmod_l_malloc (nf, sizeof (Long), cc) ;
337 QRnum->Hr = (Long *) cholmod_l_malloc (nf, sizeof (Long), cc) ;
338 QRnum->HPinv = (Long *) cholmod_l_malloc (m, sizeof (Long), cc) ;
339 }
340 else
341 {
342 // H is not kept; this part of the numeric object is not used
343 QRnum->HStair = NULL ;
344 QRnum->HTau = NULL ;
345 QRnum->Hii = NULL ;
346 QRnum->Hm = NULL ;
347 QRnum->Hr = NULL ;
348 QRnum->HPinv = NULL ;
349 }
350
351 QRnum->n = n ;
352 QRnum->m = m ;
353 QRnum->nf = nf ;
354 QRnum->rjsize = rjsize ;
355 QRnum->hisize = hisize ;
356 QRnum->keepH = keepH ;
357 QRnum->maxstack = maxstack ;
358 QRnum->ns = ns ;
359 QRnum->ntasks = ntasks ;
360 QRnum->maxfm = EMPTY ; // max (Hm [0:nf-1]), computed only if H is kept
361
362 if (cc->status < CHOLMOD_OK)
363 {
364 // out of memory
365 spqr_freenum (&QRnum, cc) ;
366 FREE_WORK ;
367 return (NULL) ;
368 }
369
370 PR (("after allocating rest of numeric object, status %d\n", cc->status)) ;
371
372 // -------------------------------------------------------------------------
373 // allocate workspace
374 // -------------------------------------------------------------------------
375
376 Work = get_Work <Entry> (ns, n, maxfn, keepH, fchunk, &wtsize, cc) ;
377 PR (("after allocating work, status %d\n", cc->status)) ;
378
379 // -------------------------------------------------------------------------
380 // allocate and initialize each Stack
381 // -------------------------------------------------------------------------
382
383 if (cc->status == CHOLMOD_OK)
384 {
385 for (stack = 0 ; stack < ns ; stack++)
386 {
387 Entry *Stack ;
388 size_t stacksize = (ntasks == 1) ?
389 maxstack : Stack_maxstack [stack] ;
390 Stack_size [stack] = stacksize ;
391 Stack = (Entry *) cholmod_l_malloc (stacksize, sizeof (Entry), cc) ;
392 Stacks [stack] = Stack ;
393 Work [stack].Stack_head = Stack ;
394 Work [stack].Stack_top = Stack + stacksize ;
395 }
396 }
397
398 PR (("after allocating the stacks, status %d\n", cc->status)) ;
399
400 // -------------------------------------------------------------------------
401 // punt to sequential case and fchunk = 1 if out of memory
402 // -------------------------------------------------------------------------
403
404 if (cc->status < CHOLMOD_OK)
405 {
406 // PUNT: ran out of memory; try again with smaller workspace
407 // out of memory; free any stacks that were successfully allocated
408 if (Stacks != NULL)
409 {
410 for (stack = 0 ; stack < ns ; stack++)
411 {
412 size_t stacksize = (ntasks == 1) ?
413 maxstack : Stack_maxstack [stack] ;
414 cholmod_l_free (stacksize, sizeof (Entry), Stacks [stack], cc) ;
415 }
416 }
417 cholmod_l_free (ns, sizeof (Entry *), Stacks, cc) ;
418 cholmod_l_free (ns, sizeof (Long), Stack_size, cc) ;
419
420 // free the contents of Work, and the Work array itself
421 free_Work <Entry> (Work, ns, n, maxfn, wtsize, cc) ;
422 cholmod_l_free (ns, sizeof (spqr_work <Entry>), Work, cc) ;
423
424 // punt to a single stack, a single task, and fchunk of 1
425 ns = 1 ;
426 ntasks = 1 ;
427 fchunk = 1 ;
428 cc->status = CHOLMOD_OK ;
429 Work = get_Work <Entry> (ns, n, maxfn, keepH, fchunk, &wtsize, cc) ;
430 Stacks = (Entry **) cholmod_l_calloc (ns, sizeof (Entry *), cc) ;
431 Stack_size = (Long *) cholmod_l_calloc (ns, sizeof (Long), cc) ;
432 QRnum->Stacks = Stacks ;
433 QRnum->Stack_size = Stack_size ;
434 if (cc->status == CHOLMOD_OK)
435 {
436 Entry *Stack ;
437 Stack_size [0] = maxstack ;
438 Stack = (Entry *) cholmod_l_malloc (maxstack, sizeof (Entry), cc) ;
439 Stacks [0] = Stack ;
440 Work [0].Stack_head = Stack ;
441 Work [0].Stack_top = Stack + maxstack ;
442 }
443 }
444
445 // actual # of stacks and tasks used
446 QRnum->ns = ns ;
447 QRnum->ntasks = ntasks ;
448
449 // -------------------------------------------------------------------------
450 // check if everything was allocated OK
451 // -------------------------------------------------------------------------
452
453 if (cc->status < CHOLMOD_OK)
454 {
455 spqr_freenum (&QRnum, cc) ;
456 FREE_WORK ;
457 return (NULL) ;
458 }
459
460 // At this point, the factorization is guaranteed to succeed, unless
461 // sizeof (BLAS_INT) < sizeof (Long), in which case, you really should get
462 // a 64-bit BLAS.
463
464 // -------------------------------------------------------------------------
465 // create the Blob : everything the numeric factorization kernel needs
466 // -------------------------------------------------------------------------
467
468 spqr_blob <Entry> Blob ;
469 Blob.QRsym = QRsym ;
470 Blob.QRnum = QRnum ;
471 Blob.tol = tol ;
472 Blob.Work = Work ;
473 Blob.Cm = Cm ;
474 Blob.Cblock = Cblock ;
475 Blob.Sx = Sx ;
476 Blob.ntol = ntol ;
477 Blob.fchunk = fchunk ;
478 Blob.cc = cc ;
479
480 // -------------------------------------------------------------------------
481 // initialize the "pure" flop count (for performance testing only)
482 // -------------------------------------------------------------------------
483
484 cc->SPQR_flopcount = 0 ;
485
486 // -------------------------------------------------------------------------
487 // numeric QR factorization
488 // -------------------------------------------------------------------------
489
490 PR (("[ calling the kernel\n")) ;
491
492 if (ntasks == 1)
493 {
494 // Just one task, with or without TBB installed: don't use TBB
495 spqr_kernel (0, &Blob) ; // sequential case
496 }
497 else
498 {
499 #ifdef HAVE_TBB
500 // parallel case: TBB is installed, and there is more than one task
501 int nthreads = MAX (0, cc->SPQR_nthreads) ;
502 spqr_parallel (ntasks, nthreads, &Blob) ;
503 #else
504 // TBB not installed, but the work is still split into multiple tasks.
505 // do tasks 0 to ntasks-2 (skip the placeholder root task id = ntasks-1)
506 for (Long id = 0 ; id < ntasks-1 ; id++)
507 {
508 spqr_kernel (id, &Blob) ;
509 }
510 #endif
511 }
512
513 PR (("] did the kernel\n")) ;
514
515 // -------------------------------------------------------------------------
516 // check for BLAS Long overflow on the CPU, or GPU failure
517 // -------------------------------------------------------------------------
518
519 if (cc->status < CHOLMOD_OK)
520 {
521 // On the CPU, this case can only occur if the problem is too large for
522 // the BLAS. This can only occur if, for example you're on a 64-bit
523 // platform (with sizeof (Long) = 8) and using a 32-bit BLAS (with
524 // sizeof (BLAS_INT) = 4). On the GPU, this case can more easily
525 // occur, if we run out of memory in spqrgpu_kernel, or if we fail to
526 // communicate with the GPU.
527 spqr_freenum (&QRnum, cc) ;
528 FREE_WORK ;
529 return (NULL) ;
530 }
531
532 // -------------------------------------------------------------------------
533 // finalize the rank
534 // -------------------------------------------------------------------------
535
536 PR (("finalize the rank\n")) ;
537 rank = 0 ;
538 maxfrank = 1 ;
539 for (stack = 0 ; stack < ns ; stack++)
540 {
541 PR (("stack: %ld Work [stack].sumfrank: %ld\n", stack, Work [stack].sumfrank)) ;
542 rank += Work [stack].sumfrank ;
543 maxfrank = MAX (maxfrank, Work [stack].maxfrank) ;
544 }
545 QRnum->rank = rank ; // required by spqr_hpinv
546 QRnum->maxfrank = maxfrank ;
547 PR (("m %ld n %ld my QR rank %ld\n", m, n, rank)) ;
548
549 // -------------------------------------------------------------------------
550 // finalize norm(w) for the dead column 2-norms
551 // -------------------------------------------------------------------------
552
553 double wscale = 0 ;
554 double wssq = 1 ;
555 for (stack = 0 ; stack < ns ; stack++)
556 {
557 // norm_E_fro = norm (s.*sqrt(q)) ; see also LAPACK's dnrm2
558 double ws = Work [stack].wscale ;
559 double wq = Work [stack].wssq ;
560 if (wq != 0)
561 {
562 double wk = ws * sqrt (wq) ;
563 if (wscale < wk)
564 {
565 double rr = wscale / wk ;
566 wssq = 1 + wssq * rr * rr ;
567 wscale = wk ;
568 }
569 else
570 {
571 double rr = wk / wscale ;
572 wssq += rr * rr ;
573 }
574 }
575 }
576 QRnum->norm_E_fro = wscale * sqrt (wssq) ;
577 cc->SPQR_norm_E_fro = QRnum->norm_E_fro ;
578
579 // -------------------------------------------------------------------------
580 // free all workspace, except Cblock and Work
581 // -------------------------------------------------------------------------
582
583 FREE_WORK_PART1 ;
584
585 // -------------------------------------------------------------------------
586 // shrink the Stacks to hold just R (and H, if H kept)
587 // -------------------------------------------------------------------------
588
589 // If shrink is <= 0, then the Stacks are not modified.
590 // If shrink is 1, each Stack is realloc'ed to the right size (default)
591 // If shrink is > 1, then each Stack is forcibly moved and shrunk.
592 // This option is mainly meant for testing, not production use.
593 // It should be left at 1 for production use.
594
595 Long any_moved = FALSE ;
596
597 int shrink = cc->SPQR_shrink ;
598
599 if (shrink > 0)
600 {
601 for (stack = 0 ; stack < ns ; stack++)
602 {
603 // stacksize is the current size of the this Stack
604 size_t stacksize = Stack_size [stack] ;
605 Entry *Stack = Stacks [stack] ;
606 // Work [stack].Stack_head points to the first empty slot in stack,
607 // so newstacksize is the size of the space in use by R and H.
608 size_t newstacksize = Work [stack].Stack_head - Stack ;
609 ASSERT (newstacksize <= stacksize) ;
610 // Reduce the size of this stack. Cblock [0:nf-1] is no longer
611 // needed for holding pointers to the C blocks of each frontal
612 // matrix. Reuse it to hold the reallocated stacks.
613 if (shrink > 1)
614 {
615 // force the block to move by malloc'ing a new one;
616 // this option is mainly for testing only.
617 Cblock [stack] = (Entry *) cholmod_l_malloc (newstacksize,
618 sizeof (Entry), cc) ;
619 if (Cblock [stack] == NULL)
620 {
621 // oops, the malloc failed; just use the old block
622 cc->status = CHOLMOD_OK ;
623 Cblock [stack] = Stack ;
624 // update the memory usage statistics, however
625 cc->memory_inuse +=
626 ((newstacksize-stacksize) * sizeof (Entry)) ;
627 }
628 else
629 {
630 // malloc is OK; copy the block over and free the old one
631 memcpy (Cblock [stack], Stack, newstacksize*sizeof(Entry)) ;
632 cholmod_l_free (stacksize, sizeof (Entry), Stack, cc) ;
633 }
634 // the Stack has been shrunk to the new size
635 stacksize = newstacksize ;
636 }
637 else
638 {
639 // normal method; just realloc the block
640 PR (("Normal shrink of the stack: %ld to %ld\n",
641 stacksize, newstacksize)) ;
642 Cblock [stack] = // pointer to the new Stack
643 (Entry *) cholmod_l_realloc (
644 newstacksize, // requested size of Stack, in # of Entries
645 sizeof (Entry), // size of each Entry in the Stack
646 Stack, // pointer to the old Stack
647 &stacksize, // input: old stack size; output: new size
648 cc) ;
649 }
650 Stack_size [stack] = stacksize ;
651 any_moved = any_moved || (Cblock [stack] != Stack) ;
652 // reducing the size of a block of memory always succeeds
653 ASSERT (cc->status == CHOLMOD_OK) ;
654 }
655 }
656
657 // -------------------------------------------------------------------------
658 // adjust the Rblock pointers if the Stacks have been moved
659 // -------------------------------------------------------------------------
660
661 if (any_moved)
662 {
663 // at least one Stack has moved; check all fronts and adjust them
664 for (Long task = 0 ; task < ntasks ; task++)
665 {
666 Long kfirst, klast ;
667 if (ntasks == 1)
668 {
669 // sequential case
670 kfirst = 0 ;
671 klast = nf ;
672 stack = 0 ;
673 }
674 else
675 {
676 kfirst = TaskFrontp [task] ;
677 klast = TaskFrontp [task+1] ;
678 stack = TaskStack [task] ;
679 }
680 ASSERT (stack >= 0 && stack < ns) ;
681 Entry *Old_Stack = Stacks [stack] ;
682 Entry *New_Stack = Cblock [stack] ;
683 if (New_Stack != Old_Stack)
684 {
685 for (Long kf = kfirst ; kf < klast ; kf++)
686 {
687 Long f = (ntasks == 1) ? kf : TaskFront [kf] ;
688 Rblock [f] = New_Stack + (Rblock [f] - Old_Stack) ;
689 }
690 }
691 }
692 // finalize the Stacks
693 for (stack = 0 ; stack < ns ; stack++)
694 {
695 Stacks [stack] = Cblock [stack] ;
696 }
697 }
698
699 // -------------------------------------------------------------------------
700 // free the rest of the workspace
701 // -------------------------------------------------------------------------
702
703 FREE_WORK_PART2 ;
704
705 // -------------------------------------------------------------------------
706 // extract the implicit row permutation for H
707 // -------------------------------------------------------------------------
708
709 // this must be done sequentially, when all threads are finished
710 if (keepH)
711 {
712 // use Wi as workspace (Iwork (0:m-1)) [
713 spqr_hpinv (QRsym, QRnum, Wi) ;
714 // Wi no longer needed ]
715 }
716
717 // -------------------------------------------------------------------------
718 // find the rank and return the result
719 // -------------------------------------------------------------------------
720
721 // find the rank of the first ntol columns of A
722 PR (("find rank of first ntol cols of A: ntol %ld n %ld\n", ntol, n)) ;
723 if (ntol >= n)
724 {
725 rank1 = rank ;
726 PR (("rank1 is rank: %ld\n", rank1)) ;
727 }
728 else
729 {
730 rank1 = 0 ;
731 for (j = 0 ; j < ntol ; j++)
732 {
733 PR (("column %ld Rdead: %d\n", j, (int) Rdead [j])) ;
734 if (!Rdead [j])
735 {
736 rank1++ ;
737 }
738 }
739 PR (("rank1 is sum of non-Rdead: %ld\n", rank1)) ;
740 }
741 QRnum->rank1 = rank1 ;
742 return (QRnum) ;
743 }
744
745
746 // =============================================================================
747
748 template spqr_numeric <double> *spqr_factorize <double>
749 (
750 // input, optionally freed on output
751 cholmod_sparse **Ahandle,
752
753 // inputs, not modified
754 Long freeA, // if TRUE, free A on output
755 double tol, // for rank detection
756 Long ntol, // apply tol only to first ntol columns
757 spqr_symbolic *QRsym,
758
759 // workspace and parameters
760 cholmod_common *cc
761 ) ;
762
763 // =============================================================================
764
765 template spqr_numeric <Complex> *spqr_factorize <Complex>
766 (
767 // input, optionally freed on output
768 cholmod_sparse **Ahandle,
769
770 // inputs, not modified
771 Long freeA, // if TRUE, free A on output
772 double tol, // for rank detection
773 Long ntol, // apply tol only to first ntol columns
774 spqr_symbolic *QRsym,
775
776 // workspace and parameters
777 cholmod_common *cc
778 ) ;
779