1 /* ========================================================================== */
2 /* === klu mexFunction ====================================================== */
3 /* ========================================================================== */
4
5 /* KLU: a MATLAB interface to a "Clark Kent" sparse LU factorization algorithm.
6
7 3 or 4 input arguments: factorize and solve, returning the solution:
8
9 x = klu (A, '\', b)
10 x = klu (A, '\', b, opts)
11 x = klu (b, '/', A)
12 x = klu (b, '/', A, opts)
13
14 A can be the LU struct, instead:
15
16 x = klu (LU, '\', b)
17 x = klu (LU, '\', b, opts)
18 x = klu (b, '/', LU)
19 x = klu (b, '/', LU, opts)
20
21 where LU is a struct containing members: L, U, p, q, R, F, and r. Only L
22 and U are required. The factorization is L*U+F = R\A(p,q), where r defines
23 the block boundaries of the BTF form, and F contains the entries in the
24 upper block triangular part.
25
26 with 1 or 2 input arguments: factorize, returning the LU struct:
27
28 LU = klu (A)
29 LU = klu (A, opts)
30
31 2nd optional output: info, which is only meaningful if A was factorized.
32
33 A must be square. b can be a matrix, but it cannot be sparse.
34
35 Obscure options, mainly for testing:
36
37 opts.memgrow 1.2 when L and U need to grow, inc. by this ratio.
38 valid range: 1 or more.
39 opts.imemamd 1.2 initial size of L and U with AMD or other
40 symmetric ordering is 1.2*nnz(L)+n;
41 valid range 1 or more.
42 opts.imem 10 initial size of L and U is 10*nnz(A)+n if a
43 symmetric ordering not used; valid range 1 or
44 more
45 */
46
47 /* ========================================================================== */
48
49 #include "klu.h"
50 #include <string.h>
51 #define Long SuiteSparse_long
52
53 #ifndef NCHOLMOD
54 #include "klu_cholmod.h"
55 #endif
56
57 #include "mex.h"
58 #define MAX(a,b) (((a) > (b)) ? (a) : (b))
59 #define MIN(a,b) (((a) < (b)) ? (a) : (b))
60 #define ABS(x) (((x) < 0) ? -(x) : (x))
61 #define STRING_MATCH(s1,s2) (strcmp ((s1), (s2)) == 0)
62
63 /* Complex division. This uses ACM Algo 116, by R. L. Smith, 1962. */
64 /* Note that c cannot be the same variable as a or b */
65 #define DIV(cx,cz,ax,az,bx,bz) \
66 { \
67 double r, den ; \
68 if (ABS (bx) >= ABS (bz)) \
69 { \
70 r = bz / bx ; \
71 den = bx + r * bz ; \
72 cx = (ax + az * r) / den ; \
73 cz = (az - ax * r) / den ; \
74 } \
75 else \
76 { \
77 r = bx / bz ; \
78 den = r * bx + bz ; \
79 cx = (ax * r + az) / den ; \
80 cz = (az * r - ax) / den ; \
81 } \
82 }
83
84 /* complex multiply/subtract, c -= a*b */
85 /* Note that c cannot be the same variable as a or b */
86 #define MULT_SUB(cx,cz,ax,az,bx,bz) \
87 { \
88 cx -= ax * bx - az * bz ; \
89 cz -= az * bx + ax * bz ; \
90 }
91
92 /* complex multiply/subtract, c -= a*conj(b) */
93 /* Note that c cannot be the same variable as a or b */
94 #define MULT_SUB_CONJ(cx,cz,ax,az,bx,bz) \
95 { \
96 cx -= ax * bx + az * bz ; \
97 cz -= az * bx - ax * bz ; \
98 }
99
100 /* ========================================================================== */
101 /* === klu mexFunction ====================================================== */
102 /* ========================================================================== */
103
mexFunction(int nargout,mxArray * pargout[],int nargin,const mxArray * pargin[])104 void mexFunction
105 (
106 int nargout,
107 mxArray *pargout [ ],
108 int nargin,
109 const mxArray *pargin [ ]
110 )
111 {
112 double ukk, lkk, rs, s, lik, uik, x [4], offik, z, ukkz, lkkz, sz, wx, wz ;
113 double *X, *B, *Xz, *Xx, *Bx, *Bz, *A, *Ax, *Az, *Lx, *Ux, *Rs, *Offx, *Wx,
114 *Uz, *Lz, *Offz, *Wz, *W, *Xi, *Bi ;
115 Long *Ap, *Ai, *Lp, *Li, *Up, *Ui, *P, *Q, *R, *Rp, *Ri, *Offp, *Offi ;
116 char *operator ;
117 mxArray *L_matlab, *U_matlab, *p_matlab, *q_matlab, *R_matlab, *F_matlab,
118 *r_matlab, *field ;
119 const mxArray *A_matlab = NULL, *LU_matlab, *B_matlab = NULL, *opts_matlab ;
120 klu_l_symbolic *Symbolic ;
121 klu_l_numeric *Numeric ;
122 klu_l_common Common ;
123 Long n = 0, k, nrhs = 0, do_solve, do_factorize, symmetric,
124 A_complex = 0, B_complex, nz, do_transpose = 0, p, pend, nblocks,
125 R1 [2], chunk, nr, i, j, block, k1, k2, nk, bn = 0, ordering ;
126 int mx_int ;
127 static const char *fnames [ ] = {
128 "noffdiag", /* # of off-diagonal pivots */
129 "nrealloc", /* # of memory reallocations */
130 "rcond", /* cheap reciprocal number estimate */
131 "rgrowth", /* reciprocal pivot growth */
132 "flops", /* flop count */
133 "nblocks", /* # of blocks in BTF form (1 if not computed) */
134 "ordering", /* AMD, COLAMD, natural, cholmod(AA'), cholmod(A+A') */
135 "scale", /* scaling (<=0: none, 1: sum, 2: max */
136 "lnz", /* nnz(L), including diagonal */
137 "unz", /* nnz(U), including diagonal */
138 "offnz", /* nnz(F), including diagonal */
139 "tol", /* pivot tolerance used */
140 "memory" /* peak memory usage */
141 },
142 *LUnames [ ] = { "L", "U", "p", "q", "R", "F", "r" } ;
143
144 /* ---------------------------------------------------------------------- */
145 /* get inputs */
146 /* ---------------------------------------------------------------------- */
147
148 if (nargin < 1 || nargin > 4 || nargout > 3)
149 {
150 mexErrMsgTxt (
151 "Usage: x = klu(A,'\',b), x = klu(A,'/',b) or LU = klu(A)") ;
152 }
153
154 /* return the solution x, or just do LU factorization */
155 do_solve = (nargin > 2) ;
156
157 /* determine size of the MATLAB integer */
158 if (sizeof (Long) == sizeof (INT32_T))
159 {
160 mx_int = mxINT32_CLASS ;
161 }
162 else
163 {
164 mx_int = mxINT64_CLASS ;
165 }
166
167 if (do_solve)
168 {
169
170 /* ------------------------------------------------------------------ */
171 /* slash or backslash */
172 /* ------------------------------------------------------------------ */
173
174 /* usage, where opts is the optional 4th input argument:
175 x = klu (A, '\', b)
176 x = klu (LU, '\', b)
177 x = klu (b, '/', A)
178 x = klu (b, '/', LU)
179 */
180
181 /* determine the operator, slash (/) or backslash (\) */
182 if (!mxIsChar (pargin [1]))
183 {
184 mexErrMsgTxt ("invalid operator") ;
185 }
186 operator = mxArrayToString (pargin [1]) ;
187 if (STRING_MATCH (operator, "\\"))
188 {
189 do_transpose = 0 ;
190 A_matlab = pargin [0] ;
191 B_matlab = pargin [2] ;
192 nrhs = mxGetN (B_matlab) ;
193 bn = mxGetM (B_matlab) ;
194 }
195 else if (STRING_MATCH (operator, "/"))
196 {
197 do_transpose = 1 ;
198 A_matlab = pargin [2] ;
199 B_matlab = pargin [0] ;
200 nrhs = mxGetM (B_matlab) ;
201 bn = mxGetN (B_matlab) ;
202 }
203 else
204 {
205 mexErrMsgTxt ("invalid operator") ;
206 }
207
208 if (mxIsSparse (B_matlab))
209 {
210 mexErrMsgTxt ("B cannot be sparse") ;
211 }
212
213 opts_matlab = (nargin > 3) ? pargin [3] : NULL ;
214
215 /* determine if the factorization needs to be performed */
216 do_factorize = !mxIsStruct (A_matlab) ;
217 if (do_factorize)
218 {
219 LU_matlab = NULL ;
220 }
221 else
222 {
223 LU_matlab = A_matlab ;
224 A_matlab = NULL ;
225 }
226
227 }
228 else
229 {
230
231 /* ------------------------------------------------------------------ */
232 /* factorize A and return LU factorization */
233 /* ------------------------------------------------------------------ */
234
235 /* usage, where opts in the optional 2nd input argument:
236 LU = klu (A)
237 */
238
239 LU_matlab = NULL ;
240 A_matlab = pargin [0] ;
241 B_matlab = NULL ;
242 opts_matlab = (nargin > 1) ? pargin [1] : NULL ;
243 do_factorize = 1 ;
244 if (mxIsStruct (A_matlab))
245 {
246 mexErrMsgTxt ("invalid input, A must be a sparse matrix") ;
247 }
248 }
249
250 /* ---------------------------------------------------------------------- */
251 /* get options and set Common defaults */
252 /* ---------------------------------------------------------------------- */
253
254 klu_l_defaults (&Common) ;
255
256 /* factorization options */
257 if (opts_matlab != NULL && mxIsStruct (opts_matlab))
258 {
259 if ((field = mxGetField (opts_matlab, 0, "tol")) != NULL)
260 {
261 Common.tol = mxGetScalar (field) ;
262 }
263 if ((field = mxGetField (opts_matlab, 0, "memgrow")) != NULL)
264 {
265 Common.memgrow = mxGetScalar (field) ;
266 }
267 if ((field = mxGetField (opts_matlab, 0, "imemamd")) != NULL)
268 {
269 Common.initmem_amd = mxGetScalar (field) ;
270 }
271 if ((field = mxGetField (opts_matlab, 0, "imem")) != NULL)
272 {
273 Common.initmem = mxGetScalar (field) ;
274 }
275 if ((field = mxGetField (opts_matlab, 0, "btf")) != NULL)
276 {
277 Common.btf = mxGetScalar (field) ;
278 }
279 if ((field = mxGetField (opts_matlab, 0, "ordering")) != NULL)
280 {
281 Common.ordering = mxGetScalar (field) ;
282 }
283 if ((field = mxGetField (opts_matlab, 0, "scale")) != NULL)
284 {
285 Common.scale = mxGetScalar (field) ;
286 }
287 if ((field = mxGetField (opts_matlab, 0, "maxwork")) != NULL)
288 {
289 Common.maxwork = mxGetScalar (field) ;
290 }
291 }
292
293 if (Common.ordering < 0 || Common.ordering > 4)
294 {
295 mexErrMsgTxt ("invalid ordering option") ;
296 }
297 ordering = Common.ordering ;
298
299 #ifndef NCHOLMOD
300 /* ordering option 3,4 becomes KLU option 3, with symmetric 0 or 1 */
301 symmetric = (Common.ordering == 4) ;
302 if (symmetric) Common.ordering = 3 ;
303 Common.user_order = klu_l_cholmod ;
304 Common.user_data = &symmetric ;
305 #else
306 /* CHOLMOD, METIS, CAMD, CCOLAMD, not available */
307 if (Common.ordering > 2)
308 {
309 mexErrMsgTxt ("invalid ordering option") ;
310 }
311 #endif
312
313 if (Common.scale < 1 || Common.scale > 2)
314 {
315 Common.scale = -1 ; /* no scaling, and no error checking either */
316 }
317
318 /* ---------------------------------------------------------------------- */
319 /* factorize, if needed */
320 /* ---------------------------------------------------------------------- */
321
322 if (do_factorize)
323 {
324
325 /* get input matrix A to factorize */
326 n = mxGetN (A_matlab) ;
327 if (!mxIsSparse (A_matlab) || n != mxGetM (A_matlab) || n == 0)
328 {
329 mexErrMsgTxt ("A must be sparse, square, and non-empty") ;
330 }
331
332 Ap = (Long *) mxGetJc (A_matlab) ;
333 Ai = (Long *) mxGetIr (A_matlab) ;
334 Ax = mxGetPr (A_matlab) ;
335 Az = mxGetPi (A_matlab) ;
336 nz = Ap [n] ;
337 A_complex = mxIsComplex (A_matlab) ;
338
339 if (do_solve && (n != bn || nrhs == 0))
340 {
341 mexErrMsgTxt ("B must be non-empty with same number of rows as A") ;
342 }
343
344 /* ------------------------------------------------------------------ */
345 /* analyze */
346 /* ------------------------------------------------------------------ */
347
348 Symbolic = klu_l_analyze (n, Ap, Ai, &Common) ;
349 if (Symbolic == (klu_l_symbolic *) NULL)
350 {
351 mexErrMsgTxt ("klu symbolic analysis failed") ;
352 }
353
354 /* ------------------------------------------------------------------ */
355 /* factorize */
356 /* ------------------------------------------------------------------ */
357
358 if (A_complex)
359 {
360 /* A is complex */
361 A = mxMalloc (nz * 2 * sizeof (double)) ;
362 for (k = 0 ; k < nz ; k++)
363 {
364 A [2*k ] = Ax [k] ; /* real part */
365 A [2*k+1] = Az [k] ; /* imaginary part */
366 }
367 Numeric = klu_zl_factor (Ap, Ai, A, Symbolic, &Common) ;
368 if (nargout > 1)
369 {
370 /* flops and rgrowth, if requested */
371 klu_zl_flops (Symbolic, Numeric, &Common) ;
372 klu_zl_rgrowth (Ap, Ai, A, Symbolic, Numeric, &Common) ;
373 }
374 mxFree (A) ;
375 }
376 else
377 {
378 /* A is real */
379 Numeric = klu_l_factor (Ap, Ai, Ax, Symbolic, &Common) ;
380 if (nargout > 1)
381 {
382 /* flops, if requested */
383 klu_l_flops (Symbolic, Numeric, &Common) ;
384 klu_l_rgrowth (Ap, Ai, Ax, Symbolic, Numeric, &Common) ;
385 }
386 }
387 if (Common.status != KLU_OK)
388 {
389 mexErrMsgTxt ("klu numeric factorization failed") ;
390 }
391
392 /* ------------------------------------------------------------------ */
393 /* compute cheap condition number estimate */
394 /* ------------------------------------------------------------------ */
395
396 if (A_complex)
397 {
398 klu_zl_rcond (Symbolic, Numeric, &Common) ;
399 }
400 else
401 {
402 klu_l_rcond (Symbolic, Numeric, &Common) ;
403 }
404
405 /* ------------------------------------------------------------------ */
406 /* return info, if requested */
407 /* ------------------------------------------------------------------ */
408
409 #define INFO(i,x) \
410 mxSetFieldByNumber (pargout [1], 0, i, mxCreateDoubleScalar (x))
411
412 if (nargout > 1)
413 {
414 pargout [1] = mxCreateStructMatrix (1, 1, 13, fnames) ;
415 INFO (0, Common.noffdiag) ;
416 INFO (1, Common.nrealloc) ;
417 INFO (2, Common.rcond) ;
418 INFO (3, Common.rgrowth) ;
419 INFO (4, Common.flops) ;
420 INFO (5, Symbolic->nblocks) ;
421 INFO (6, ordering) ;
422 INFO (7, Common.scale) ;
423 INFO (8, Numeric->lnz) ;
424 INFO (9, Numeric->unz) ;
425 INFO (10, Numeric->nzoff) ;
426 INFO (11, Common.tol) ;
427 INFO (12, Common.mempeak) ;
428 }
429 if (nargout > 2)
430 {
431 /* this is done separately, since it's costly */
432 klu_l_condest (Ap, Ax, Symbolic, Numeric, &Common) ;
433 pargout [2] = mxCreateDoubleMatrix (1, 1, mxREAL) ;
434 Wx = mxGetPr (pargout [2]) ;
435 Wx [0] = Common.condest ;
436 }
437
438 }
439 else
440 {
441 /* create an empty "info" and "condest" output */
442 if (nargout > 1)
443 {
444 pargout [1] = mxCreateDoubleMatrix (0, 0, mxREAL) ;
445 }
446 if (nargout > 2)
447 {
448 pargout [2] = mxCreateDoubleMatrix (0, 0, mxREAL) ;
449 }
450 }
451
452 /* ---------------------------------------------------------------------- */
453 /* solve, or return LU factorization */
454 /* ---------------------------------------------------------------------- */
455
456 if (do_solve)
457 {
458
459 /* ------------------------------------------------------------------ */
460 /* solve, x = klu ( ... ) usage */
461 /* ------------------------------------------------------------------ */
462
463 B_complex = mxIsComplex (B_matlab) ;
464
465 if (do_factorize)
466 {
467
468 /* -------------------------------------------------------------- */
469 /* solve using KLU factors computed above */
470 /* -------------------------------------------------------------- */
471
472 /* klu (A,'\',b) or klu (b,'/',A) usage */
473
474 /* create X */
475 if (do_transpose)
476 {
477 pargout [0] = mxCreateDoubleMatrix (nrhs, n,
478 (A_complex || B_complex) ? mxCOMPLEX : mxREAL) ;
479 }
480 else
481 {
482 pargout [0] = mxCreateDoubleMatrix (n, nrhs,
483 (A_complex || B_complex) ? mxCOMPLEX : mxREAL) ;
484 }
485
486 if (A_complex)
487 {
488
489 /* ---------------------------------------------------------- */
490 /* A is complex, but B might be real */
491 /* ---------------------------------------------------------- */
492
493 X = mxMalloc (n * nrhs * 2 * sizeof (double)) ;
494 Bx = mxGetPr (B_matlab) ;
495 Bz = mxGetPi (B_matlab) ;
496
497 if (do_transpose)
498 {
499
500 /* X = B', merge and transpose B */
501 for (j = 0 ; j < nrhs ; j++)
502 {
503 for (i = 0 ; i < n ; i++)
504 {
505 X [2*(i+j*n) ] = Bx [j+i*nrhs] ; /* real */
506 X [2*(i+j*n)+1] = Bz ? (-Bz [j+i*nrhs]) : 0 ;
507 }
508 }
509
510 /* solve A'x=b (complex conjugate) */
511 klu_zl_tsolve (Symbolic, Numeric, n, nrhs, X, 1, &Common) ;
512
513 /* split and transpose the solution */
514 Xx = mxGetPr (pargout [0]) ;
515 Xz = mxGetPi (pargout [0]) ;
516 for (j = 0 ; j < nrhs ; j++)
517 {
518 for (i = 0 ; i < n ; i++)
519 {
520 Xx [j+i*nrhs] = X [2*(i+j*n) ] ; /* real part */
521 Xz [j+i*nrhs] = -X [2*(i+j*n)+1] ; /* imag part */
522 }
523 }
524
525 }
526 else
527 {
528
529 /* X = B, but create merged X from a split B */
530 for (k = 0 ; k < n*nrhs ; k++)
531 {
532 X [2*k ] = Bx [k] ; /* real part */
533 X [2*k+1] = Bz ? (Bz [k]) : 0 ; /* imaginary part */
534 }
535
536 /* solve Ax=b */
537 klu_zl_solve (Symbolic, Numeric, n, nrhs, X, &Common) ;
538
539 /* split the solution into real and imaginary parts */
540 Xx = mxGetPr (pargout [0]) ;
541 Xz = mxGetPi (pargout [0]) ;
542 for (k = 0 ; k < n*nrhs ; k++)
543 {
544 Xx [k] = X [2*k ] ; /* real part */
545 Xz [k] = X [2*k+1] ; /* imaginary part */
546 }
547 }
548
549 mxFree (X) ;
550 }
551 else
552 {
553
554 if (do_transpose)
555 {
556
557 /* solve in chunks of 4 columns at a time */
558 W = mxMalloc (n * MAX (nrhs,4) * sizeof (double)) ;
559 X = mxGetPr (pargout [0]) ;
560 B = mxGetPr (B_matlab) ;
561 Xi = mxGetPi (pargout [0]) ;
562 Bi = mxGetPi (B_matlab) ;
563
564 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
565 {
566
567 /* A is real: real(X) = real(b) / real(A) */
568 Long chunksize = MIN (nrhs - chunk, 4) ;
569 for (j = 0 ; j < chunksize ; j++)
570 {
571 for (i = 0 ; i < n ; i++)
572 {
573 W [i+j*n] = B [i*nrhs+j] ;
574 }
575 }
576 klu_l_tsolve (Symbolic, Numeric, n, chunksize, W,
577 &Common) ;
578 for (j = 0 ; j < chunksize ; j++)
579 {
580 for (i = 0 ; i < n ; i++)
581 {
582 X [i*nrhs+j] = W [i+j*n] ;
583 }
584 }
585 X += 4 ;
586 B += 4 ;
587
588 if (B_complex)
589 {
590 /* B is complex: imag(X) = imag(B) / real(A) */
591
592 for (j = 0 ; j < chunksize ; j++)
593 {
594 for (i = 0 ; i < n ; i++)
595 {
596 W [i+j*n] = Bi [i*nrhs+j] ;
597 }
598 }
599 klu_l_tsolve (Symbolic, Numeric, n, chunksize, W,
600 &Common) ;
601 for (j = 0 ; j < chunksize ; j++)
602 {
603 for (i = 0 ; i < n ; i++)
604 {
605 Xi [i*nrhs+j] = W [i+j*n] ;
606 }
607 }
608 Xi += 4 ;
609 Bi += 4 ;
610 }
611
612 }
613 mxFree (W) ;
614
615 }
616 else
617 {
618
619 /* A is real: real(X) = real(A) \ real(b) */
620 X = mxGetPr (pargout [0]) ;
621 B = mxGetPr (B_matlab) ;
622 for (k = 0 ; k < n*nrhs ; k++)
623 {
624 X [k] = B [k] ;
625 }
626 klu_l_solve (Symbolic, Numeric, n, nrhs, X, &Common) ;
627 if (B_complex)
628 {
629 /* B is complex: imag(X) = real(A) \ imag(B) */
630 X = mxGetPi (pargout [0]) ;
631 B = mxGetPi (B_matlab) ;
632 for (k = 0 ; k < n*nrhs ; k++)
633 {
634 X [k] = B [k] ;
635 }
636 klu_l_solve (Symbolic, Numeric, n, nrhs, X, &Common) ;
637 }
638 }
639 }
640
641 /* -------------------------------------------------------------- */
642 /* free Symbolic and Numeric objects */
643 /* -------------------------------------------------------------- */
644
645 klu_l_free_symbolic (&Symbolic, &Common) ;
646 if (A_complex)
647 {
648 klu_zl_free_numeric (&Numeric, &Common) ;
649 }
650 else
651 {
652 klu_l_free_numeric (&Numeric, &Common) ;
653 }
654
655 }
656 else
657 {
658
659 /* -------------------------------------------------------------- */
660 /* solve using LU struct given on input */
661 /* -------------------------------------------------------------- */
662
663 /* the factorization is L*U+F = R\A(p,q), where L*U is block
664 diagonal, and F contains the entries in the upper block
665 triangular part */
666
667 L_matlab = mxGetField (LU_matlab, 0, "L") ;
668 U_matlab = mxGetField (LU_matlab, 0, "U") ;
669 p_matlab = mxGetField (LU_matlab, 0, "p") ;
670 q_matlab = mxGetField (LU_matlab, 0, "q") ;
671 R_matlab = mxGetField (LU_matlab, 0, "R") ;
672 F_matlab = mxGetField (LU_matlab, 0, "F") ;
673 r_matlab = mxGetField (LU_matlab, 0, "r") ;
674
675 if (!L_matlab || !U_matlab || !mxIsSparse (L_matlab) ||
676 !mxIsSparse (U_matlab))
677 {
678 mexErrMsgTxt ("invalid LU struct") ;
679 }
680
681 n = mxGetM (L_matlab) ;
682 if (n != mxGetN (L_matlab) ||
683 n != mxGetM (U_matlab) || n != mxGetN (U_matlab)
684 /* ... */
685 )
686 {
687 mexErrMsgTxt ("invalid LU struct") ;
688 }
689
690 if (n != bn || nrhs == 0)
691 {
692 mexErrMsgTxt (
693 "B must be non-empty with same number of rows as L and U") ;
694 }
695
696 /* get L */
697 if (!mxIsSparse (L_matlab) ||
698 n != mxGetM (L_matlab) || n != mxGetN (L_matlab))
699 {
700 mexErrMsgTxt ("LU.L must be sparse and same size as A") ;
701 }
702
703 Lp = (Long *) mxGetJc (L_matlab) ;
704 Li = (Long *) mxGetIr (L_matlab) ;
705 Lx = mxGetPr (L_matlab) ;
706 Lz = mxGetPi (L_matlab) ;
707
708 /* get U */
709 if (!mxIsSparse (U_matlab) ||
710 n != mxGetM (U_matlab) || n != mxGetN (U_matlab))
711 {
712 mexErrMsgTxt ("LU.U must be sparse and same size as A") ;
713 }
714 Up = (Long *) mxGetJc (U_matlab) ;
715 Ui = (Long *) mxGetIr (U_matlab) ;
716 Ux = mxGetPr (U_matlab) ;
717 Uz = mxGetPi (U_matlab) ;
718
719 /* get p */
720 if (p_matlab)
721 {
722 if (mxGetNumberOfElements (p_matlab) != n
723 || mxIsSparse (p_matlab)
724 || mxGetClassID (p_matlab) != mx_int)
725 {
726 mexErrMsgTxt ("P invalid") ;
727 }
728 P = (Long *) mxGetData (p_matlab) ;
729 for (k = 0 ; k < n ; k++)
730 {
731 if (P [k] < 1 || P [k] > n) mexErrMsgTxt ("P invalid") ;
732 }
733 }
734 else
735 {
736 /* no P, use identity instead */
737 P = NULL ;
738 }
739
740 /* get q */
741 if (q_matlab)
742 {
743 if (mxGetNumberOfElements (q_matlab) != n
744 || mxIsSparse (q_matlab)
745 || mxGetClassID (q_matlab) != mx_int)
746 {
747 mexErrMsgTxt ("Q invalid") ;
748 }
749 Q = (Long *) mxGetData (q_matlab) ;
750 for (k = 0 ; k < n ; k++)
751 {
752 if (Q [k] < 1 || Q [k] > n) mexErrMsgTxt ("Q invalid.") ;
753 }
754 }
755 else
756 {
757 /* no Q, use identity instead */
758 Q = NULL ;
759 }
760
761 /* get r */
762 R1 [0] = 1 ;
763 R1 [1] = n+1 ;
764 if (r_matlab)
765 {
766 nblocks = mxGetNumberOfElements (r_matlab) - 1 ;
767 if (nblocks < 1 || nblocks > n || mxIsSparse (r_matlab)
768 || mxGetClassID (r_matlab) != mx_int)
769 {
770 mexErrMsgTxt ("r invalid") ;
771 }
772 R = (Long *) mxGetData (r_matlab) ;
773 if (R [0] != 1) mexErrMsgTxt ("r invalid") ;
774 for (k = 1 ; k <= nblocks ; k++)
775 {
776 if (R [k] <= R [k-1] || R [k] > n+1)
777 {
778 mexErrMsgTxt ("rinvalid") ;
779 }
780 }
781 if (R [nblocks] != n+1) mexErrMsgTxt ("r invalid") ;
782 }
783 else
784 {
785 /* no r */
786 nblocks = 1 ;
787 R = R1 ;
788 }
789
790 /* get R, scale factors */
791 if (R_matlab)
792 {
793 /* ensure R is sparse, real, and has the right size */
794 if (!mxIsSparse (R_matlab) ||
795 n != mxGetM (R_matlab) || n != mxGetN (R_matlab))
796 {
797 mexErrMsgTxt ("LU.R must be sparse and same size as A") ;
798 }
799 Rp = (Long *) mxGetJc (R_matlab) ;
800 Rs = mxGetPr (R_matlab) ;
801 if (Rp [n] != n)
802 {
803 mexErrMsgTxt ("LU.R invalid, must be diagonal") ;
804 }
805 }
806 else
807 {
808 /* no scale factors */
809 Rs = NULL ;
810 }
811
812 /* get F, off diagonal entries */
813 if (F_matlab)
814 {
815 if (!mxIsSparse (F_matlab) ||
816 n != mxGetM (F_matlab) || n != mxGetN (F_matlab))
817 {
818 mexErrMsgTxt ("LU.F must be sparse and same size as A") ;
819 }
820 Offp = (Long *) mxGetJc (F_matlab) ;
821 Offi = (Long *) mxGetIr (F_matlab) ;
822 Offx = mxGetPr (F_matlab) ;
823 Offz = mxGetPi (F_matlab) ;
824 }
825 else
826 {
827 /* no off-diagonal entries */
828 Offp = NULL ;
829 Offi = NULL ;
830 Offx = NULL ;
831 Offz = NULL ;
832 }
833
834 /* -------------------------------------------------------------- */
835 /* solve */
836 /* -------------------------------------------------------------- */
837
838 if (mxIsComplex (L_matlab) || mxIsComplex (U_matlab) ||
839 (F_matlab && mxIsComplex (F_matlab)) || B_complex)
840 {
841
842 /* ========================================================== */
843 /* === complex case ========================================= */
844 /* ========================================================== */
845
846 /* create X */
847 if (do_transpose)
848 {
849 pargout [0] = mxCreateDoubleMatrix (nrhs, n, mxCOMPLEX) ;
850 }
851 else
852 {
853 pargout [0] = mxCreateDoubleMatrix (n, nrhs, mxCOMPLEX) ;
854 }
855 Xx = mxGetPr (pargout [0]) ;
856 Xz = mxGetPi (pargout [0]) ;
857
858 Bx = mxGetPr (B_matlab) ;
859 Bz = mxGetPi (B_matlab) ;
860
861 /* get workspace */
862 Wx = mxMalloc (n * sizeof (double)) ;
863 Wz = mxMalloc (n * sizeof (double)) ;
864
865 /* ---------------------------------------------------------- */
866 /* do just one row/column of the right-hand-side at a time */
867 /* ---------------------------------------------------------- */
868
869 if (do_transpose)
870 {
871
872 for (chunk = 0 ; chunk < nrhs ; chunk++)
873 {
874
875 /* -------------------------------------------------- */
876 /* transpose and permute right hand side, W = Q'*B' */
877 /* -------------------------------------------------- */
878
879 for (k = 0 ; k < n ; k++)
880 {
881 i = Q ? (Q [k] - 1) : k ;
882 Wx [k] = Bx [i*nrhs] ;
883 Wz [k] = Bz ? (-Bz [i*nrhs]) : 0 ;
884 }
885
886 /* -------------------------------------------------- */
887 /* solve W = (L*U + Off)'\W */
888 /* -------------------------------------------------- */
889
890 for (block = 0 ; block < nblocks ; block++)
891 {
892
893 /* ---------------------------------------------- */
894 /* block of size nk, rows/columns k1 to k2-1 */
895 /* ---------------------------------------------- */
896
897 k1 = R [block] - 1 ; /* R is 1-based */
898 k2 = R [block+1] - 1 ;
899 nk = k2 - k1 ;
900
901 /* ---------------------------------------------- */
902 /* block back-substitution for off-diagonal-block */
903 /* ---------------------------------------------- */
904
905 if (block > 0 && Offp != NULL)
906 {
907 for (k = k1 ; k < k2 ; k++)
908 {
909 pend = Offp [k+1] ;
910 for (p = Offp [k] ; p < pend ; p++)
911 {
912 i = Offi [p] ;
913 /* W [k] -= W [i] * conj(Off [p]) ; */
914 z = Offz ? Offz [p] : 0 ;
915 MULT_SUB_CONJ (Wx [k], Wz [k],
916 Wx [i], Wz [i], Offx [p], z) ;
917 }
918 }
919 }
920
921
922 /* solve the block system */
923 if (nk == 1)
924 {
925
926 /* W [k1] /= conj (L(k1,k1)) ; */
927 p = Lp [k1] ;
928 s = Lx [p] ;
929 sz = Lz ? (-Lz [p]) : 0 ;
930 DIV (wx, wz, Wx [k1], Wz [k1], s, sz) ;
931 Wx [k1] = wx ;
932 Wz [k1] = wz ;
933
934 /* W [k1] /= conj (U(k1,k1)) ; */
935 p = Up [k1] ;
936 s = Ux [p] ;
937 sz = Uz ? (-Uz [p]) : 0 ;
938 DIV (wx, wz, Wx [k1], Wz [k1], s, sz) ;
939 Wx [k1] = wx ;
940 Wz [k1] = wz ;
941
942 }
943 else
944 {
945
946 /* ------------------------------------------ */
947 /* W = U'\W and then W=L'\W */
948 /* ------------------------------------------ */
949
950 /* W = U'\W */
951 for (k = k1 ; k < k2 ; k++)
952 {
953 pend = Up [k+1] - 1 ;
954 /* w = W [k] */
955 wx = Wx [k] ;
956 wz = Wz [k] ;
957 for (p = Up [k] ; p < pend ; p++)
958 {
959 i = Ui [p] ;
960 /* w -= W [i] * conj(U [p]) */
961 z = Uz ? Uz [p] : 0 ;
962 MULT_SUB_CONJ (wx, wz,
963 Wx [i], Wz [i], Ux [p], z) ;
964 }
965 /* W [k] = w / conj(ukk) ; */
966 ukk = Ux [pend] ;
967 ukkz = Uz ? (-Uz [pend]) : 0 ;
968 DIV (Wx [k], Wz [k], wx, wz, ukk, ukkz) ;
969 }
970
971 /* W = L'\W */
972 for (k = k2-1 ; k >= k1 ; k--)
973 {
974 p = Lp [k] ;
975 pend = Lp [k+1] ;
976 /* w = W [k] */
977 wx = Wx [k] ;
978 wz = Wz [k] ;
979 lkk = Lx [p] ;
980 lkkz = Lz ? (-Lz [p]) : 0 ;
981 for (p++ ; p < pend ; p++)
982 {
983 i = Li [p] ;
984 /* w -= W [i] * conj (Lx [p]) ; */
985 z = Lz ? Lz [p] : 0 ;
986 MULT_SUB_CONJ (wx, wz,
987 Wx [i], Wz [i], Lx [p], z) ;
988 }
989 /* W [k] = w / conj(lkk) ; */
990 DIV (Wx [k], Wz [k], wx, wz, lkk, lkkz) ;
991 }
992 }
993 }
994
995 /* -------------------------------------------------- */
996 /* scale, permute, and tranpose: X = (P*(R\W))' */
997 /* -------------------------------------------------- */
998
999 if (Rs == NULL)
1000 {
1001 /* no scaling */
1002 for (k = 0 ; k < n ; k++)
1003 {
1004 i = P ? (P [k] - 1) : k ;
1005 Xx [i*nrhs] = Wx [k] ;
1006 Xz [i*nrhs] = Wz ? (-Wz [k]) : 0 ;
1007 }
1008 }
1009 else
1010 {
1011 /* with scaling */
1012 for (k = 0 ; k < n ; k++)
1013 {
1014 i = P ? (P [k] - 1) : k ;
1015 rs = Rs [k] ;
1016 Xx [i*nrhs] = Wx [k] / rs ;
1017 Xz [i*nrhs] = Wz ? (-Wz [k] / rs) : 0 ;
1018 }
1019 }
1020
1021 /* -------------------------------------------------- */
1022 /* go to the next row of B and X */
1023 /* -------------------------------------------------- */
1024
1025 Xx++ ;
1026 Xz++ ;
1027 Bx++ ;
1028 if (Bz) Bz++ ;
1029 }
1030
1031 }
1032 else
1033 {
1034
1035 for (chunk = 0 ; chunk < nrhs ; chunk++)
1036 {
1037
1038 /* -------------------------------------------------- */
1039 /* scale and permute the right hand side, W = P*(R\B) */
1040 /* -------------------------------------------------- */
1041
1042 if (Rs == NULL)
1043 {
1044 /* no scaling */
1045 for (k = 0 ; k < n ; k++)
1046 {
1047 i = P ? (P [k] - 1) : k ;
1048 Wx [k] = Bx [i] ;
1049 Wz [k] = Bz ? Bz [i] : 0 ;
1050 }
1051 }
1052 else
1053 {
1054 /* with scaling */
1055 for (k = 0 ; k < n ; k++)
1056 {
1057 i = P ? (P [k] - 1) : k ;
1058 rs = Rs [k] ;
1059 Wx [k] = Bx [i] / rs ;
1060 Wz [k] = Bz ? (Bz [i] / rs) : 0 ;
1061 }
1062 }
1063
1064 /* -------------------------------------------------- */
1065 /* solve W = (L*U + Off)\W */
1066 /* -------------------------------------------------- */
1067
1068 for (block = nblocks-1 ; block >= 0 ; block--)
1069 {
1070
1071 /* ---------------------------------------------- */
1072 /* block of size nk, rows/columns k1 to k2-1 */
1073 /* ---------------------------------------------- */
1074
1075 k1 = R [block] - 1 ; /* R is 1-based */
1076 k2 = R [block+1] - 1 ;
1077 nk = k2 - k1 ;
1078
1079 /* solve the block system */
1080 if (nk == 1)
1081 {
1082
1083 /* W [k1] /= L(k1,k1) ; */
1084 p = Lp [k1] ;
1085 s = Lx [p] ;
1086 sz = Lz ? Lz [p] : 0 ;
1087 DIV (wx, wz, Wx [k1], Wz [k1], s, sz) ;
1088 Wx [k1] = wx ;
1089 Wz [k1] = wz ;
1090
1091 /* W [k1] /= U(k1,k1) ; */
1092 p = Up [k1] ;
1093 s = Ux [p] ;
1094 sz = Uz ? Uz [p] : 0 ;
1095 DIV (wx, wz, Wx [k1], Wz [k1], s, sz) ;
1096 Wx [k1] = wx ;
1097 Wz [k1] = wz ;
1098
1099 }
1100 else
1101 {
1102
1103 /* ------------------------------------------ */
1104 /* W = L\W and then W=U\W */
1105 /* ------------------------------------------ */
1106
1107 /* W = L\W */
1108 for (k = k1 ; k < k2 ; k++)
1109 {
1110 p = Lp [k] ;
1111 pend = Lp [k+1] ;
1112 lkk = Lx [p] ;
1113 lkkz = Lz ? Lz [p] : 0 ;
1114 /* w = W [k] / lkk ; */
1115 DIV (wx, wz, Wx [k], Wz [k], lkk, lkkz) ;
1116 Wx [k] = wx ;
1117 Wz [k] = wz ;
1118 for (p++ ; p < pend ; p++)
1119 {
1120 i = Li [p] ;
1121 /* W [i] -= Lx [p] * w ; */
1122 z = Lz ? Lz [p] : 0 ;
1123 MULT_SUB (Wx [i], Wz [i], Lx [p], z,
1124 wx, wz) ;
1125 }
1126 }
1127
1128 /* W = U\W */
1129 for (k = k2-1 ; k >= k1 ; k--)
1130 {
1131 pend = Up [k+1] - 1 ;
1132 ukk = Ux [pend] ;
1133 ukkz = Uz ? Uz [pend] : 0 ;
1134 /* w = W [k] / ukk ; */
1135 DIV (wx, wz, Wx [k], Wz [k], ukk, ukkz) ;
1136 Wx [k] = wx ;
1137 Wz [k] = wz ;
1138 for (p = Up [k] ; p < pend ; p++)
1139 {
1140 i = Ui [p] ;
1141 /* W [i] -= U [p] * w ; */
1142 z = Uz ? Uz [p] : 0 ;
1143 MULT_SUB (Wx [i], Wz [i], Ux [p], z,
1144 wx, wz) ;
1145 }
1146 }
1147 }
1148
1149 /* ---------------------------------------------- */
1150 /* block back-substitution for off-diagonal-block */
1151 /* ---------------------------------------------- */
1152
1153 if (block > 0 && Offp != NULL)
1154 {
1155 for (k = k1 ; k < k2 ; k++)
1156 {
1157 pend = Offp [k+1] ;
1158 wx = Wx [k] ;
1159 wz = Wz [k] ;
1160 for (p = Offp [k] ; p < pend ; p++)
1161 {
1162 i = Offi [p] ;
1163 /* W [Offi [p]] -= Offx [p] * w ; */
1164 z = Offz ? Offz [p] : 0 ;
1165 MULT_SUB (Wx [i], Wz [i], Offx [p], z,
1166 wx, wz) ;
1167 }
1168 }
1169 }
1170 }
1171
1172 /* -------------------------------------------------- */
1173 /* permute the result, X = Q*W */
1174 /* -------------------------------------------------- */
1175
1176 for (k = 0 ; k < n ; k++)
1177 {
1178 i = Q ? (Q [k] - 1) : k ;
1179 Xx [i] = Wx [k] ;
1180 Xz [i] = Wz [k] ;
1181 }
1182
1183 /* -------------------------------------------------- */
1184 /* go to the next column of B and X */
1185 /* -------------------------------------------------- */
1186
1187 Xx += n ;
1188 Xz += n ;
1189 Bx += n ;
1190 if (Bz) Bz += n ;
1191 }
1192 }
1193
1194 /* free workspace */
1195 mxFree (Wx) ;
1196 mxFree (Wz) ;
1197
1198 }
1199 else
1200 {
1201
1202 /* ========================================================== */
1203 /* === real case ============================================ */
1204 /* ========================================================== */
1205
1206 /* create X */
1207 if (do_transpose)
1208 {
1209 pargout [0] = mxCreateDoubleMatrix (nrhs, n, mxREAL) ;
1210 }
1211 else
1212 {
1213 pargout [0] = mxCreateDoubleMatrix (n, nrhs, mxREAL) ;
1214 }
1215
1216 Xx = mxGetPr (pargout [0]) ;
1217 Bx = mxGetPr (B_matlab) ;
1218
1219 if (do_transpose)
1220 {
1221
1222 /* ------------------------------------------------------ */
1223 /* solve in chunks of one row at a time */
1224 /* ------------------------------------------------------ */
1225
1226 /* get workspace */
1227 Wx = mxMalloc (n * sizeof (double)) ;
1228
1229 for (chunk = 0 ; chunk < nrhs ; chunk++)
1230 {
1231
1232 /* -------------------------------------------------- */
1233 /* transpose and permute right hand side, W = Q'*B' */
1234 /* -------------------------------------------------- */
1235
1236 for (k = 0 ; k < n ; k++)
1237 {
1238 i = Q ? (Q [k] - 1) : k ;
1239 Wx [k] = Bx [i*nrhs] ;
1240 }
1241
1242 /* -------------------------------------------------- */
1243 /* solve W = (L*U + Off)'\W */
1244 /* -------------------------------------------------- */
1245
1246 for (block = 0 ; block < nblocks ; block++)
1247 {
1248
1249 /* ---------------------------------------------- */
1250 /* block of size nk, rows/columns k1 to k2-1 */
1251 /* ---------------------------------------------- */
1252
1253 k1 = R [block] - 1 ; /* R is 1-based */
1254 k2 = R [block+1] - 1 ;
1255 nk = k2 - k1 ;
1256
1257 /* ---------------------------------------------- */
1258 /* block back-substitution for off-diagonal-block */
1259 /* ---------------------------------------------- */
1260
1261 if (block > 0 && Offp != NULL)
1262 {
1263 for (k = k1 ; k < k2 ; k++)
1264 {
1265 pend = Offp [k+1] ;
1266 for (p = Offp [k] ; p < pend ; p++)
1267 {
1268 Wx [k] -= Wx [Offi [p]] * Offx [p] ;
1269 }
1270 }
1271 }
1272
1273 /* solve the block system */
1274 if (nk == 1)
1275 {
1276 Wx [k1] /= Lx [Lp [k1]] ;
1277 Wx [k1] /= Ux [Up [k1]] ;
1278 }
1279 else
1280 {
1281
1282 /* ------------------------------------------ */
1283 /* W = U'\W and then W=L'\W */
1284 /* ------------------------------------------ */
1285
1286 /* W = U'\W */
1287 for (k = k1 ; k < k2 ; k++)
1288 {
1289 pend = Up [k+1] - 1 ;
1290 for (p = Up [k] ; p < pend ; p++)
1291 {
1292 Wx [k] -= Wx [Ui [p]] * Ux [p] ;
1293 }
1294 Wx [k] /= Ux [pend] ;
1295 }
1296
1297 /* W = L'\W */
1298 for (k = k2-1 ; k >= k1 ; k--)
1299 {
1300 p = Lp [k] ;
1301 pend = Lp [k+1] ;
1302 lkk = Lx [p] ;
1303 for (p++ ; p < pend ; p++)
1304 {
1305 Wx [k] -= Wx [Li [p]] * Lx [p] ;
1306 }
1307 Wx [k] /= lkk ;
1308 }
1309 }
1310 }
1311
1312 /* -------------------------------------------------- */
1313 /* scale, permute, and tranpose: X = (P*(R\W))' */
1314 /* -------------------------------------------------- */
1315
1316 if (Rs == NULL)
1317 {
1318 /* no scaling */
1319 for (k = 0 ; k < n ; k++)
1320 {
1321 i = P ? (P [k] - 1) : k ;
1322 Xx [i*nrhs] = Wx [k] ;
1323 }
1324 }
1325 else
1326 {
1327 /* with scaling */
1328 for (k = 0 ; k < n ; k++)
1329 {
1330 i = P ? (P [k] - 1) : k ;
1331 rs = Rs [k] ;
1332 Xx [i*nrhs] = Wx [k] / rs ;
1333 }
1334 }
1335
1336 /* -------------------------------------------------- */
1337 /* go to the next row of B and X */
1338 /* -------------------------------------------------- */
1339
1340 Xx++ ;
1341 Bx++ ;
1342 }
1343
1344 }
1345 else
1346 {
1347
1348 /* ------------------------------------------------------ */
1349 /* solve in chunks of 4 columns at a time */
1350 /* ------------------------------------------------------ */
1351
1352 /* get workspace */
1353 Wx = mxMalloc (n * MAX (4, nrhs) * sizeof (double)) ;
1354
1355 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
1356 {
1357 /* -------------------------------------------------- */
1358 /* get the size of the current chunk */
1359 /* -------------------------------------------------- */
1360
1361 nr = MIN (nrhs - chunk, 4) ;
1362
1363 /* -------------------------------------------------- */
1364 /* scale and permute the right hand side, W = P*(R\B) */
1365 /* -------------------------------------------------- */
1366
1367 if (Rs == NULL)
1368 {
1369
1370 /* no scaling */
1371 switch (nr)
1372 {
1373
1374 case 1:
1375
1376 for (k = 0 ; k < n ; k++)
1377 {
1378 i = P ? (P [k] - 1) : k ;
1379 Wx [k] = Bx [i] ;
1380 }
1381 break ;
1382
1383 case 2:
1384
1385 for (k = 0 ; k < n ; k++)
1386 {
1387 i = P ? (P [k] - 1) : k ;
1388 Wx [2*k ] = Bx [i ] ;
1389 Wx [2*k + 1] = Bx [i + n ] ;
1390 }
1391 break ;
1392
1393 case 3:
1394
1395 for (k = 0 ; k < n ; k++)
1396 {
1397 i = P ? (P [k] - 1) : k ;
1398 Wx [3*k ] = Bx [i ] ;
1399 Wx [3*k + 1] = Bx [i + n ] ;
1400 Wx [3*k + 2] = Bx [i + n*2] ;
1401 }
1402 break ;
1403
1404 case 4:
1405
1406 for (k = 0 ; k < n ; k++)
1407 {
1408 i = P ? (P [k] - 1) : k ;
1409 Wx [4*k ] = Bx [i ] ;
1410 Wx [4*k + 1] = Bx [i + n ] ;
1411 Wx [4*k + 2] = Bx [i + n*2] ;
1412 Wx [4*k + 3] = Bx [i + n*3] ;
1413 }
1414 break ;
1415 }
1416
1417 }
1418 else
1419 {
1420
1421 switch (nr)
1422 {
1423
1424 case 1:
1425
1426 for (k = 0 ; k < n ; k++)
1427 {
1428 i = P ? (P [k] - 1) : k ;
1429 rs = Rs [k] ;
1430 Wx [k] = Bx [i] / rs ;
1431 }
1432 break ;
1433
1434 case 2:
1435
1436 for (k = 0 ; k < n ; k++)
1437 {
1438 i = P ? (P [k] - 1) : k ;
1439 rs = Rs [k] ;
1440 Wx [2*k ] = Bx [i ] / rs ;
1441 Wx [2*k + 1] = Bx [i + n ] / rs ;
1442 }
1443 break ;
1444
1445 case 3:
1446
1447 for (k = 0 ; k < n ; k++)
1448 {
1449 i = P ? (P [k] - 1) : k ;
1450 rs = Rs [k] ;
1451 Wx [3*k ] = Bx [i ] / rs ;
1452 Wx [3*k + 1] = Bx [i + n ] / rs ;
1453 Wx [3*k + 2] = Bx [i + n*2] / rs ;
1454 }
1455 break ;
1456
1457 case 4:
1458
1459 for (k = 0 ; k < n ; k++)
1460 {
1461 i = P ? (P [k] - 1) : k ;
1462 rs = Rs [k] ;
1463 Wx [4*k ] = Bx [i ] / rs ;
1464 Wx [4*k + 1] = Bx [i + n ] / rs ;
1465 Wx [4*k + 2] = Bx [i + n*2] / rs ;
1466 Wx [4*k + 3] = Bx [i + n*3] / rs ;
1467 }
1468 break ;
1469 }
1470 }
1471
1472 /* -------------------------------------------------- */
1473 /* solve W = (L*U + Off)\W */
1474 /* -------------------------------------------------- */
1475
1476 for (block = nblocks-1 ; block >= 0 ; block--)
1477 {
1478
1479 /* ---------------------------------------------- */
1480 /* block of size nk is rows/columns k1 to k2-1 */
1481 /* ---------------------------------------------- */
1482
1483 k1 = R [block] - 1 ; /* R is 1-based */
1484 k2 = R [block+1] - 1 ;
1485 nk = k2 - k1 ;
1486
1487 /* solve the block system */
1488 if (nk == 1)
1489 {
1490
1491 /* this is not done if L comes from KLU, since
1492 in that case, L is unit lower triangular */
1493 s = Lx [Lp [k1]] ;
1494 if (s != 1.0) switch (nr)
1495 {
1496 case 1:
1497 Wx [k1] /= s ;
1498 break ;
1499 case 2:
1500 Wx [2*k1] /= s ;
1501 Wx [2*k1 + 1] /= s ;
1502 break ;
1503 case 3:
1504 Wx [3*k1] /= s ;
1505 Wx [3*k1 + 1] /= s ;
1506 Wx [3*k1 + 2] /= s ;
1507 break ;
1508 case 4:
1509 Wx [4*k1] /= s ;
1510 Wx [4*k1 + 1] /= s ;
1511 Wx [4*k1 + 2] /= s ;
1512 Wx [4*k1 + 3] /= s ;
1513 break ;
1514 }
1515
1516 s = Ux [Up [k1]] ;
1517 if (s != 1.0) switch (nr)
1518 {
1519 case 1:
1520 Wx [k1] /= s ;
1521 break ;
1522 case 2:
1523 Wx [2*k1] /= s ;
1524 Wx [2*k1 + 1] /= s ;
1525 break ;
1526 case 3:
1527 Wx [3*k1] /= s ;
1528 Wx [3*k1 + 1] /= s ;
1529 Wx [3*k1 + 2] /= s ;
1530 break ;
1531 case 4:
1532 Wx [4*k1] /= s ;
1533 Wx [4*k1 + 1] /= s ;
1534 Wx [4*k1 + 2] /= s ;
1535 Wx [4*k1 + 3] /= s ;
1536 break ;
1537 }
1538
1539 }
1540 else
1541 {
1542
1543 /* ------------------------------------------ */
1544 /* W = L\W and then W=U\W */
1545 /* ------------------------------------------ */
1546
1547 switch (nr)
1548 {
1549
1550 case 1:
1551 /* W = L\W */
1552 for (k = k1 ; k < k2 ; k++)
1553 {
1554 p = Lp [k] ;
1555 pend = Lp [k+1] ;
1556 lkk = Lx [p++] ;
1557 x [0] = Wx [k] / lkk ;
1558 Wx [k] = x [0] ;
1559 for ( ; p < pend ; p++)
1560 {
1561 Wx [Li [p]] -= Lx [p] * x [0] ;
1562 }
1563 }
1564
1565 /* W = U\W */
1566 for (k = k2-1 ; k >= k1 ; k--)
1567 {
1568 pend = Up [k+1] - 1 ;
1569 ukk = Ux [pend] ;
1570 x [0] = Wx [k] / ukk ;
1571 Wx [k] = x [0] ;
1572 for (p = Up [k] ; p < pend ; p++)
1573 {
1574 Wx [Ui [p]] -= Ux [p] * x [0] ;
1575 }
1576 }
1577 break ;
1578
1579 case 2:
1580
1581 /* W = L\W */
1582 for (k = k1 ; k < k2 ; k++)
1583 {
1584 p = Lp [k] ;
1585 pend = Lp [k+1] ;
1586 lkk = Lx [p++] ;
1587 x [0] = Wx [2*k ] / lkk ;
1588 x [1] = Wx [2*k + 1] / lkk ;
1589 Wx [2*k ] = x [0] ;
1590 Wx [2*k + 1] = x [1] ;
1591 for ( ; p < pend ; p++)
1592 {
1593 i = Li [p] ;
1594 lik = Lx [p] ;
1595 Wx [2*i ] -= lik * x [0] ;
1596 Wx [2*i + 1] -= lik * x [1] ;
1597 }
1598 }
1599
1600 /* W = U\W */
1601 for (k = k2-1 ; k >= k1 ; k--)
1602 {
1603 pend = Up [k+1] - 1 ;
1604 ukk = Ux [pend] ;
1605 x [0] = Wx [2*k ] / ukk ;
1606 x [1] = Wx [2*k + 1] / ukk ;
1607 Wx [2*k ] = x [0] ;
1608 Wx [2*k + 1] = x [1] ;
1609 for (p = Up [k] ; p < pend ; p++)
1610 {
1611 i = Ui [p] ;
1612 uik = Ux [p] ;
1613 Wx [2*i ] -= uik * x [0] ;
1614 Wx [2*i + 1] -= uik * x [1] ;
1615 }
1616 }
1617 break ;
1618
1619 case 3:
1620
1621 /* W = L\W */
1622 for (k = k1 ; k < k2 ; k++)
1623 {
1624 p = Lp [k] ;
1625 pend = Lp [k+1] ;
1626 lkk = Lx [p++] ;
1627 x [0] = Wx [3*k ] / lkk ;
1628 x [1] = Wx [3*k + 1] / lkk ;
1629 x [2] = Wx [3*k + 2] / lkk ;
1630 Wx [3*k ] = x [0] ;
1631 Wx [3*k + 1] = x [1] ;
1632 Wx [3*k + 2] = x [2] ;
1633 for ( ; p < pend ; p++)
1634 {
1635 i = Li [p] ;
1636 lik = Lx [p] ;
1637 Wx [3*i ] -= lik * x [0] ;
1638 Wx [3*i + 1] -= lik * x [1] ;
1639 Wx [3*i + 2] -= lik * x [2] ;
1640 }
1641 }
1642
1643 /* W = U\W */
1644 for (k = k2-1 ; k >= k1 ; k--)
1645 {
1646 pend = Up [k+1] - 1 ;
1647 ukk = Ux [pend] ;
1648 x [0] = Wx [3*k ] / ukk ;
1649 x [1] = Wx [3*k + 1] / ukk ;
1650 x [2] = Wx [3*k + 2] / ukk ;
1651 Wx [3*k ] = x [0] ;
1652 Wx [3*k + 1] = x [1] ;
1653 Wx [3*k + 2] = x [2] ;
1654 for (p = Up [k] ; p < pend ; p++)
1655 {
1656 i = Ui [p] ;
1657 uik = Ux [p] ;
1658 Wx [3*i ] -= uik * x [0] ;
1659 Wx [3*i + 1] -= uik * x [1] ;
1660 Wx [3*i + 2] -= uik * x [2] ;
1661 }
1662 }
1663 break ;
1664
1665 case 4:
1666
1667 /* W = L\W */
1668 for (k = k1 ; k < k2 ; k++)
1669 {
1670 p = Lp [k] ;
1671 pend = Lp [k+1] ;
1672 lkk = Lx [p++] ;
1673 x [0] = Wx [4*k ] / lkk ;
1674 x [1] = Wx [4*k + 1] / lkk ;
1675 x [2] = Wx [4*k + 2] / lkk ;
1676 x [3] = Wx [4*k + 3] / lkk ;
1677 Wx [4*k ] = x [0] ;
1678 Wx [4*k + 1] = x [1] ;
1679 Wx [4*k + 2] = x [2] ;
1680 Wx [4*k + 3] = x [3] ;
1681 for ( ; p < pend ; p++)
1682 {
1683 i = Li [p] ;
1684 lik = Lx [p] ;
1685 Wx [4*i ] -= lik * x [0] ;
1686 Wx [4*i + 1] -= lik * x [1] ;
1687 Wx [4*i + 2] -= lik * x [2] ;
1688 Wx [4*i + 3] -= lik * x [3] ;
1689 }
1690 }
1691
1692 /* Wx = U\Wx */
1693 for (k = k2-1 ; k >= k1 ; k--)
1694 {
1695 pend = Up [k+1] - 1 ;
1696 ukk = Ux [pend] ;
1697 x [0] = Wx [4*k ] / ukk ;
1698 x [1] = Wx [4*k + 1] / ukk ;
1699 x [2] = Wx [4*k + 2] / ukk ;
1700 x [3] = Wx [4*k + 3] / ukk ;
1701 Wx [4*k ] = x [0] ;
1702 Wx [4*k + 1] = x [1] ;
1703 Wx [4*k + 2] = x [2] ;
1704 Wx [4*k + 3] = x [3] ;
1705 for (p = Up [k] ; p < pend ; p++)
1706 {
1707 i = Ui [p] ;
1708 uik = Ux [p] ;
1709 Wx [4*i ] -= uik * x [0] ;
1710 Wx [4*i + 1] -= uik * x [1] ;
1711 Wx [4*i + 2] -= uik * x [2] ;
1712 Wx [4*i + 3] -= uik * x [3] ;
1713 }
1714 }
1715 break ;
1716 }
1717 }
1718
1719 /* ---------------------------------------------- */
1720 /* block back-substitution for off-diagonal-block */
1721 /* ---------------------------------------------- */
1722
1723 if (block > 0 && Offp != NULL)
1724 {
1725 switch (nr)
1726 {
1727
1728 case 1:
1729
1730 for (k = k1 ; k < k2 ; k++)
1731 {
1732 pend = Offp [k+1] ;
1733 x [0] = Wx [k] ;
1734 for (p = Offp [k] ; p < pend ; p++)
1735 {
1736 Wx [Offi [p]] -= Offx[p] * x[0];
1737 }
1738 }
1739 break ;
1740
1741 case 2:
1742
1743 for (k = k1 ; k < k2 ; k++)
1744 {
1745 pend = Offp [k+1] ;
1746 x [0] = Wx [2*k ] ;
1747 x [1] = Wx [2*k + 1] ;
1748 for (p = Offp [k] ; p < pend ; p++)
1749 {
1750 i = Offi [p] ;
1751 offik = Offx [p] ;
1752 Wx [2*i ] -= offik * x [0] ;
1753 Wx [2*i + 1] -= offik * x [1] ;
1754 }
1755 }
1756 break ;
1757
1758 case 3:
1759
1760 for (k = k1 ; k < k2 ; k++)
1761 {
1762 pend = Offp [k+1] ;
1763 x [0] = Wx [3*k ] ;
1764 x [1] = Wx [3*k + 1] ;
1765 x [2] = Wx [3*k + 2] ;
1766 for (p = Offp [k] ; p < pend ; p++)
1767 {
1768 i = Offi [p] ;
1769 offik = Offx [p] ;
1770 Wx [3*i ] -= offik * x [0] ;
1771 Wx [3*i + 1] -= offik * x [1] ;
1772 Wx [3*i + 2] -= offik * x [2] ;
1773 }
1774 }
1775 break ;
1776
1777 case 4:
1778
1779 for (k = k1 ; k < k2 ; k++)
1780 {
1781 pend = Offp [k+1] ;
1782 x [0] = Wx [4*k ] ;
1783 x [1] = Wx [4*k + 1] ;
1784 x [2] = Wx [4*k + 2] ;
1785 x [3] = Wx [4*k + 3] ;
1786 for (p = Offp [k] ; p < pend ; p++)
1787 {
1788 i = Offi [p] ;
1789 offik = Offx [p] ;
1790 Wx [4*i ] -= offik * x [0] ;
1791 Wx [4*i + 1] -= offik * x [1] ;
1792 Wx [4*i + 2] -= offik * x [2] ;
1793 Wx [4*i + 3] -= offik * x [3] ;
1794 }
1795 }
1796 break ;
1797 }
1798 }
1799 }
1800
1801 /* -------------------------------------------------- */
1802 /* permute the result, X = Q*W */
1803 /* -------------------------------------------------- */
1804
1805 switch (nr)
1806 {
1807
1808 case 1:
1809
1810 for (k = 0 ; k < n ; k++)
1811 {
1812 i = Q ? (Q [k] - 1) : k ;
1813 Xx [i] = Wx [k] ;
1814 }
1815 break ;
1816
1817 case 2:
1818
1819 for (k = 0 ; k < n ; k++)
1820 {
1821 i = Q ? (Q [k] - 1) : k ;
1822 Xx [i ] = Wx [2*k ] ;
1823 Xx [i + n ] = Wx [2*k + 1] ;
1824 }
1825 break ;
1826
1827 case 3:
1828
1829 for (k = 0 ; k < n ; k++)
1830 {
1831 i = Q ? (Q [k] - 1) : k ;
1832 Xx [i ] = Wx [3*k ] ;
1833 Xx [i + n ] = Wx [3*k + 1] ;
1834 Xx [i + n*2] = Wx [3*k + 2] ;
1835 }
1836 break ;
1837
1838 case 4:
1839
1840 for (k = 0 ; k < n ; k++)
1841 {
1842 i = Q ? (Q [k] - 1) : k ;
1843 Xx [i ] = Wx [4*k ] ;
1844 Xx [i + n ] = Wx [4*k + 1] ;
1845 Xx [i + n*2] = Wx [4*k + 2] ;
1846 Xx [i + n*3] = Wx [4*k + 3] ;
1847 }
1848 break ;
1849 }
1850
1851 /* -------------------------------------------------- */
1852 /* go to the next chunk of B and X */
1853 /* -------------------------------------------------- */
1854
1855 Xx += n*4 ;
1856 Bx += n*4 ;
1857 }
1858 }
1859
1860 /* free workspace */
1861 mxFree (Wx) ;
1862 }
1863
1864 }
1865
1866 }
1867 else
1868 {
1869
1870 /* ------------------------------------------------------------------ */
1871 /* LU = klu (A) usage; extract factorization */
1872 /* ------------------------------------------------------------------ */
1873
1874 /* sort the row indices in each column of L and U */
1875 if (A_complex)
1876 {
1877 klu_zl_sort (Symbolic, Numeric, &Common) ;
1878 }
1879 else
1880 {
1881 klu_l_sort (Symbolic, Numeric, &Common) ;
1882 }
1883
1884 /* L */
1885 L_matlab = mxCreateSparse (n, n, Numeric->lnz,
1886 A_complex ? mxCOMPLEX: mxREAL) ;
1887 Lp = (Long *) mxGetJc (L_matlab) ;
1888 Li = (Long *) mxGetIr (L_matlab) ;
1889 Lx = mxGetPr (L_matlab) ;
1890 Lz = mxGetPi (L_matlab) ;
1891
1892 /* U */
1893 U_matlab = mxCreateSparse (n, n, Numeric->unz,
1894 A_complex ? mxCOMPLEX: mxREAL) ;
1895 Up = (Long *) mxGetJc (U_matlab) ;
1896 Ui = (Long *) mxGetIr (U_matlab) ;
1897 Ux = mxGetPr (U_matlab) ;
1898 Uz = mxGetPi (U_matlab) ;
1899
1900 /* p */
1901 p_matlab = mxCreateNumericMatrix (1, n, mx_int, mxREAL) ;
1902 P = (Long *) mxGetData (p_matlab) ;
1903
1904 /* q */
1905 q_matlab = mxCreateNumericMatrix (1, n, mx_int, mxREAL) ;
1906 Q = (Long *) mxGetData (q_matlab) ;
1907
1908 /* R, as a sparse diagonal matrix */
1909 R_matlab = mxCreateSparse (n, n, n+1, mxREAL) ;
1910 Rp = (Long *) mxGetJc (R_matlab) ;
1911 Ri = (Long *) mxGetIr (R_matlab) ;
1912 Rs = mxGetPr (R_matlab) ;
1913 for (k = 0 ; k <= n ; k++)
1914 {
1915 Rp [k] = k ;
1916 Ri [k] = k ;
1917 }
1918
1919 /* F, off diagonal blocks */
1920 F_matlab = mxCreateSparse (n, n, Numeric->nzoff,
1921 A_complex ? mxCOMPLEX: mxREAL) ;
1922 Offp = (Long *) mxGetJc (F_matlab) ;
1923 Offi = (Long *) mxGetIr (F_matlab) ;
1924 Offx = mxGetPr (F_matlab) ;
1925 Offz = mxGetPi (F_matlab) ;
1926
1927 /* r, block boundaries */
1928 nblocks = Symbolic->nblocks ;
1929 r_matlab = mxCreateNumericMatrix (1, nblocks+1, mx_int, mxREAL) ;
1930 R = (Long *) mxGetData (r_matlab) ;
1931
1932 /* extract the LU factorization from KLU Numeric and Symbolic objects */
1933 if (A_complex)
1934 {
1935 klu_zl_extract (Numeric, Symbolic, Lp, Li, Lx, Lz, Up, Ui, Ux, Uz,
1936 Offp, Offi, Offx, Offz, P, Q, Rs, R, &Common) ;
1937 }
1938 else
1939 {
1940 klu_l_extract (Numeric, Symbolic, Lp, Li, Lx, Up, Ui, Ux,
1941 Offp, Offi, Offx, P, Q, Rs, R, &Common) ;
1942 }
1943
1944 /* fix p and q for 1-based indexing */
1945 for (k = 0 ; k < n ; k++)
1946 {
1947 P [k]++ ;
1948 Q [k]++ ;
1949 }
1950
1951 /* fix r for 1-based indexing */
1952 for (k = 0 ; k <= nblocks ; k++)
1953 {
1954 R [k]++ ;
1955 }
1956
1957 /* create output LU struct */
1958 pargout [0] = mxCreateStructMatrix (1, 1, 7, LUnames) ;
1959 mxSetFieldByNumber (pargout [0], 0, 0, L_matlab) ;
1960 mxSetFieldByNumber (pargout [0], 0, 1, U_matlab) ;
1961 mxSetFieldByNumber (pargout [0], 0, 2, p_matlab) ;
1962 mxSetFieldByNumber (pargout [0], 0, 3, q_matlab) ;
1963 mxSetFieldByNumber (pargout [0], 0, 4, R_matlab) ;
1964 mxSetFieldByNumber (pargout [0], 0, 5, F_matlab) ;
1965 mxSetFieldByNumber (pargout [0], 0, 6, r_matlab) ;
1966
1967 /* ------------------------------------------------------------------ */
1968 /* free Symbolic and Numeric objects */
1969 /* ------------------------------------------------------------------ */
1970
1971 klu_l_free_symbolic (&Symbolic, &Common) ;
1972 klu_l_free_numeric (&Numeric, &Common) ;
1973 }
1974 }
1975