1 /* ========================================================================== */
2 /* === klu mexFunction ====================================================== */
3 /* ========================================================================== */
4 
5 /* KLU: a MATLAB interface to a "Clark Kent" sparse LU factorization algorithm.
6 
7     3 or 4 input arguments: factorize and solve, returning the solution:
8 
9         x = klu (A, '\', b)
10         x = klu (A, '\', b, opts)
11         x = klu (b, '/', A)
12         x = klu (b, '/', A, opts)
13 
14     A can be the LU struct, instead:
15 
16         x = klu (LU, '\', b)
17         x = klu (LU, '\', b, opts)
18         x = klu (b, '/', LU)
19         x = klu (b, '/', LU, opts)
20 
21     where LU is a struct containing members: L, U, p, q, R, F, and r.  Only L
22     and U are required.  The factorization is L*U+F = R\A(p,q), where r defines
23     the block boundaries of the BTF form, and F contains the entries in the
24     upper block triangular part.
25 
26     with 1 or 2 input arguments: factorize, returning the LU struct:
27 
28         LU = klu (A)
29         LU = klu (A, opts)
30 
31     2nd optional output: info, which is only meaningful if A was factorized.
32 
33     A must be square.  b can be a matrix, but it cannot be sparse.
34 
35     Obscure options, mainly for testing:
36 
37         opts.memgrow    1.2     when L and U need to grow, inc. by this ratio.
38                                 valid range: 1 or more.
39         opts.imemamd    1.2     initial size of L and U with AMD or other
40                                 symmetric ordering is 1.2*nnz(L)+n;
41                                 valid range 1 or more.
42         opts.imem       10      initial size of L and U is 10*nnz(A)+n if a
43                                 symmetric ordering not used; valid range 1 or
44                                 more
45 */
46 
47 /* ========================================================================== */
48 
49 #include "klu.h"
50 #include <string.h>
51 #define Long SuiteSparse_long
52 
53 #ifndef NCHOLMOD
54 #include "klu_cholmod.h"
55 #endif
56 
57 #include "mex.h"
58 #define MAX(a,b) (((a) > (b)) ? (a) : (b))
59 #define MIN(a,b) (((a) < (b)) ? (a) : (b))
60 #define ABS(x) (((x) < 0) ? -(x) : (x))
61 #define STRING_MATCH(s1,s2) (strcmp ((s1), (s2)) == 0)
62 
63 /* Complex division.  This uses ACM Algo 116, by R. L. Smith, 1962. */
64 /* Note that c cannot be the same variable as a or b */
65 #define DIV(cx,cz,ax,az,bx,bz) \
66 { \
67     double r, den ; \
68     if (ABS (bx) >= ABS (bz)) \
69     { \
70         r = bz / bx ; \
71         den = bx + r * bz ; \
72         cx = (ax + az * r) / den ; \
73         cz = (az - ax * r) / den ; \
74     } \
75     else \
76     { \
77         r = bx / bz ; \
78         den = r * bx + bz ; \
79         cx = (ax * r + az) / den ; \
80         cz = (az * r - ax) / den ; \
81     } \
82 }
83 
84 /* complex multiply/subtract, c -= a*b */
85 /* Note that c cannot be the same variable as a or b */
86 #define MULT_SUB(cx,cz,ax,az,bx,bz) \
87 { \
88     cx -= ax * bx - az * bz ; \
89     cz -= az * bx + ax * bz ; \
90 }
91 
92 /* complex multiply/subtract, c -= a*conj(b) */
93 /* Note that c cannot be the same variable as a or b */
94 #define MULT_SUB_CONJ(cx,cz,ax,az,bx,bz) \
95 { \
96     cx -= ax * bx + az * bz ; \
97     cz -= az * bx - ax * bz ; \
98 }
99 
100 /* ========================================================================== */
101 /* === klu mexFunction ====================================================== */
102 /* ========================================================================== */
103 
mexFunction(int nargout,mxArray * pargout[],int nargin,const mxArray * pargin[])104 void mexFunction
105 (
106     int nargout,
107     mxArray *pargout [ ],
108     int nargin,
109     const mxArray *pargin [ ]
110 )
111 {
112     double ukk, lkk, rs, s, lik, uik, x [4], offik, z, ukkz, lkkz, sz, wx, wz ;
113     double *X, *B, *Xz, *Xx, *Bx, *Bz, *A, *Ax, *Az, *Lx, *Ux, *Rs, *Offx, *Wx,
114         *Uz, *Lz, *Offz, *Wz, *W, *Xi, *Bi ;
115     Long *Ap, *Ai, *Lp, *Li, *Up, *Ui, *P, *Q, *R, *Rp, *Ri, *Offp, *Offi ;
116     char *operator ;
117     mxArray *L_matlab, *U_matlab, *p_matlab, *q_matlab, *R_matlab, *F_matlab,
118         *r_matlab, *field ;
119     const mxArray *A_matlab = NULL, *LU_matlab, *B_matlab = NULL, *opts_matlab ;
120     klu_l_symbolic *Symbolic ;
121     klu_l_numeric *Numeric ;
122     klu_l_common Common ;
123     Long n = 0, k, nrhs = 0, do_solve, do_factorize, symmetric,
124         A_complex = 0, B_complex, nz, do_transpose = 0, p, pend, nblocks,
125         R1 [2], chunk, nr, i, j, block, k1, k2, nk, bn = 0, ordering ;
126     int mx_int ;
127     static const char *fnames [ ] = {
128         "noffdiag",     /* # of off-diagonal pivots */
129         "nrealloc",     /* # of memory reallocations */
130         "rcond",        /* cheap reciprocal number estimate */
131         "rgrowth",      /* reciprocal pivot growth */
132         "flops",        /* flop count */
133         "nblocks",      /* # of blocks in BTF form (1 if not computed) */
134         "ordering",     /* AMD, COLAMD, natural, cholmod(AA'), cholmod(A+A') */
135         "scale",        /* scaling (<=0: none, 1: sum, 2: max */
136         "lnz",          /* nnz(L), including diagonal */
137         "unz",          /* nnz(U), including diagonal */
138         "offnz",        /* nnz(F), including diagonal */
139         "tol",          /* pivot tolerance used */
140         "memory"        /* peak memory usage */
141         },
142         *LUnames [ ] = { "L", "U", "p", "q", "R", "F", "r" } ;
143 
144     /* ---------------------------------------------------------------------- */
145     /* get inputs */
146     /* ---------------------------------------------------------------------- */
147 
148     if (nargin < 1 || nargin > 4 || nargout > 3)
149     {
150         mexErrMsgTxt (
151             "Usage: x = klu(A,'\',b), x = klu(A,'/',b) or LU = klu(A)") ;
152     }
153 
154     /* return the solution x, or just do LU factorization */
155     do_solve = (nargin > 2) ;
156 
157     /* determine size of the MATLAB integer */
158     if (sizeof (Long) == sizeof (INT32_T))
159     {
160         mx_int = mxINT32_CLASS ;
161     }
162     else
163     {
164         mx_int = mxINT64_CLASS ;
165     }
166 
167     if (do_solve)
168     {
169 
170         /* ------------------------------------------------------------------ */
171         /* slash or backslash */
172         /* ------------------------------------------------------------------ */
173 
174         /* usage, where opts is the optional 4th input argument:
175             x = klu (A,  '\', b)
176             x = klu (LU, '\', b)
177             x = klu (b,  '/', A)
178             x = klu (b,  '/', LU)
179          */
180 
181         /* determine the operator, slash (/) or backslash (\) */
182         if (!mxIsChar (pargin [1]))
183         {
184             mexErrMsgTxt ("invalid operator") ;
185         }
186         operator = mxArrayToString (pargin [1]) ;
187         if (STRING_MATCH (operator, "\\"))
188         {
189             do_transpose = 0 ;
190             A_matlab = pargin [0] ;
191             B_matlab = pargin [2] ;
192             nrhs = mxGetN (B_matlab) ;
193             bn = mxGetM (B_matlab) ;
194         }
195         else if (STRING_MATCH (operator, "/"))
196         {
197             do_transpose = 1 ;
198             A_matlab = pargin [2] ;
199             B_matlab = pargin [0] ;
200             nrhs = mxGetM (B_matlab) ;
201             bn = mxGetN (B_matlab) ;
202         }
203         else
204         {
205             mexErrMsgTxt ("invalid operator") ;
206         }
207 
208         if (mxIsSparse (B_matlab))
209         {
210             mexErrMsgTxt ("B cannot be sparse") ;
211         }
212 
213         opts_matlab = (nargin > 3) ? pargin [3] : NULL ;
214 
215         /* determine if the factorization needs to be performed */
216         do_factorize = !mxIsStruct (A_matlab) ;
217         if (do_factorize)
218         {
219             LU_matlab = NULL ;
220         }
221         else
222         {
223             LU_matlab = A_matlab ;
224             A_matlab = NULL ;
225         }
226 
227     }
228     else
229     {
230 
231         /* ------------------------------------------------------------------ */
232         /* factorize A and return LU factorization */
233         /* ------------------------------------------------------------------ */
234 
235         /* usage, where opts in the optional 2nd input argument:
236             LU = klu (A)
237          */
238 
239         LU_matlab = NULL ;
240         A_matlab = pargin [0] ;
241         B_matlab = NULL ;
242         opts_matlab = (nargin > 1) ? pargin [1] : NULL ;
243         do_factorize = 1 ;
244         if (mxIsStruct (A_matlab))
245         {
246             mexErrMsgTxt ("invalid input, A must be a sparse matrix") ;
247         }
248     }
249 
250     /* ---------------------------------------------------------------------- */
251     /* get options and set Common defaults */
252     /* ---------------------------------------------------------------------- */
253 
254     klu_l_defaults (&Common) ;
255 
256     /* factorization options */
257     if (opts_matlab != NULL && mxIsStruct (opts_matlab))
258     {
259         if ((field = mxGetField (opts_matlab, 0, "tol")) != NULL)
260         {
261             Common.tol = mxGetScalar (field) ;
262         }
263         if ((field = mxGetField (opts_matlab, 0, "memgrow")) != NULL)
264         {
265             Common.memgrow = mxGetScalar (field) ;
266         }
267         if ((field = mxGetField (opts_matlab, 0, "imemamd")) != NULL)
268         {
269             Common.initmem_amd = mxGetScalar (field) ;
270         }
271         if ((field = mxGetField (opts_matlab, 0, "imem")) != NULL)
272         {
273             Common.initmem = mxGetScalar (field) ;
274         }
275         if ((field = mxGetField (opts_matlab, 0, "btf")) != NULL)
276         {
277             Common.btf = mxGetScalar (field) ;
278         }
279         if ((field = mxGetField (opts_matlab, 0, "ordering")) != NULL)
280         {
281             Common.ordering = mxGetScalar (field) ;
282         }
283         if ((field = mxGetField (opts_matlab, 0, "scale")) != NULL)
284         {
285             Common.scale = mxGetScalar (field) ;
286         }
287         if ((field = mxGetField (opts_matlab, 0, "maxwork")) != NULL)
288         {
289             Common.maxwork = mxGetScalar (field) ;
290         }
291     }
292 
293     if (Common.ordering < 0 || Common.ordering > 4)
294     {
295         mexErrMsgTxt ("invalid ordering option") ;
296     }
297     ordering = Common.ordering ;
298 
299 #ifndef NCHOLMOD
300     /* ordering option 3,4 becomes KLU option 3, with symmetric 0 or 1 */
301     symmetric = (Common.ordering == 4) ;
302     if (symmetric) Common.ordering = 3 ;
303     Common.user_order = klu_l_cholmod ;
304     Common.user_data = &symmetric ;
305 #else
306     /* CHOLMOD, METIS, CAMD, CCOLAMD, not available */
307     if (Common.ordering > 2)
308     {
309         mexErrMsgTxt ("invalid ordering option") ;
310     }
311 #endif
312 
313     if (Common.scale < 1 || Common.scale > 2)
314     {
315         Common.scale = -1 ; /* no scaling, and no error checking either */
316     }
317 
318     /* ---------------------------------------------------------------------- */
319     /* factorize, if needed */
320     /* ---------------------------------------------------------------------- */
321 
322     if (do_factorize)
323     {
324 
325         /* get input matrix A to factorize */
326         n = mxGetN (A_matlab) ;
327         if (!mxIsSparse (A_matlab) || n != mxGetM (A_matlab) || n == 0)
328         {
329             mexErrMsgTxt ("A must be sparse, square, and non-empty") ;
330         }
331 
332         Ap = (Long *) mxGetJc (A_matlab) ;
333         Ai = (Long *) mxGetIr (A_matlab) ;
334         Ax = mxGetPr (A_matlab) ;
335         Az = mxGetPi (A_matlab) ;
336         nz = Ap [n] ;
337         A_complex = mxIsComplex (A_matlab) ;
338 
339         if (do_solve && (n != bn || nrhs == 0))
340         {
341             mexErrMsgTxt ("B must be non-empty with same number of rows as A") ;
342         }
343 
344         /* ------------------------------------------------------------------ */
345         /* analyze */
346         /* ------------------------------------------------------------------ */
347 
348         Symbolic = klu_l_analyze (n, Ap, Ai, &Common) ;
349         if (Symbolic == (klu_l_symbolic *) NULL)
350         {
351             mexErrMsgTxt ("klu symbolic analysis failed") ;
352         }
353 
354         /* ------------------------------------------------------------------ */
355         /* factorize */
356         /* ------------------------------------------------------------------ */
357 
358         if (A_complex)
359         {
360             /* A is complex */
361             A = mxMalloc (nz * 2 * sizeof (double)) ;
362             for (k = 0 ; k < nz ; k++)
363             {
364                 A [2*k  ] = Ax [k] ;        /* real part */
365                 A [2*k+1] = Az [k] ;        /* imaginary part */
366             }
367             Numeric = klu_zl_factor (Ap, Ai, A, Symbolic, &Common) ;
368             if (nargout > 1)
369             {
370                 /* flops and rgrowth, if requested */
371                 klu_zl_flops (Symbolic, Numeric, &Common) ;
372                 klu_zl_rgrowth (Ap, Ai, A, Symbolic, Numeric, &Common) ;
373             }
374             mxFree (A) ;
375         }
376         else
377         {
378             /* A is real */
379             Numeric = klu_l_factor (Ap, Ai, Ax, Symbolic, &Common) ;
380             if (nargout > 1)
381             {
382                 /* flops, if requested */
383                 klu_l_flops (Symbolic, Numeric, &Common) ;
384                 klu_l_rgrowth (Ap, Ai, Ax, Symbolic, Numeric, &Common) ;
385             }
386         }
387         if (Common.status != KLU_OK)
388         {
389             mexErrMsgTxt ("klu numeric factorization failed") ;
390         }
391 
392         /* ------------------------------------------------------------------ */
393         /* compute cheap condition number estimate */
394         /* ------------------------------------------------------------------ */
395 
396         if (A_complex)
397         {
398             klu_zl_rcond (Symbolic, Numeric, &Common) ;
399         }
400         else
401         {
402             klu_l_rcond (Symbolic, Numeric, &Common) ;
403         }
404 
405         /* ------------------------------------------------------------------ */
406         /* return info, if requested */
407         /* ------------------------------------------------------------------ */
408 
409 #define INFO(i,x) \
410         mxSetFieldByNumber (pargout [1], 0, i, mxCreateDoubleScalar (x))
411 
412         if (nargout > 1)
413         {
414             pargout [1] = mxCreateStructMatrix (1, 1, 13, fnames) ;
415             INFO (0, Common.noffdiag) ;
416             INFO (1, Common.nrealloc) ;
417             INFO (2, Common.rcond) ;
418             INFO (3, Common.rgrowth) ;
419             INFO (4, Common.flops) ;
420             INFO (5, Symbolic->nblocks) ;
421             INFO (6, ordering) ;
422             INFO (7, Common.scale) ;
423             INFO (8, Numeric->lnz) ;
424             INFO (9, Numeric->unz) ;
425             INFO (10, Numeric->nzoff) ;
426             INFO (11, Common.tol) ;
427             INFO (12, Common.mempeak) ;
428         }
429         if (nargout > 2)
430         {
431             /* this is done separately, since it's costly */
432             klu_l_condest (Ap, Ax, Symbolic, Numeric, &Common) ;
433             pargout [2] = mxCreateDoubleMatrix (1, 1, mxREAL) ;
434             Wx = mxGetPr (pargout [2]) ;
435             Wx [0] = Common.condest ;
436         }
437 
438     }
439     else
440     {
441         /* create an empty "info" and "condest" output */
442         if (nargout > 1)
443         {
444             pargout [1] = mxCreateDoubleMatrix (0, 0, mxREAL) ;
445         }
446         if (nargout > 2)
447         {
448             pargout [2] = mxCreateDoubleMatrix (0, 0, mxREAL) ;
449         }
450     }
451 
452     /* ---------------------------------------------------------------------- */
453     /* solve, or return LU factorization */
454     /* ---------------------------------------------------------------------- */
455 
456     if (do_solve)
457     {
458 
459         /* ------------------------------------------------------------------ */
460         /* solve, x = klu ( ... ) usage */
461         /* ------------------------------------------------------------------ */
462 
463         B_complex = mxIsComplex (B_matlab) ;
464 
465         if (do_factorize)
466         {
467 
468             /* -------------------------------------------------------------- */
469             /* solve using KLU factors computed above */
470             /* -------------------------------------------------------------- */
471 
472             /* klu (A,'\',b) or klu (b,'/',A) usage */
473 
474             /* create X */
475             if (do_transpose)
476             {
477                 pargout [0] = mxCreateDoubleMatrix (nrhs, n,
478                     (A_complex || B_complex) ?  mxCOMPLEX : mxREAL) ;
479             }
480             else
481             {
482                 pargout [0] = mxCreateDoubleMatrix (n, nrhs,
483                     (A_complex || B_complex) ?  mxCOMPLEX : mxREAL) ;
484             }
485 
486             if (A_complex)
487             {
488 
489                 /* ---------------------------------------------------------- */
490                 /* A is complex, but B might be real */
491                 /* ---------------------------------------------------------- */
492 
493                 X = mxMalloc (n * nrhs * 2 * sizeof (double)) ;
494                 Bx = mxGetPr (B_matlab) ;
495                 Bz = mxGetPi (B_matlab) ;
496 
497                 if (do_transpose)
498                 {
499 
500                     /* X = B', merge and transpose B */
501                     for (j = 0 ; j < nrhs ; j++)
502                     {
503                         for (i = 0 ; i < n ; i++)
504                         {
505                             X [2*(i+j*n)  ] = Bx [j+i*nrhs] ;   /* real */
506                             X [2*(i+j*n)+1] = Bz ? (-Bz [j+i*nrhs]) : 0 ;
507                         }
508                     }
509 
510                     /* solve A'x=b (complex conjugate) */
511                     klu_zl_tsolve (Symbolic, Numeric, n, nrhs, X, 1, &Common) ;
512 
513                     /* split and transpose the solution */
514                     Xx = mxGetPr (pargout [0]) ;
515                     Xz = mxGetPi (pargout [0]) ;
516                     for (j = 0 ; j < nrhs ; j++)
517                     {
518                         for (i = 0 ; i < n ; i++)
519                         {
520                             Xx [j+i*nrhs] = X [2*(i+j*n)  ] ;  /* real part */
521                             Xz [j+i*nrhs] = -X [2*(i+j*n)+1] ; /* imag part */
522                         }
523                     }
524 
525                 }
526                 else
527                 {
528 
529                     /* X = B, but create merged X from a split B */
530                     for (k = 0 ; k < n*nrhs ; k++)
531                     {
532                         X [2*k  ] = Bx [k] ;                /* real part */
533                         X [2*k+1] = Bz ? (Bz [k]) : 0 ;     /* imaginary part */
534                     }
535 
536                     /* solve Ax=b */
537                     klu_zl_solve (Symbolic, Numeric, n, nrhs, X, &Common) ;
538 
539                     /* split the solution into real and imaginary parts */
540                     Xx = mxGetPr (pargout [0]) ;
541                     Xz = mxGetPi (pargout [0]) ;
542                     for (k = 0 ; k < n*nrhs ; k++)
543                     {
544                         Xx [k] = X [2*k  ] ;        /* real part */
545                         Xz [k] = X [2*k+1] ;        /* imaginary part */
546                     }
547                 }
548 
549                 mxFree (X) ;
550             }
551             else
552             {
553 
554                 if (do_transpose)
555                 {
556 
557                     /* solve in chunks of 4 columns at a time */
558                     W = mxMalloc (n * MAX (nrhs,4) * sizeof (double)) ;
559                     X = mxGetPr (pargout [0]) ;
560                     B = mxGetPr (B_matlab) ;
561                     Xi = mxGetPi (pargout [0]) ;
562                     Bi = mxGetPi (B_matlab) ;
563 
564                     for (chunk = 0 ; chunk < nrhs ; chunk += 4)
565                     {
566 
567                         /* A is real: real(X) = real(b) / real(A) */
568                         Long chunksize = MIN (nrhs - chunk, 4) ;
569                         for (j = 0 ; j < chunksize ; j++)
570                         {
571                             for (i = 0 ; i < n ; i++)
572                             {
573                                 W [i+j*n] = B [i*nrhs+j] ;
574                             }
575                         }
576                         klu_l_tsolve (Symbolic, Numeric, n, chunksize, W,
577                                 &Common) ;
578                         for (j = 0 ; j < chunksize ; j++)
579                         {
580                             for (i = 0 ; i < n ; i++)
581                             {
582                                 X [i*nrhs+j] = W [i+j*n] ;
583                             }
584                         }
585                         X += 4 ;
586                         B += 4 ;
587 
588                         if (B_complex)
589                         {
590                             /* B is complex: imag(X) = imag(B) / real(A) */
591 
592                             for (j = 0 ; j < chunksize ; j++)
593                             {
594                                 for (i = 0 ; i < n ; i++)
595                                 {
596                                     W [i+j*n] = Bi [i*nrhs+j] ;
597                                 }
598                             }
599                             klu_l_tsolve (Symbolic, Numeric, n, chunksize, W,
600                                 &Common) ;
601                             for (j = 0 ; j < chunksize ; j++)
602                             {
603                                 for (i = 0 ; i < n ; i++)
604                                 {
605                                     Xi [i*nrhs+j] = W [i+j*n] ;
606                                 }
607                             }
608                             Xi += 4 ;
609                             Bi += 4 ;
610                         }
611 
612                     }
613                     mxFree (W) ;
614 
615                 }
616                 else
617                 {
618 
619                     /* A is real: real(X) = real(A) \ real(b) */
620                     X = mxGetPr (pargout [0]) ;
621                     B = mxGetPr (B_matlab) ;
622                     for (k = 0 ; k < n*nrhs ; k++)
623                     {
624                         X [k] = B [k] ;
625                     }
626                     klu_l_solve (Symbolic, Numeric, n, nrhs, X, &Common) ;
627                     if (B_complex)
628                     {
629                         /* B is complex: imag(X) = real(A) \ imag(B) */
630                         X = mxGetPi (pargout [0]) ;
631                         B = mxGetPi (B_matlab) ;
632                         for (k = 0 ; k < n*nrhs ; k++)
633                         {
634                             X [k] = B [k] ;
635                         }
636                         klu_l_solve (Symbolic, Numeric, n, nrhs, X, &Common) ;
637                     }
638                 }
639             }
640 
641             /* -------------------------------------------------------------- */
642             /* free Symbolic and Numeric objects */
643             /* -------------------------------------------------------------- */
644 
645             klu_l_free_symbolic (&Symbolic, &Common) ;
646             if (A_complex)
647             {
648                 klu_zl_free_numeric (&Numeric, &Common) ;
649             }
650             else
651             {
652                 klu_l_free_numeric (&Numeric, &Common) ;
653             }
654 
655         }
656         else
657         {
658 
659             /* -------------------------------------------------------------- */
660             /* solve using LU struct given on input */
661             /* -------------------------------------------------------------- */
662 
663             /* the factorization is L*U+F = R\A(p,q), where L*U is block
664                diagonal, and F contains the entries in the upper block
665                triangular part */
666 
667             L_matlab = mxGetField (LU_matlab, 0, "L") ;
668             U_matlab = mxGetField (LU_matlab, 0, "U") ;
669             p_matlab = mxGetField (LU_matlab, 0, "p") ;
670             q_matlab = mxGetField (LU_matlab, 0, "q") ;
671             R_matlab = mxGetField (LU_matlab, 0, "R") ;
672             F_matlab = mxGetField (LU_matlab, 0, "F") ;
673             r_matlab = mxGetField (LU_matlab, 0, "r") ;
674 
675             if (!L_matlab || !U_matlab || !mxIsSparse (L_matlab) ||
676                 !mxIsSparse (U_matlab))
677             {
678                 mexErrMsgTxt ("invalid LU struct") ;
679             }
680 
681             n = mxGetM (L_matlab) ;
682             if (n != mxGetN (L_matlab) ||
683                 n != mxGetM (U_matlab) || n != mxGetN (U_matlab)
684                 /* ... */
685                 )
686             {
687                 mexErrMsgTxt ("invalid LU struct") ;
688             }
689 
690             if (n != bn || nrhs == 0)
691             {
692                 mexErrMsgTxt (
693                     "B must be non-empty with same number of rows as L and U") ;
694             }
695 
696             /* get L */
697             if (!mxIsSparse (L_matlab) ||
698                 n != mxGetM (L_matlab) || n != mxGetN (L_matlab))
699             {
700                 mexErrMsgTxt ("LU.L must be sparse and same size as A") ;
701             }
702 
703             Lp = (Long *) mxGetJc (L_matlab) ;
704             Li = (Long *) mxGetIr (L_matlab) ;
705             Lx = mxGetPr (L_matlab) ;
706             Lz = mxGetPi (L_matlab) ;
707 
708             /* get U */
709             if (!mxIsSparse (U_matlab) ||
710                 n != mxGetM (U_matlab) || n != mxGetN (U_matlab))
711             {
712                 mexErrMsgTxt ("LU.U must be sparse and same size as A") ;
713             }
714             Up = (Long *) mxGetJc (U_matlab) ;
715             Ui = (Long *) mxGetIr (U_matlab) ;
716             Ux = mxGetPr (U_matlab) ;
717             Uz = mxGetPi (U_matlab) ;
718 
719             /* get p */
720             if (p_matlab)
721             {
722                 if (mxGetNumberOfElements (p_matlab) != n
723                     || mxIsSparse (p_matlab)
724                     || mxGetClassID (p_matlab) != mx_int)
725                 {
726                     mexErrMsgTxt ("P invalid") ;
727                 }
728                 P = (Long *) mxGetData (p_matlab) ;
729                 for (k = 0 ; k < n ; k++)
730                 {
731                     if (P [k] < 1 || P [k] > n) mexErrMsgTxt ("P invalid") ;
732                 }
733             }
734             else
735             {
736                 /* no P, use identity instead */
737                 P = NULL ;
738             }
739 
740             /* get q */
741             if (q_matlab)
742             {
743                 if (mxGetNumberOfElements (q_matlab) != n
744                     || mxIsSparse (q_matlab)
745                     || mxGetClassID (q_matlab) != mx_int)
746                 {
747                     mexErrMsgTxt ("Q invalid") ;
748                 }
749                 Q = (Long *) mxGetData (q_matlab) ;
750                 for (k = 0 ; k < n ; k++)
751                 {
752                     if (Q [k] < 1 || Q [k] > n) mexErrMsgTxt ("Q invalid.") ;
753                 }
754             }
755             else
756             {
757                 /* no Q, use identity instead */
758                 Q = NULL ;
759             }
760 
761             /* get r */
762             R1 [0] = 1 ;
763             R1 [1] = n+1 ;
764             if (r_matlab)
765             {
766                 nblocks = mxGetNumberOfElements (r_matlab) - 1 ;
767                 if (nblocks < 1 || nblocks > n || mxIsSparse (r_matlab)
768                     || mxGetClassID (r_matlab) != mx_int)
769                 {
770                     mexErrMsgTxt ("r invalid") ;
771                 }
772                 R = (Long *) mxGetData (r_matlab) ;
773                 if (R [0] != 1) mexErrMsgTxt ("r invalid") ;
774                 for (k = 1 ; k <= nblocks ; k++)
775                 {
776                     if (R [k] <= R [k-1] || R [k] > n+1)
777                     {
778                         mexErrMsgTxt ("rinvalid") ;
779                     }
780                 }
781                 if (R [nblocks] != n+1) mexErrMsgTxt ("r invalid") ;
782             }
783             else
784             {
785                 /* no r */
786                 nblocks = 1 ;
787                 R = R1 ;
788             }
789 
790             /* get R, scale factors */
791             if (R_matlab)
792             {
793                 /* ensure R is sparse, real, and has the right size */
794                 if (!mxIsSparse (R_matlab) ||
795                     n != mxGetM (R_matlab) || n != mxGetN (R_matlab))
796                 {
797                     mexErrMsgTxt ("LU.R must be sparse and same size as A") ;
798                 }
799                 Rp = (Long *) mxGetJc (R_matlab) ;
800                 Rs = mxGetPr (R_matlab) ;
801                 if (Rp [n] != n)
802                 {
803                     mexErrMsgTxt ("LU.R invalid, must be diagonal") ;
804                 }
805             }
806             else
807             {
808                 /* no scale factors */
809                 Rs = NULL ;
810             }
811 
812             /* get F, off diagonal entries */
813             if (F_matlab)
814             {
815                 if (!mxIsSparse (F_matlab) ||
816                     n != mxGetM (F_matlab) || n != mxGetN (F_matlab))
817                 {
818                     mexErrMsgTxt ("LU.F must be sparse and same size as A") ;
819                 }
820                 Offp = (Long *) mxGetJc (F_matlab) ;
821                 Offi = (Long *) mxGetIr (F_matlab) ;
822                 Offx = mxGetPr (F_matlab) ;
823                 Offz = mxGetPi (F_matlab) ;
824             }
825             else
826             {
827                 /* no off-diagonal entries */
828                 Offp = NULL ;
829                 Offi = NULL ;
830                 Offx = NULL ;
831                 Offz = NULL ;
832             }
833 
834             /* -------------------------------------------------------------- */
835             /* solve */
836             /* -------------------------------------------------------------- */
837 
838             if (mxIsComplex (L_matlab) || mxIsComplex (U_matlab) ||
839                 (F_matlab && mxIsComplex (F_matlab)) || B_complex)
840             {
841 
842                 /* ========================================================== */
843                 /* === complex case ========================================= */
844                 /* ========================================================== */
845 
846                 /* create X */
847                 if (do_transpose)
848                 {
849                     pargout [0] = mxCreateDoubleMatrix (nrhs, n, mxCOMPLEX) ;
850                 }
851                 else
852                 {
853                     pargout [0] = mxCreateDoubleMatrix (n, nrhs, mxCOMPLEX) ;
854                 }
855                 Xx = mxGetPr (pargout [0]) ;
856                 Xz = mxGetPi (pargout [0]) ;
857 
858                 Bx = mxGetPr (B_matlab) ;
859                 Bz = mxGetPi (B_matlab) ;
860 
861                 /* get workspace */
862                 Wx = mxMalloc (n * sizeof (double)) ;
863                 Wz = mxMalloc (n * sizeof (double)) ;
864 
865                 /* ---------------------------------------------------------- */
866                 /* do just one row/column of the right-hand-side at a time */
867                 /* ---------------------------------------------------------- */
868 
869                 if (do_transpose)
870                 {
871 
872                     for (chunk = 0 ; chunk < nrhs ; chunk++)
873                     {
874 
875                         /* -------------------------------------------------- */
876                         /* transpose and permute right hand side, W = Q'*B' */
877                         /* -------------------------------------------------- */
878 
879                         for (k = 0 ; k < n ; k++)
880                         {
881                             i = Q ? (Q [k] - 1) : k ;
882                             Wx [k] = Bx [i*nrhs] ;
883                             Wz [k] = Bz ? (-Bz [i*nrhs]) : 0 ;
884                         }
885 
886                         /* -------------------------------------------------- */
887                         /* solve W = (L*U + Off)'\W */
888                         /* -------------------------------------------------- */
889 
890                         for (block = 0 ; block < nblocks ; block++)
891                         {
892 
893                             /* ---------------------------------------------- */
894                             /* block of size nk, rows/columns k1 to k2-1 */
895                             /* ---------------------------------------------- */
896 
897                             k1 = R [block] - 1 ;        /* R is 1-based */
898                             k2 = R [block+1] - 1 ;
899                             nk = k2 - k1 ;
900 
901                             /* ---------------------------------------------- */
902                             /* block back-substitution for off-diagonal-block */
903                             /* ---------------------------------------------- */
904 
905                             if (block > 0 && Offp != NULL)
906                             {
907                                 for (k = k1 ; k < k2 ; k++)
908                                 {
909                                     pend = Offp [k+1] ;
910                                     for (p = Offp [k] ; p < pend ; p++)
911                                     {
912                                         i = Offi [p] ;
913                                         /* W [k] -= W [i] * conj(Off [p]) ; */
914                                         z = Offz ? Offz [p] : 0 ;
915                                         MULT_SUB_CONJ (Wx [k], Wz [k],
916                                             Wx [i], Wz [i], Offx [p], z) ;
917                                     }
918                                 }
919                             }
920 
921 
922                             /* solve the block system */
923                             if (nk == 1)
924                             {
925 
926                                 /* W [k1] /= conj (L(k1,k1)) ; */
927                                 p = Lp [k1] ;
928                                 s = Lx [p] ;
929                                 sz = Lz ? (-Lz [p]) : 0 ;
930                                 DIV (wx, wz, Wx [k1], Wz [k1], s, sz) ;
931                                 Wx [k1] = wx ;
932                                 Wz [k1] = wz ;
933 
934                                 /* W [k1] /= conj (U(k1,k1)) ; */
935                                 p = Up [k1] ;
936                                 s = Ux [p] ;
937                                 sz = Uz ? (-Uz [p]) : 0 ;
938                                 DIV (wx, wz, Wx [k1], Wz [k1], s, sz) ;
939                                 Wx [k1] = wx ;
940                                 Wz [k1] = wz ;
941 
942                             }
943                             else
944                             {
945 
946                                 /* ------------------------------------------ */
947                                 /* W = U'\W and then W=L'\W */
948                                 /* ------------------------------------------ */
949 
950                                 /* W = U'\W */
951                                 for (k = k1 ; k < k2 ; k++)
952                                 {
953                                     pend = Up [k+1] - 1 ;
954                                     /* w = W [k] */
955                                     wx = Wx [k] ;
956                                     wz = Wz [k] ;
957                                     for (p = Up [k] ; p < pend ; p++)
958                                     {
959                                         i = Ui [p] ;
960                                         /* w -= W [i] * conj(U [p]) */
961                                         z = Uz ? Uz [p] : 0 ;
962                                         MULT_SUB_CONJ (wx, wz,
963                                             Wx [i], Wz [i], Ux [p], z) ;
964                                     }
965                                     /* W [k] = w / conj(ukk) ; */
966                                     ukk = Ux [pend] ;
967                                     ukkz = Uz ? (-Uz [pend]) : 0 ;
968                                     DIV (Wx [k], Wz [k], wx, wz, ukk, ukkz) ;
969                                 }
970 
971                                 /* W = L'\W */
972                                 for (k = k2-1 ; k >= k1 ; k--)
973                                 {
974                                     p = Lp [k] ;
975                                     pend = Lp [k+1] ;
976                                     /* w = W [k] */
977                                     wx = Wx [k] ;
978                                     wz = Wz [k] ;
979                                     lkk = Lx [p] ;
980                                     lkkz = Lz ? (-Lz [p]) : 0 ;
981                                     for (p++ ; p < pend ; p++)
982                                     {
983                                         i = Li [p] ;
984                                         /* w -= W [i] * conj (Lx [p]) ; */
985                                         z = Lz ? Lz [p] : 0 ;
986                                         MULT_SUB_CONJ (wx, wz,
987                                             Wx [i], Wz [i], Lx [p], z) ;
988                                     }
989                                     /* W [k] = w / conj(lkk) ; */
990                                     DIV (Wx [k], Wz [k], wx, wz, lkk, lkkz) ;
991                                 }
992                             }
993                         }
994 
995                         /* -------------------------------------------------- */
996                         /* scale, permute, and tranpose: X = (P*(R\W))' */
997                         /* -------------------------------------------------- */
998 
999                         if (Rs == NULL)
1000                         {
1001                             /* no scaling */
1002                             for (k = 0 ; k < n ; k++)
1003                             {
1004                                 i = P ? (P [k] - 1) : k ;
1005                                 Xx [i*nrhs] = Wx [k] ;
1006                                 Xz [i*nrhs] = Wz ? (-Wz [k]) : 0 ;
1007                             }
1008                         }
1009                         else
1010                         {
1011                             /* with scaling */
1012                             for (k = 0 ; k < n ; k++)
1013                             {
1014                                 i = P ? (P [k] - 1) : k ;
1015                                 rs = Rs [k] ;
1016                                 Xx [i*nrhs] = Wx [k] / rs ;
1017                                 Xz [i*nrhs] = Wz ? (-Wz [k] / rs) : 0 ;
1018                             }
1019                         }
1020 
1021                         /* -------------------------------------------------- */
1022                         /* go to the next row of B and X */
1023                         /* -------------------------------------------------- */
1024 
1025                         Xx++ ;
1026                         Xz++ ;
1027                         Bx++ ;
1028                         if (Bz) Bz++ ;
1029                     }
1030 
1031                 }
1032                 else
1033                 {
1034 
1035                     for (chunk = 0 ; chunk < nrhs ; chunk++)
1036                     {
1037 
1038                         /* -------------------------------------------------- */
1039                         /* scale and permute the right hand side, W = P*(R\B) */
1040                         /* -------------------------------------------------- */
1041 
1042                         if (Rs == NULL)
1043                         {
1044                             /* no scaling */
1045                             for (k = 0 ; k < n ; k++)
1046                             {
1047                                 i = P ? (P [k] - 1) : k ;
1048                                 Wx [k] = Bx [i] ;
1049                                 Wz [k] = Bz ? Bz [i] : 0 ;
1050                             }
1051                         }
1052                         else
1053                         {
1054                             /* with scaling */
1055                             for (k = 0 ; k < n ; k++)
1056                             {
1057                                 i = P ? (P [k] - 1) : k ;
1058                                 rs = Rs [k] ;
1059                                 Wx [k] = Bx [i] / rs ;
1060                                 Wz [k] = Bz ? (Bz [i] / rs) : 0 ;
1061                             }
1062                         }
1063 
1064                         /* -------------------------------------------------- */
1065                         /* solve W = (L*U + Off)\W */
1066                         /* -------------------------------------------------- */
1067 
1068                         for (block = nblocks-1 ; block >= 0 ; block--)
1069                         {
1070 
1071                             /* ---------------------------------------------- */
1072                             /* block of size nk, rows/columns k1 to k2-1 */
1073                             /* ---------------------------------------------- */
1074 
1075                             k1 = R [block] - 1 ;        /* R is 1-based */
1076                             k2 = R [block+1] - 1 ;
1077                             nk = k2 - k1 ;
1078 
1079                             /* solve the block system */
1080                             if (nk == 1)
1081                             {
1082 
1083                                 /* W [k1] /= L(k1,k1) ; */
1084                                 p = Lp [k1] ;
1085                                 s = Lx [p] ;
1086                                 sz = Lz ? Lz [p] : 0 ;
1087                                 DIV (wx, wz, Wx [k1], Wz [k1], s, sz) ;
1088                                 Wx [k1] = wx ;
1089                                 Wz [k1] = wz ;
1090 
1091                                 /* W [k1] /= U(k1,k1) ; */
1092                                 p = Up [k1] ;
1093                                 s = Ux [p] ;
1094                                 sz = Uz ? Uz [p] : 0 ;
1095                                 DIV (wx, wz, Wx [k1], Wz [k1], s, sz) ;
1096                                 Wx [k1] = wx ;
1097                                 Wz [k1] = wz ;
1098 
1099                             }
1100                             else
1101                             {
1102 
1103                                 /* ------------------------------------------ */
1104                                 /* W = L\W and then W=U\W */
1105                                 /* ------------------------------------------ */
1106 
1107                                 /* W = L\W */
1108                                 for (k = k1 ; k < k2 ; k++)
1109                                 {
1110                                     p = Lp [k] ;
1111                                     pend = Lp [k+1] ;
1112                                     lkk = Lx [p] ;
1113                                     lkkz = Lz ? Lz [p] : 0 ;
1114                                     /* w = W [k] / lkk ; */
1115                                     DIV (wx, wz, Wx [k], Wz [k], lkk, lkkz) ;
1116                                     Wx [k] = wx ;
1117                                     Wz [k] = wz ;
1118                                     for (p++ ; p < pend ; p++)
1119                                     {
1120                                         i = Li [p] ;
1121                                         /* W [i] -= Lx [p] * w ; */
1122                                         z = Lz ? Lz [p] : 0 ;
1123                                         MULT_SUB (Wx [i], Wz [i], Lx [p], z,
1124                                             wx, wz) ;
1125                                     }
1126                                 }
1127 
1128                                 /* W = U\W */
1129                                 for (k = k2-1 ; k >= k1 ; k--)
1130                                 {
1131                                     pend = Up [k+1] - 1 ;
1132                                     ukk = Ux [pend] ;
1133                                     ukkz = Uz ? Uz [pend] : 0 ;
1134                                     /* w = W [k] / ukk ; */
1135                                     DIV (wx, wz, Wx [k], Wz [k], ukk, ukkz) ;
1136                                     Wx [k] = wx ;
1137                                     Wz [k] = wz ;
1138                                     for (p = Up [k] ; p < pend ; p++)
1139                                     {
1140                                         i = Ui [p] ;
1141                                         /* W [i] -= U [p] * w ; */
1142                                         z = Uz ? Uz [p] : 0 ;
1143                                         MULT_SUB (Wx [i], Wz [i], Ux [p], z,
1144                                             wx, wz) ;
1145                                     }
1146                                 }
1147                             }
1148 
1149                             /* ---------------------------------------------- */
1150                             /* block back-substitution for off-diagonal-block */
1151                             /* ---------------------------------------------- */
1152 
1153                             if (block > 0 && Offp != NULL)
1154                             {
1155                                 for (k = k1 ; k < k2 ; k++)
1156                                 {
1157                                     pend = Offp [k+1] ;
1158                                     wx = Wx [k] ;
1159                                     wz = Wz [k] ;
1160                                     for (p = Offp [k] ; p < pend ; p++)
1161                                     {
1162                                         i = Offi [p] ;
1163                                         /* W [Offi [p]] -= Offx [p] * w ; */
1164                                         z = Offz ? Offz [p] : 0 ;
1165                                         MULT_SUB (Wx [i], Wz [i], Offx [p], z,
1166                                             wx, wz) ;
1167                                     }
1168                                 }
1169                             }
1170                         }
1171 
1172                         /* -------------------------------------------------- */
1173                         /* permute the result, X = Q*W */
1174                         /* -------------------------------------------------- */
1175 
1176                         for (k = 0 ; k < n ; k++)
1177                         {
1178                             i = Q ? (Q [k] - 1) : k ;
1179                             Xx [i] = Wx [k] ;
1180                             Xz [i] = Wz [k] ;
1181                         }
1182 
1183                         /* -------------------------------------------------- */
1184                         /* go to the next column of B and X */
1185                         /* -------------------------------------------------- */
1186 
1187                         Xx += n ;
1188                         Xz += n ;
1189                         Bx += n ;
1190                         if (Bz) Bz += n ;
1191                     }
1192                 }
1193 
1194                 /* free workspace */
1195                 mxFree (Wx) ;
1196                 mxFree (Wz) ;
1197 
1198             }
1199             else
1200             {
1201 
1202                 /* ========================================================== */
1203                 /* === real case ============================================ */
1204                 /* ========================================================== */
1205 
1206                 /* create X */
1207                 if (do_transpose)
1208                 {
1209                     pargout [0] = mxCreateDoubleMatrix (nrhs, n, mxREAL) ;
1210                 }
1211                 else
1212                 {
1213                     pargout [0] = mxCreateDoubleMatrix (n, nrhs, mxREAL) ;
1214                 }
1215 
1216                 Xx = mxGetPr (pargout [0]) ;
1217                 Bx = mxGetPr (B_matlab) ;
1218 
1219                 if (do_transpose)
1220                 {
1221 
1222                     /* ------------------------------------------------------ */
1223                     /* solve in chunks of one row at a time */
1224                     /* ------------------------------------------------------ */
1225 
1226                     /* get workspace */
1227                     Wx = mxMalloc (n * sizeof (double)) ;
1228 
1229                     for (chunk = 0 ; chunk < nrhs ; chunk++)
1230                     {
1231 
1232                         /* -------------------------------------------------- */
1233                         /* transpose and permute right hand side, W = Q'*B' */
1234                         /* -------------------------------------------------- */
1235 
1236                         for (k = 0 ; k < n ; k++)
1237                         {
1238                             i = Q ? (Q [k] - 1) : k ;
1239                             Wx [k] = Bx [i*nrhs] ;
1240                         }
1241 
1242                         /* -------------------------------------------------- */
1243                         /* solve W = (L*U + Off)'\W */
1244                         /* -------------------------------------------------- */
1245 
1246                         for (block = 0 ; block < nblocks ; block++)
1247                         {
1248 
1249                             /* ---------------------------------------------- */
1250                             /* block of size nk, rows/columns k1 to k2-1 */
1251                             /* ---------------------------------------------- */
1252 
1253                             k1 = R [block] - 1 ;        /* R is 1-based */
1254                             k2 = R [block+1] - 1 ;
1255                             nk = k2 - k1 ;
1256 
1257                             /* ---------------------------------------------- */
1258                             /* block back-substitution for off-diagonal-block */
1259                             /* ---------------------------------------------- */
1260 
1261                             if (block > 0 && Offp != NULL)
1262                             {
1263                                 for (k = k1 ; k < k2 ; k++)
1264                                 {
1265                                     pend = Offp [k+1] ;
1266                                     for (p = Offp [k] ; p < pend ; p++)
1267                                     {
1268                                         Wx [k] -= Wx [Offi [p]] * Offx [p] ;
1269                                     }
1270                                 }
1271                             }
1272 
1273                             /* solve the block system */
1274                             if (nk == 1)
1275                             {
1276                                 Wx [k1] /= Lx [Lp [k1]] ;
1277                                 Wx [k1] /= Ux [Up [k1]] ;
1278                             }
1279                             else
1280                             {
1281 
1282                                 /* ------------------------------------------ */
1283                                 /* W = U'\W and then W=L'\W */
1284                                 /* ------------------------------------------ */
1285 
1286                                 /* W = U'\W */
1287                                 for (k = k1 ; k < k2 ; k++)
1288                                 {
1289                                     pend = Up [k+1] - 1 ;
1290                                     for (p = Up [k] ; p < pend ; p++)
1291                                     {
1292                                         Wx [k] -= Wx [Ui [p]] * Ux [p] ;
1293                                     }
1294                                     Wx [k] /= Ux [pend] ;
1295                                 }
1296 
1297                                 /* W = L'\W */
1298                                 for (k = k2-1 ; k >= k1 ; k--)
1299                                 {
1300                                     p = Lp [k] ;
1301                                     pend = Lp [k+1] ;
1302                                     lkk = Lx [p] ;
1303                                     for (p++ ; p < pend ; p++)
1304                                     {
1305                                         Wx [k] -= Wx [Li [p]] * Lx [p] ;
1306                                     }
1307                                     Wx [k] /= lkk ;
1308                                 }
1309                             }
1310                         }
1311 
1312                         /* -------------------------------------------------- */
1313                         /* scale, permute, and tranpose: X = (P*(R\W))' */
1314                         /* -------------------------------------------------- */
1315 
1316                         if (Rs == NULL)
1317                         {
1318                             /* no scaling */
1319                             for (k = 0 ; k < n ; k++)
1320                             {
1321                                 i = P ? (P [k] - 1) : k ;
1322                                 Xx [i*nrhs] = Wx [k] ;
1323                             }
1324                         }
1325                         else
1326                         {
1327                             /* with scaling */
1328                             for (k = 0 ; k < n ; k++)
1329                             {
1330                                 i = P ? (P [k] - 1) : k ;
1331                                 rs = Rs [k] ;
1332                                 Xx [i*nrhs] = Wx [k] / rs ;
1333                             }
1334                         }
1335 
1336                         /* -------------------------------------------------- */
1337                         /* go to the next row of B and X */
1338                         /* -------------------------------------------------- */
1339 
1340                         Xx++ ;
1341                         Bx++ ;
1342                     }
1343 
1344                 }
1345                 else
1346                 {
1347 
1348                     /* ------------------------------------------------------ */
1349                     /* solve in chunks of 4 columns at a time */
1350                     /* ------------------------------------------------------ */
1351 
1352                     /* get workspace */
1353                     Wx = mxMalloc (n * MAX (4, nrhs) * sizeof (double)) ;
1354 
1355                     for (chunk = 0 ; chunk < nrhs ; chunk += 4)
1356                     {
1357                         /* -------------------------------------------------- */
1358                         /* get the size of the current chunk */
1359                         /* -------------------------------------------------- */
1360 
1361                         nr = MIN (nrhs - chunk, 4) ;
1362 
1363                         /* -------------------------------------------------- */
1364                         /* scale and permute the right hand side, W = P*(R\B) */
1365                         /* -------------------------------------------------- */
1366 
1367                         if (Rs == NULL)
1368                         {
1369 
1370                             /* no scaling */
1371                             switch (nr)
1372                             {
1373 
1374                                 case 1:
1375 
1376                                     for (k = 0 ; k < n ; k++)
1377                                     {
1378                                         i = P ? (P [k] - 1) : k ;
1379                                         Wx [k] = Bx [i] ;
1380                                     }
1381                                     break ;
1382 
1383                                 case 2:
1384 
1385                                     for (k = 0 ; k < n ; k++)
1386                                     {
1387                                         i = P ? (P [k] - 1) : k ;
1388                                         Wx [2*k    ] = Bx [i      ] ;
1389                                         Wx [2*k + 1] = Bx [i + n  ] ;
1390                                     }
1391                                     break ;
1392 
1393                                 case 3:
1394 
1395                                     for (k = 0 ; k < n ; k++)
1396                                     {
1397                                         i = P ? (P [k] - 1) : k ;
1398                                         Wx [3*k    ] = Bx [i      ] ;
1399                                         Wx [3*k + 1] = Bx [i + n  ] ;
1400                                         Wx [3*k + 2] = Bx [i + n*2] ;
1401                                     }
1402                                     break ;
1403 
1404                                 case 4:
1405 
1406                                     for (k = 0 ; k < n ; k++)
1407                                     {
1408                                         i = P ? (P [k] - 1) : k ;
1409                                         Wx [4*k    ] = Bx [i      ] ;
1410                                         Wx [4*k + 1] = Bx [i + n  ] ;
1411                                         Wx [4*k + 2] = Bx [i + n*2] ;
1412                                         Wx [4*k + 3] = Bx [i + n*3] ;
1413                                     }
1414                                     break ;
1415                             }
1416 
1417                         }
1418                         else
1419                         {
1420 
1421                             switch (nr)
1422                             {
1423 
1424                                 case 1:
1425 
1426                                     for (k = 0 ; k < n ; k++)
1427                                     {
1428                                         i = P ? (P [k] - 1) : k ;
1429                                         rs = Rs [k] ;
1430                                         Wx [k] = Bx [i] / rs ;
1431                                     }
1432                                     break ;
1433 
1434                                 case 2:
1435 
1436                                     for (k = 0 ; k < n ; k++)
1437                                     {
1438                                         i = P ? (P [k] - 1) : k ;
1439                                         rs = Rs [k] ;
1440                                         Wx [2*k    ] = Bx [i      ] / rs ;
1441                                         Wx [2*k + 1] = Bx [i + n  ] / rs ;
1442                                     }
1443                                     break ;
1444 
1445                                 case 3:
1446 
1447                                     for (k = 0 ; k < n ; k++)
1448                                     {
1449                                         i = P ? (P [k] - 1) : k ;
1450                                         rs = Rs [k] ;
1451                                         Wx [3*k    ] = Bx [i      ] / rs ;
1452                                         Wx [3*k + 1] = Bx [i + n  ] / rs ;
1453                                         Wx [3*k + 2] = Bx [i + n*2] / rs ;
1454                                     }
1455                                     break ;
1456 
1457                                 case 4:
1458 
1459                                     for (k = 0 ; k < n ; k++)
1460                                     {
1461                                         i = P ? (P [k] - 1) : k ;
1462                                         rs = Rs [k] ;
1463                                         Wx [4*k    ] = Bx [i      ] / rs ;
1464                                         Wx [4*k + 1] = Bx [i + n  ] / rs ;
1465                                         Wx [4*k + 2] = Bx [i + n*2] / rs ;
1466                                         Wx [4*k + 3] = Bx [i + n*3] / rs ;
1467                                     }
1468                                     break ;
1469                             }
1470                         }
1471 
1472                         /* -------------------------------------------------- */
1473                         /* solve W = (L*U + Off)\W */
1474                         /* -------------------------------------------------- */
1475 
1476                         for (block = nblocks-1 ; block >= 0 ; block--)
1477                         {
1478 
1479                             /* ---------------------------------------------- */
1480                             /* block of size nk is rows/columns k1 to k2-1 */
1481                             /* ---------------------------------------------- */
1482 
1483                             k1 = R [block] - 1 ;            /* R is 1-based */
1484                             k2 = R [block+1] - 1 ;
1485                             nk = k2 - k1 ;
1486 
1487                             /* solve the block system */
1488                             if (nk == 1)
1489                             {
1490 
1491                                 /* this is not done if L comes from KLU, since
1492                                    in that case, L is unit lower triangular */
1493                                 s = Lx [Lp [k1]] ;
1494                                 if (s != 1.0) switch (nr)
1495                                 {
1496                                     case 1:
1497                                         Wx [k1] /= s ;
1498                                         break ;
1499                                     case 2:
1500                                         Wx [2*k1] /= s ;
1501                                         Wx [2*k1 + 1] /= s ;
1502                                         break ;
1503                                     case 3:
1504                                         Wx [3*k1] /= s ;
1505                                         Wx [3*k1 + 1] /= s ;
1506                                         Wx [3*k1 + 2] /= s ;
1507                                         break ;
1508                                     case 4:
1509                                         Wx [4*k1] /= s ;
1510                                         Wx [4*k1 + 1] /= s ;
1511                                         Wx [4*k1 + 2] /= s ;
1512                                         Wx [4*k1 + 3] /= s ;
1513                                         break ;
1514                                 }
1515 
1516                                 s = Ux [Up [k1]] ;
1517                                 if (s != 1.0) switch (nr)
1518                                 {
1519                                     case 1:
1520                                         Wx [k1] /= s ;
1521                                         break ;
1522                                     case 2:
1523                                         Wx [2*k1] /= s ;
1524                                         Wx [2*k1 + 1] /= s ;
1525                                         break ;
1526                                     case 3:
1527                                         Wx [3*k1] /= s ;
1528                                         Wx [3*k1 + 1] /= s ;
1529                                         Wx [3*k1 + 2] /= s ;
1530                                         break ;
1531                                     case 4:
1532                                         Wx [4*k1] /= s ;
1533                                         Wx [4*k1 + 1] /= s ;
1534                                         Wx [4*k1 + 2] /= s ;
1535                                         Wx [4*k1 + 3] /= s ;
1536                                         break ;
1537                                 }
1538 
1539                             }
1540                             else
1541                             {
1542 
1543                                 /* ------------------------------------------ */
1544                                 /* W = L\W and then W=U\W */
1545                                 /* ------------------------------------------ */
1546 
1547                                 switch (nr)
1548                                 {
1549 
1550                                     case 1:
1551                                         /* W = L\W */
1552                                         for (k = k1 ; k < k2 ; k++)
1553                                         {
1554                                             p = Lp [k] ;
1555                                             pend = Lp [k+1] ;
1556                                             lkk = Lx [p++] ;
1557                                             x [0] = Wx [k] / lkk ;
1558                                             Wx [k] = x [0] ;
1559                                             for ( ; p < pend ; p++)
1560                                             {
1561                                                 Wx [Li [p]] -= Lx [p] * x [0] ;
1562                                             }
1563                                         }
1564 
1565                                         /* W = U\W */
1566                                         for (k = k2-1 ; k >= k1 ; k--)
1567                                         {
1568                                             pend = Up [k+1] - 1 ;
1569                                             ukk = Ux [pend] ;
1570                                             x [0] = Wx [k] / ukk ;
1571                                             Wx [k] = x [0] ;
1572                                             for (p = Up [k] ; p < pend ; p++)
1573                                             {
1574                                                 Wx [Ui [p]] -= Ux [p] * x [0] ;
1575                                             }
1576                                         }
1577                                         break ;
1578 
1579                                     case 2:
1580 
1581                                         /* W = L\W */
1582                                         for (k = k1 ; k < k2 ; k++)
1583                                         {
1584                                             p = Lp [k] ;
1585                                             pend = Lp [k+1] ;
1586                                             lkk = Lx [p++] ;
1587                                             x [0] = Wx [2*k    ] / lkk ;
1588                                             x [1] = Wx [2*k + 1] / lkk ;
1589                                             Wx [2*k    ] = x [0] ;
1590                                             Wx [2*k + 1] = x [1] ;
1591                                             for ( ; p < pend ; p++)
1592                                             {
1593                                                 i = Li [p] ;
1594                                                 lik = Lx [p] ;
1595                                                 Wx [2*i    ] -= lik * x [0] ;
1596                                                 Wx [2*i + 1] -= lik * x [1] ;
1597                                             }
1598                                         }
1599 
1600                                         /* W = U\W */
1601                                         for (k = k2-1 ; k >= k1 ; k--)
1602                                         {
1603                                             pend = Up [k+1] - 1 ;
1604                                             ukk = Ux [pend] ;
1605                                             x [0] = Wx [2*k    ] / ukk ;
1606                                             x [1] = Wx [2*k + 1] / ukk ;
1607                                             Wx [2*k    ] = x [0] ;
1608                                             Wx [2*k + 1] = x [1] ;
1609                                             for (p = Up [k] ; p < pend ; p++)
1610                                             {
1611                                                 i = Ui [p] ;
1612                                                 uik = Ux [p] ;
1613                                                 Wx [2*i    ] -= uik * x [0] ;
1614                                                 Wx [2*i + 1] -= uik * x [1] ;
1615                                             }
1616                                         }
1617                                         break ;
1618 
1619                                     case 3:
1620 
1621                                         /* W = L\W */
1622                                         for (k = k1 ; k < k2 ; k++)
1623                                         {
1624                                             p = Lp [k] ;
1625                                             pend = Lp [k+1] ;
1626                                             lkk = Lx [p++] ;
1627                                             x [0] = Wx [3*k    ] / lkk ;
1628                                             x [1] = Wx [3*k + 1] / lkk ;
1629                                             x [2] = Wx [3*k + 2] / lkk ;
1630                                             Wx [3*k    ] = x [0] ;
1631                                             Wx [3*k + 1] = x [1] ;
1632                                             Wx [3*k + 2] = x [2] ;
1633                                             for ( ; p < pend ; p++)
1634                                             {
1635                                                 i = Li [p] ;
1636                                                 lik = Lx [p] ;
1637                                                 Wx [3*i    ] -= lik * x [0] ;
1638                                                 Wx [3*i + 1] -= lik * x [1] ;
1639                                                 Wx [3*i + 2] -= lik * x [2] ;
1640                                             }
1641                                         }
1642 
1643                                         /* W = U\W */
1644                                         for (k = k2-1 ; k >= k1 ; k--)
1645                                         {
1646                                             pend = Up [k+1] - 1 ;
1647                                             ukk = Ux [pend] ;
1648                                             x [0] = Wx [3*k    ] / ukk ;
1649                                             x [1] = Wx [3*k + 1] / ukk ;
1650                                             x [2] = Wx [3*k + 2] / ukk ;
1651                                             Wx [3*k    ] = x [0] ;
1652                                             Wx [3*k + 1] = x [1] ;
1653                                             Wx [3*k + 2] = x [2] ;
1654                                             for (p = Up [k] ; p < pend ; p++)
1655                                             {
1656                                                 i = Ui [p] ;
1657                                                 uik = Ux [p] ;
1658                                                 Wx [3*i    ] -= uik * x [0] ;
1659                                                 Wx [3*i + 1] -= uik * x [1] ;
1660                                                 Wx [3*i + 2] -= uik * x [2] ;
1661                                             }
1662                                         }
1663                                         break ;
1664 
1665                                     case 4:
1666 
1667                                         /* W = L\W */
1668                                         for (k = k1 ; k < k2 ; k++)
1669                                         {
1670                                             p = Lp [k] ;
1671                                             pend = Lp [k+1] ;
1672                                             lkk = Lx [p++] ;
1673                                             x [0] = Wx [4*k    ] / lkk ;
1674                                             x [1] = Wx [4*k + 1] / lkk ;
1675                                             x [2] = Wx [4*k + 2] / lkk ;
1676                                             x [3] = Wx [4*k + 3] / lkk ;
1677                                             Wx [4*k    ] = x [0] ;
1678                                             Wx [4*k + 1] = x [1] ;
1679                                             Wx [4*k + 2] = x [2] ;
1680                                             Wx [4*k + 3] = x [3] ;
1681                                             for ( ; p < pend ; p++)
1682                                             {
1683                                                 i = Li [p] ;
1684                                                 lik = Lx [p] ;
1685                                                 Wx [4*i    ] -= lik * x [0] ;
1686                                                 Wx [4*i + 1] -= lik * x [1] ;
1687                                                 Wx [4*i + 2] -= lik * x [2] ;
1688                                                 Wx [4*i + 3] -= lik * x [3] ;
1689                                             }
1690                                         }
1691 
1692                                         /* Wx = U\Wx */
1693                                         for (k = k2-1 ; k >= k1 ; k--)
1694                                         {
1695                                             pend = Up [k+1] - 1 ;
1696                                             ukk = Ux [pend] ;
1697                                             x [0] = Wx [4*k    ] / ukk ;
1698                                             x [1] = Wx [4*k + 1] / ukk ;
1699                                             x [2] = Wx [4*k + 2] / ukk ;
1700                                             x [3] = Wx [4*k + 3] / ukk ;
1701                                             Wx [4*k    ] = x [0] ;
1702                                             Wx [4*k + 1] = x [1] ;
1703                                             Wx [4*k + 2] = x [2] ;
1704                                             Wx [4*k + 3] = x [3] ;
1705                                             for (p = Up [k] ; p < pend ; p++)
1706                                             {
1707                                                 i = Ui [p] ;
1708                                                 uik = Ux [p] ;
1709                                                 Wx [4*i    ] -= uik * x [0] ;
1710                                                 Wx [4*i + 1] -= uik * x [1] ;
1711                                                 Wx [4*i + 2] -= uik * x [2] ;
1712                                                 Wx [4*i + 3] -= uik * x [3] ;
1713                                             }
1714                                         }
1715                                         break ;
1716                                 }
1717                             }
1718 
1719                             /* ---------------------------------------------- */
1720                             /* block back-substitution for off-diagonal-block */
1721                             /* ---------------------------------------------- */
1722 
1723                             if (block > 0 && Offp != NULL)
1724                             {
1725                                 switch (nr)
1726                                 {
1727 
1728                                     case 1:
1729 
1730                                         for (k = k1 ; k < k2 ; k++)
1731                                         {
1732                                             pend = Offp [k+1] ;
1733                                             x [0] = Wx [k] ;
1734                                             for (p = Offp [k] ; p < pend ; p++)
1735                                             {
1736                                                 Wx [Offi [p]] -= Offx[p] * x[0];
1737                                             }
1738                                         }
1739                                         break ;
1740 
1741                                     case 2:
1742 
1743                                         for (k = k1 ; k < k2 ; k++)
1744                                         {
1745                                             pend = Offp [k+1] ;
1746                                             x [0] = Wx [2*k    ] ;
1747                                             x [1] = Wx [2*k + 1] ;
1748                                             for (p = Offp [k] ; p < pend ; p++)
1749                                             {
1750                                                 i = Offi [p] ;
1751                                                 offik = Offx [p] ;
1752                                                 Wx [2*i    ] -= offik * x [0] ;
1753                                                 Wx [2*i + 1] -= offik * x [1] ;
1754                                             }
1755                                         }
1756                                         break ;
1757 
1758                                     case 3:
1759 
1760                                         for (k = k1 ; k < k2 ; k++)
1761                                         {
1762                                             pend = Offp [k+1] ;
1763                                             x [0] = Wx [3*k    ] ;
1764                                             x [1] = Wx [3*k + 1] ;
1765                                             x [2] = Wx [3*k + 2] ;
1766                                             for (p = Offp [k] ; p < pend ; p++)
1767                                             {
1768                                                 i = Offi [p] ;
1769                                                 offik = Offx [p] ;
1770                                                 Wx [3*i    ] -= offik * x [0] ;
1771                                                 Wx [3*i + 1] -= offik * x [1] ;
1772                                                 Wx [3*i + 2] -= offik * x [2] ;
1773                                             }
1774                                         }
1775                                         break ;
1776 
1777                                     case 4:
1778 
1779                                         for (k = k1 ; k < k2 ; k++)
1780                                         {
1781                                             pend = Offp [k+1] ;
1782                                             x [0] = Wx [4*k    ] ;
1783                                             x [1] = Wx [4*k + 1] ;
1784                                             x [2] = Wx [4*k + 2] ;
1785                                             x [3] = Wx [4*k + 3] ;
1786                                             for (p = Offp [k] ; p < pend ; p++)
1787                                             {
1788                                                 i = Offi [p] ;
1789                                                 offik = Offx [p] ;
1790                                                 Wx [4*i    ] -= offik * x [0] ;
1791                                                 Wx [4*i + 1] -= offik * x [1] ;
1792                                                 Wx [4*i + 2] -= offik * x [2] ;
1793                                                 Wx [4*i + 3] -= offik * x [3] ;
1794                                             }
1795                                         }
1796                                         break ;
1797                                 }
1798                             }
1799                         }
1800 
1801                         /* -------------------------------------------------- */
1802                         /* permute the result, X = Q*W */
1803                         /* -------------------------------------------------- */
1804 
1805                         switch (nr)
1806                         {
1807 
1808                             case 1:
1809 
1810                                 for (k = 0 ; k < n ; k++)
1811                                 {
1812                                     i = Q ? (Q [k] - 1) : k ;
1813                                     Xx [i] = Wx [k] ;
1814                                 }
1815                                 break ;
1816 
1817                             case 2:
1818 
1819                                 for (k = 0 ; k < n ; k++)
1820                                 {
1821                                     i = Q ? (Q [k] - 1) : k ;
1822                                     Xx [i      ] = Wx [2*k    ] ;
1823                                     Xx [i + n  ] = Wx [2*k + 1] ;
1824                                 }
1825                                 break ;
1826 
1827                             case 3:
1828 
1829                                 for (k = 0 ; k < n ; k++)
1830                                 {
1831                                     i = Q ? (Q [k] - 1) : k ;
1832                                     Xx [i      ] = Wx [3*k    ] ;
1833                                     Xx [i + n  ] = Wx [3*k + 1] ;
1834                                     Xx [i + n*2] = Wx [3*k + 2] ;
1835                                 }
1836                                 break ;
1837 
1838                             case 4:
1839 
1840                                 for (k = 0 ; k < n ; k++)
1841                                 {
1842                                     i = Q ? (Q [k] - 1) : k ;
1843                                     Xx [i      ] = Wx [4*k    ] ;
1844                                     Xx [i + n  ] = Wx [4*k + 1] ;
1845                                     Xx [i + n*2] = Wx [4*k + 2] ;
1846                                     Xx [i + n*3] = Wx [4*k + 3] ;
1847                                 }
1848                                 break ;
1849                         }
1850 
1851                         /* -------------------------------------------------- */
1852                         /* go to the next chunk of B and X */
1853                         /* -------------------------------------------------- */
1854 
1855                         Xx += n*4 ;
1856                         Bx += n*4 ;
1857                     }
1858                 }
1859 
1860                 /* free workspace */
1861                 mxFree (Wx) ;
1862             }
1863 
1864         }
1865 
1866     }
1867     else
1868     {
1869 
1870         /* ------------------------------------------------------------------ */
1871         /* LU = klu (A) usage; extract factorization */
1872         /* ------------------------------------------------------------------ */
1873 
1874         /* sort the row indices in each column of L and U */
1875         if (A_complex)
1876         {
1877             klu_zl_sort (Symbolic, Numeric, &Common) ;
1878         }
1879         else
1880         {
1881             klu_l_sort (Symbolic, Numeric, &Common) ;
1882         }
1883 
1884         /* L */
1885         L_matlab = mxCreateSparse (n, n, Numeric->lnz,
1886             A_complex ? mxCOMPLEX: mxREAL) ;
1887         Lp = (Long *) mxGetJc (L_matlab) ;
1888         Li = (Long *) mxGetIr (L_matlab) ;
1889         Lx = mxGetPr (L_matlab) ;
1890         Lz = mxGetPi (L_matlab) ;
1891 
1892         /* U */
1893         U_matlab = mxCreateSparse (n, n, Numeric->unz,
1894             A_complex ? mxCOMPLEX: mxREAL) ;
1895         Up = (Long *) mxGetJc (U_matlab) ;
1896         Ui = (Long *) mxGetIr (U_matlab) ;
1897         Ux = mxGetPr (U_matlab) ;
1898         Uz = mxGetPi (U_matlab) ;
1899 
1900         /* p */
1901         p_matlab = mxCreateNumericMatrix (1, n, mx_int, mxREAL) ;
1902         P = (Long *) mxGetData (p_matlab) ;
1903 
1904         /* q */
1905         q_matlab = mxCreateNumericMatrix (1, n, mx_int, mxREAL) ;
1906         Q = (Long *) mxGetData (q_matlab) ;
1907 
1908         /* R, as a sparse diagonal matrix */
1909         R_matlab = mxCreateSparse (n, n, n+1, mxREAL) ;
1910         Rp = (Long *) mxGetJc (R_matlab) ;
1911         Ri = (Long *) mxGetIr (R_matlab) ;
1912         Rs = mxGetPr (R_matlab) ;
1913         for (k = 0 ; k <= n ; k++)
1914         {
1915             Rp [k] = k ;
1916             Ri [k] = k ;
1917         }
1918 
1919         /* F, off diagonal blocks */
1920         F_matlab = mxCreateSparse (n, n, Numeric->nzoff,
1921             A_complex ? mxCOMPLEX: mxREAL) ;
1922         Offp = (Long *) mxGetJc (F_matlab) ;
1923         Offi = (Long *) mxGetIr (F_matlab) ;
1924         Offx = mxGetPr (F_matlab) ;
1925         Offz = mxGetPi (F_matlab) ;
1926 
1927         /* r, block boundaries */
1928         nblocks = Symbolic->nblocks ;
1929         r_matlab = mxCreateNumericMatrix (1, nblocks+1, mx_int, mxREAL) ;
1930         R = (Long *) mxGetData (r_matlab) ;
1931 
1932         /* extract the LU factorization from KLU Numeric and Symbolic objects */
1933         if (A_complex)
1934         {
1935             klu_zl_extract (Numeric, Symbolic, Lp, Li, Lx, Lz, Up, Ui, Ux, Uz,
1936                 Offp, Offi, Offx, Offz, P, Q, Rs, R, &Common) ;
1937         }
1938         else
1939         {
1940             klu_l_extract (Numeric, Symbolic, Lp, Li, Lx, Up, Ui, Ux,
1941                 Offp, Offi, Offx, P, Q, Rs, R, &Common) ;
1942         }
1943 
1944         /* fix p and q for 1-based indexing */
1945         for (k = 0 ; k < n ; k++)
1946         {
1947             P [k]++ ;
1948             Q [k]++ ;
1949         }
1950 
1951         /* fix r for 1-based indexing */
1952         for (k = 0 ; k <= nblocks ; k++)
1953         {
1954             R [k]++ ;
1955         }
1956 
1957         /* create output LU struct */
1958         pargout [0] = mxCreateStructMatrix (1, 1, 7, LUnames) ;
1959         mxSetFieldByNumber (pargout [0], 0, 0, L_matlab) ;
1960         mxSetFieldByNumber (pargout [0], 0, 1, U_matlab) ;
1961         mxSetFieldByNumber (pargout [0], 0, 2, p_matlab) ;
1962         mxSetFieldByNumber (pargout [0], 0, 3, q_matlab) ;
1963         mxSetFieldByNumber (pargout [0], 0, 4, R_matlab) ;
1964         mxSetFieldByNumber (pargout [0], 0, 5, F_matlab) ;
1965         mxSetFieldByNumber (pargout [0], 0, 6, r_matlab) ;
1966 
1967         /* ------------------------------------------------------------------ */
1968         /* free Symbolic and Numeric objects */
1969         /* ------------------------------------------------------------------ */
1970 
1971         klu_l_free_symbolic (&Symbolic, &Common) ;
1972         klu_l_free_numeric (&Numeric, &Common) ;
1973     }
1974 }
1975