1 /*! \file
2 Copyright (c) 2003, The Regents of the University of California, through
3 Lawrence Berkeley National Laboratory (subject to receipt of any required
4 approvals from U.S. Dept. of Energy)
5 
6 All rights reserved.
7 
8 The source code is distributed under BSD license, see the file License.txt
9 at the top-level directory.
10 */
11 
12 /*! @file cmemory.c
13  * \brief Memory details
14  *
15  * <pre>
16  * -- SuperLU routine (version 4.0) --
17  * Lawrence Berkeley National Laboratory.
18  * June 30, 2009
19  * </pre>
20  */
21 #include "slu_cdefs.h"
22 
23 
24 /* Internal prototypes */
25 void  *cexpand (int *, MemType,int, int, GlobalLU_t *);
26 int   cLUWorkInit (int, int, int, int **, complex **, GlobalLU_t *);
27 void  copy_mem_complex (int, void *, void *);
28 void  cStackCompress (GlobalLU_t *);
29 void  cSetupSpace (void *, int, GlobalLU_t *);
30 void  *cuser_malloc (int, int, GlobalLU_t *);
31 void  cuser_free (int, int, GlobalLU_t *);
32 
33 /* External prototypes (in memory.c - prec-independent) */
34 extern void    copy_mem_int    (int, void *, void *);
35 extern void    user_bcopy      (char *, char *, int);
36 
37 
38 /* Macros to manipulate stack */
39 #define StackFull(x)         ( x + Glu->stack.used >= Glu->stack.size )
40 #define NotDoubleAlign(addr) ( (intptr_t)addr & 7 )
41 #define DoubleAlign(addr)    ( ((intptr_t)addr + 7) & ~7L )
42 #define TempSpace(m, w)      ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
43 			      (w + 1) * m * sizeof(complex) )
44 #define Reduce(alpha)        ((alpha + 1) / 2)  /* i.e. (alpha-1)/2 + 1 */
45 
46 
47 
48 
49 /*! \brief Setup the memory model to be used for factorization.
50  *
51  *    lwork = 0: use system malloc;
52  *    lwork > 0: use user-supplied work[] space.
53  */
cSetupSpace(void * work,int lwork,GlobalLU_t * Glu)54 void cSetupSpace(void *work, int lwork, GlobalLU_t *Glu)
55 {
56     if ( lwork == 0 ) {
57 	Glu->MemModel = SYSTEM; /* malloc/free */
58     } else if ( lwork > 0 ) {
59 	Glu->MemModel = USER;   /* user provided space */
60 	Glu->stack.used = 0;
61 	Glu->stack.top1 = 0;
62 	Glu->stack.top2 = (lwork/4)*4; /* must be word addressable */
63 	Glu->stack.size = Glu->stack.top2;
64 	Glu->stack.array = (void *) work;
65     }
66 }
67 
68 
69 
cuser_malloc(int bytes,int which_end,GlobalLU_t * Glu)70 void *cuser_malloc(int bytes, int which_end, GlobalLU_t *Glu)
71 {
72     void *buf;
73 
74     if ( StackFull(bytes) ) return (NULL);
75 
76     if ( which_end == HEAD ) {
77 	buf = (char*) Glu->stack.array + Glu->stack.top1;
78 	Glu->stack.top1 += bytes;
79     } else {
80 	Glu->stack.top2 -= bytes;
81 	buf = (char*) Glu->stack.array + Glu->stack.top2;
82     }
83 
84     Glu->stack.used += bytes;
85     return buf;
86 }
87 
88 
cuser_free(int bytes,int which_end,GlobalLU_t * Glu)89 void cuser_free(int bytes, int which_end, GlobalLU_t *Glu)
90 {
91     if ( which_end == HEAD ) {
92 	Glu->stack.top1 -= bytes;
93     } else {
94 	Glu->stack.top2 += bytes;
95     }
96     Glu->stack.used -= bytes;
97 }
98 
99 
100 
101 /*! \brief
102  *
103  * <pre>
104  * mem_usage consists of the following fields:
105  *    - for_lu (float)
106  *      The amount of space used in bytes for the L\U data structures.
107  *    - total_needed (float)
108  *      The amount of space needed in bytes to perform factorization.
109  * </pre>
110  */
cQuerySpace(SuperMatrix * L,SuperMatrix * U,mem_usage_t * mem_usage)111 int cQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
112 {
113     SCformat *Lstore;
114     NCformat *Ustore;
115     register int n, iword, dword, panel_size = sp_ienv(1);
116 
117     Lstore = L->Store;
118     Ustore = U->Store;
119     n = L->ncol;
120     iword = sizeof(int);
121     dword = sizeof(complex);
122 
123     /* For LU factors */
124     mem_usage->for_lu = (float)( (4.0*n + 3.0) * iword +
125                                  Lstore->nzval_colptr[n] * dword +
126                                  Lstore->rowind_colptr[n] * iword );
127     mem_usage->for_lu += (float)( (n + 1.0) * iword +
128 				 Ustore->colptr[n] * (dword + iword) );
129 
130     /* Working storage to support factorization */
131     mem_usage->total_needed = mem_usage->for_lu +
132 	(float)( (2.0 * panel_size + 4.0 + NO_MARKER) * n * iword +
133 		(panel_size + 1.0) * n * dword );
134 
135     return 0;
136 } /* cQuerySpace */
137 
138 
139 /*! \brief
140  *
141  * <pre>
142  * mem_usage consists of the following fields:
143  *    - for_lu (float)
144  *      The amount of space used in bytes for the L\U data structures.
145  *    - total_needed (float)
146  *      The amount of space needed in bytes to perform factorization.
147  * </pre>
148  */
ilu_cQuerySpace(SuperMatrix * L,SuperMatrix * U,mem_usage_t * mem_usage)149 int ilu_cQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
150 {
151     SCformat *Lstore;
152     NCformat *Ustore;
153     register int n, panel_size = sp_ienv(1);
154     register float iword, dword;
155 
156     Lstore = L->Store;
157     Ustore = U->Store;
158     n = L->ncol;
159     iword = sizeof(int);
160     dword = sizeof(double);
161 
162     /* For LU factors */
163     mem_usage->for_lu = (float)( (4.0f * n + 3.0f) * iword +
164 				 Lstore->nzval_colptr[n] * dword +
165 				 Lstore->rowind_colptr[n] * iword );
166     mem_usage->for_lu += (float)( (n + 1.0f) * iword +
167 				 Ustore->colptr[n] * (dword + iword) );
168 
169     /* Working storage to support factorization.
170        ILU needs 5*n more integers than LU */
171     mem_usage->total_needed = mem_usage->for_lu +
172 	(float)( (2.0f * panel_size + 9.0f + NO_MARKER) * n * iword +
173 		(panel_size + 1.0f) * n * dword );
174 
175     return 0;
176 } /* ilu_cQuerySpace */
177 
178 
179 /*! \brief Allocate storage for the data structures common to all factor routines.
180  *
181  * <pre>
182  * For those unpredictable size, estimate as fill_ratio * nnz(A).
183  * Return value:
184  *     If lwork = -1, return the estimated amount of space required, plus n;
185  *     otherwise, return the amount of space actually allocated when
186  *     memory allocation failure occurred.
187  * </pre>
188  */
189 int
cLUMemInit(fact_t fact,void * work,int lwork,int m,int n,int annz,int panel_size,float fill_ratio,SuperMatrix * L,SuperMatrix * U,GlobalLU_t * Glu,int ** iwork,complex ** dwork)190 cLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
191 	  int panel_size, float fill_ratio, SuperMatrix *L, SuperMatrix *U,
192           GlobalLU_t *Glu, int **iwork, complex **dwork)
193 {
194     int      info, iword, dword;
195     SCformat *Lstore;
196     NCformat *Ustore;
197     int      *xsup, *supno;
198     int      *lsub, *xlsub;
199     complex   *lusup;
200     int      *xlusup;
201     complex   *ucol;
202     int      *usub, *xusub;
203     int      nzlmax, nzumax, nzlumax;
204 
205     iword     = sizeof(int);
206     dword     = sizeof(complex);
207     Glu->n    = n;
208     Glu->num_expansions = 0;
209 
210     Glu->expanders = (ExpHeader *) SUPERLU_MALLOC( NO_MEMTYPE *
211                                                      sizeof(ExpHeader) );
212     if ( !Glu->expanders ) ABORT("SUPERLU_MALLOC fails for expanders");
213 
214     if ( fact != SamePattern_SameRowPerm ) {
215 	/* Guess for L\U factors */
216 	nzumax = nzlumax = fill_ratio * annz;
217 	nzlmax = SUPERLU_MAX(1, fill_ratio/4.) * annz;
218 
219 	if ( lwork == -1 ) {
220 	    return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
221 		    + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
222         } else {
223 	    cSetupSpace(work, lwork, Glu);
224 	}
225 
226 #if ( PRNTlevel >= 1 )
227 	printf("cLUMemInit() called: fill_ratio %.0f, nzlmax %ld, nzumax %ld\n",
228 	       fill_ratio, nzlmax, nzumax);
229 	fflush(stdout);
230 #endif
231 
232 	/* Integer pointers for L\U factors */
233 	if ( Glu->MemModel == SYSTEM ) {
234 	    xsup   = intMalloc(n+1);
235 	    supno  = intMalloc(n+1);
236 	    xlsub  = intMalloc(n+1);
237 	    xlusup = intMalloc(n+1);
238 	    xusub  = intMalloc(n+1);
239 	} else {
240 	    xsup   = (int *)cuser_malloc((n+1) * iword, HEAD, Glu);
241 	    supno  = (int *)cuser_malloc((n+1) * iword, HEAD, Glu);
242 	    xlsub  = (int *)cuser_malloc((n+1) * iword, HEAD, Glu);
243 	    xlusup = (int *)cuser_malloc((n+1) * iword, HEAD, Glu);
244 	    xusub  = (int *)cuser_malloc((n+1) * iword, HEAD, Glu);
245 	}
246 
247 	lusup = (complex *) cexpand( &nzlumax, LUSUP, 0, 0, Glu );
248 	ucol  = (complex *) cexpand( &nzumax, UCOL, 0, 0, Glu );
249 	lsub  = (int *)    cexpand( &nzlmax, LSUB, 0, 0, Glu );
250 	usub  = (int *)    cexpand( &nzumax, USUB, 0, 1, Glu );
251 
252 	while ( !lusup || !ucol || !lsub || !usub ) {
253 	    if ( Glu->MemModel == SYSTEM ) {
254 		SUPERLU_FREE(lusup);
255 		SUPERLU_FREE(ucol);
256 		SUPERLU_FREE(lsub);
257 		SUPERLU_FREE(usub);
258 	    } else {
259 		cuser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword,
260                             HEAD, Glu);
261 	    }
262 	    nzlumax /= 2;
263 	    nzumax /= 2;
264 	    nzlmax /= 2;
265 	    if ( nzlumax < annz ) {
266 		printf("Not enough memory to perform factorization.\n");
267 		return (cmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
268 	    }
269 #if ( PRNTlevel >= 1)
270 	    printf("cLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n",
271 		   nzlmax, nzumax);
272 	    fflush(stdout);
273 #endif
274 	    lusup = (complex *) cexpand( &nzlumax, LUSUP, 0, 0, Glu );
275 	    ucol  = (complex *) cexpand( &nzumax, UCOL, 0, 0, Glu );
276 	    lsub  = (int *)    cexpand( &nzlmax, LSUB, 0, 0, Glu );
277 	    usub  = (int *)    cexpand( &nzumax, USUB, 0, 1, Glu );
278 	}
279 
280     } else {
281 	/* fact == SamePattern_SameRowPerm */
282 	Lstore   = L->Store;
283 	Ustore   = U->Store;
284 	xsup     = Lstore->sup_to_col;
285 	supno    = Lstore->col_to_sup;
286 	xlsub    = Lstore->rowind_colptr;
287 	xlusup   = Lstore->nzval_colptr;
288 	xusub    = Ustore->colptr;
289 	nzlmax   = Glu->nzlmax;    /* max from previous factorization */
290 	nzumax   = Glu->nzumax;
291 	nzlumax  = Glu->nzlumax;
292 
293 	if ( lwork == -1 ) {
294 	    return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
295 		    + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
296         } else if ( lwork == 0 ) {
297 	    Glu->MemModel = SYSTEM;
298 	} else {
299 	    Glu->MemModel = USER;
300 	    Glu->stack.top2 = (lwork/4)*4; /* must be word-addressable */
301 	    Glu->stack.size = Glu->stack.top2;
302 	}
303 
304 	lsub  = Glu->expanders[LSUB].mem  = Lstore->rowind;
305 	lusup = Glu->expanders[LUSUP].mem = Lstore->nzval;
306 	usub  = Glu->expanders[USUB].mem  = Ustore->rowind;
307 	ucol  = Glu->expanders[UCOL].mem  = Ustore->nzval;;
308 	Glu->expanders[LSUB].size         = nzlmax;
309 	Glu->expanders[LUSUP].size        = nzlumax;
310 	Glu->expanders[USUB].size         = nzumax;
311 	Glu->expanders[UCOL].size         = nzumax;
312     }
313 
314     Glu->xsup    = xsup;
315     Glu->supno   = supno;
316     Glu->lsub    = lsub;
317     Glu->xlsub   = xlsub;
318     Glu->lusup   = (void *) lusup;
319     Glu->xlusup  = xlusup;
320     Glu->ucol    = (void *) ucol;
321     Glu->usub    = usub;
322     Glu->xusub   = xusub;
323     Glu->nzlmax  = nzlmax;
324     Glu->nzumax  = nzumax;
325     Glu->nzlumax = nzlumax;
326 
327     info = cLUWorkInit(m, n, panel_size, iwork, dwork, Glu);
328     if ( info )
329 	return ( info + cmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
330 
331     ++Glu->num_expansions;
332     return 0;
333 
334 } /* cLUMemInit */
335 
336 /*! \brief Allocate known working storage. Returns 0 if success, otherwise
337    returns the number of bytes allocated so far when failure occurred. */
338 int
cLUWorkInit(int m,int n,int panel_size,int ** iworkptr,complex ** dworkptr,GlobalLU_t * Glu)339 cLUWorkInit(int m, int n, int panel_size, int **iworkptr,
340             complex **dworkptr, GlobalLU_t *Glu)
341 {
342     int    isize, dsize, extra;
343     complex *old_ptr;
344     int    maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) ),
345            rowblk   = sp_ienv(4);
346 
347     isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
348     dsize = (m * panel_size +
349 	     NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(complex);
350 
351     if ( Glu->MemModel == SYSTEM )
352 	*iworkptr = (int *) intCalloc(isize/sizeof(int));
353     else
354 	*iworkptr = (int *) cuser_malloc(isize, TAIL, Glu);
355     if ( ! *iworkptr ) {
356 	fprintf(stderr, "cLUWorkInit: malloc fails for local iworkptr[]\n");
357 	return (isize + n);
358     }
359 
360     if ( Glu->MemModel == SYSTEM )
361 	*dworkptr = (complex *) SUPERLU_MALLOC(dsize);
362     else {
363 	*dworkptr = (complex *) cuser_malloc(dsize, TAIL, Glu);
364 	if ( NotDoubleAlign(*dworkptr) ) {
365 	    old_ptr = *dworkptr;
366 	    *dworkptr = (complex*) DoubleAlign(*dworkptr);
367 	    *dworkptr = (complex*) ((double*)*dworkptr - 1);
368 	    extra = (char*)old_ptr - (char*)*dworkptr;
369 #ifdef DEBUG
370 	    printf("cLUWorkInit: not aligned, extra %d\n", extra);
371 #endif
372 	    Glu->stack.top2 -= extra;
373 	    Glu->stack.used += extra;
374 	}
375     }
376     if ( ! *dworkptr ) {
377 	fprintf(stderr, "malloc fails for local dworkptr[].");
378 	return (isize + dsize + n);
379     }
380 
381     return 0;
382 }
383 
384 
385 /*! \brief Set up pointers for real working arrays.
386  */
387 void
cSetRWork(int m,int panel_size,complex * dworkptr,complex ** dense,complex ** tempv)388 cSetRWork(int m, int panel_size, complex *dworkptr,
389 	 complex **dense, complex **tempv)
390 {
391     complex zero = {0.0, 0.0};
392 
393     int maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) ),
394         rowblk   = sp_ienv(4);
395     *dense = dworkptr;
396     *tempv = *dense + panel_size*m;
397     cfill (*dense, m * panel_size, zero);
398     cfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);
399 }
400 
401 /*! \brief Free the working storage used by factor routines.
402  */
cLUWorkFree(int * iwork,complex * dwork,GlobalLU_t * Glu)403 void cLUWorkFree(int *iwork, complex *dwork, GlobalLU_t *Glu)
404 {
405     if ( Glu->MemModel == SYSTEM ) {
406 	SUPERLU_FREE (iwork);
407 	SUPERLU_FREE (dwork);
408     } else {
409 	Glu->stack.used -= (Glu->stack.size - Glu->stack.top2);
410 	Glu->stack.top2 = Glu->stack.size;
411 /*	cStackCompress(Glu);  */
412     }
413 
414     SUPERLU_FREE (Glu->expanders);
415     Glu->expanders = NULL;
416 }
417 
418 /*! \brief Expand the data structures for L and U during the factorization.
419  *
420  * <pre>
421  * Return value:   0 - successful return
422  *               > 0 - number of bytes allocated when run out of space
423  * </pre>
424  */
425 int
cLUMemXpand(int jcol,int next,MemType mem_type,int * maxlen,GlobalLU_t * Glu)426 cLUMemXpand(int jcol,
427 	   int next,          /* number of elements currently in the factors */
428 	   MemType mem_type,  /* which type of memory to expand  */
429 	   int *maxlen,       /* modified - maximum length of a data structure */
430 	   GlobalLU_t *Glu    /* modified - global LU data structures */
431 	   )
432 {
433     void   *new_mem;
434 
435 #ifdef DEBUG
436     printf("cLUMemXpand(): jcol %d, next %d, maxlen %d, MemType %d\n",
437 	   jcol, next, *maxlen, mem_type);
438 #endif
439 
440     if (mem_type == USUB)
441     	new_mem = cexpand(maxlen, mem_type, next, 1, Glu);
442     else
443 	new_mem = cexpand(maxlen, mem_type, next, 0, Glu);
444 
445     if ( !new_mem ) {
446 	int    nzlmax  = Glu->nzlmax;
447 	int    nzumax  = Glu->nzumax;
448 	int    nzlumax = Glu->nzlumax;
449     	fprintf(stderr, "Can't expand MemType %d: jcol %d\n", mem_type, jcol);
450     	return (cmemory_usage(nzlmax, nzumax, nzlumax, Glu->n) + Glu->n);
451     }
452 
453     switch ( mem_type ) {
454       case LUSUP:
455 	Glu->lusup   = (void *) new_mem;
456 	Glu->nzlumax = *maxlen;
457 	break;
458       case UCOL:
459 	Glu->ucol   = (void *) new_mem;
460 	Glu->nzumax = *maxlen;
461 	break;
462       case LSUB:
463 	Glu->lsub   = (int *) new_mem;
464 	Glu->nzlmax = *maxlen;
465 	break;
466       case USUB:
467 	Glu->usub   = (int *) new_mem;
468 	Glu->nzumax = *maxlen;
469 	break;
470     }
471 
472     return 0;
473 
474 }
475 
476 
477 
478 void
copy_mem_complex(int howmany,void * old,void * new)479 copy_mem_complex(int howmany, void *old, void *new)
480 {
481     register int i;
482     complex *dold = old;
483     complex *dnew = new;
484     for (i = 0; i < howmany; i++) dnew[i] = dold[i];
485 }
486 
487 /*! \brief Expand the existing storage to accommodate more fill-ins.
488  */
489 void
cexpand(int * prev_len,MemType type,int len_to_copy,int keep_prev,GlobalLU_t * Glu)490 *cexpand (
491 	 int *prev_len,   /* length used from previous call */
492 	 MemType type,    /* which part of the memory to expand */
493 	 int len_to_copy, /* size of the memory to be copied to new store */
494 	 int keep_prev,   /* = 1: use prev_len;
495 			     = 0: compute new_len to expand */
496 	 GlobalLU_t *Glu  /* modified - global LU data structures */
497 	)
498 {
499     float    EXPAND = 1.5;
500     float    alpha;
501     void     *new_mem, *old_mem;
502     int      new_len, tries, lword, extra, bytes_to_copy;
503     ExpHeader *expanders = Glu->expanders; /* Array of 4 types of memory */
504 
505     alpha = EXPAND;
506 
507     if ( Glu->num_expansions == 0 || keep_prev ) {
508         /* First time allocate requested */
509         new_len = *prev_len;
510     } else {
511 	new_len = alpha * *prev_len;
512     }
513 
514     if ( type == LSUB || type == USUB ) lword = sizeof(int);
515     else lword = sizeof(complex);
516 
517     if ( Glu->MemModel == SYSTEM ) {
518 	new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
519 	if ( Glu->num_expansions != 0 ) {
520 	    tries = 0;
521 	    if ( keep_prev ) {
522 		if ( !new_mem ) return (NULL);
523 	    } else {
524 		while ( !new_mem ) {
525 		    if ( ++tries > 10 ) return (NULL);
526 		    alpha = Reduce(alpha);
527 		    new_len = alpha * *prev_len;
528 		    new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
529 		}
530 	    }
531 	    if ( type == LSUB || type == USUB ) {
532 		copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
533 	    } else {
534 		copy_mem_complex(len_to_copy, expanders[type].mem, new_mem);
535 	    }
536 	    SUPERLU_FREE (expanders[type].mem);
537 	}
538 	expanders[type].mem = (void *) new_mem;
539 
540     } else { /* MemModel == USER */
541 	if ( Glu->num_expansions == 0 ) {
542 	    new_mem = cuser_malloc(new_len * lword, HEAD, Glu);
543 	    if ( NotDoubleAlign(new_mem) &&
544 		(type == LUSUP || type == UCOL) ) {
545 		old_mem = new_mem;
546 		new_mem = (void *)DoubleAlign(new_mem);
547 		extra = (char*)new_mem - (char*)old_mem;
548 #ifdef DEBUG
549 		printf("expand(): not aligned, extra %d\n", extra);
550 #endif
551 		Glu->stack.top1 += extra;
552 		Glu->stack.used += extra;
553 	    }
554 	    expanders[type].mem = (void *) new_mem;
555 	} else {
556 	    tries = 0;
557 	    extra = (new_len - *prev_len) * lword;
558 	    if ( keep_prev ) {
559 		if ( StackFull(extra) ) return (NULL);
560 	    } else {
561 		while ( StackFull(extra) ) {
562 		    if ( ++tries > 10 ) return (NULL);
563 		    alpha = Reduce(alpha);
564 		    new_len = alpha * *prev_len;
565 		    extra = (new_len - *prev_len) * lword;
566 		}
567 	    }
568 
569 	    if ( type != USUB ) {
570 		new_mem = (void*)((char*)expanders[type + 1].mem + extra);
571 		bytes_to_copy = (char*)Glu->stack.array + Glu->stack.top1
572 		    - (char*)expanders[type + 1].mem;
573 		user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy);
574 
575 		if ( type < USUB ) {
576 		    Glu->usub = expanders[USUB].mem =
577 			(void*)((char*)expanders[USUB].mem + extra);
578 		}
579 		if ( type < LSUB ) {
580 		    Glu->lsub = expanders[LSUB].mem =
581 			(void*)((char*)expanders[LSUB].mem + extra);
582 		}
583 		if ( type < UCOL ) {
584 		    Glu->ucol = expanders[UCOL].mem =
585 			(void*)((char*)expanders[UCOL].mem + extra);
586 		}
587 		Glu->stack.top1 += extra;
588 		Glu->stack.used += extra;
589 		if ( type == UCOL ) {
590 		    Glu->stack.top1 += extra;   /* Add same amount for USUB */
591 		    Glu->stack.used += extra;
592 		}
593 
594 	    } /* if ... */
595 
596 	} /* else ... */
597     }
598 
599     expanders[type].size = new_len;
600     *prev_len = new_len;
601     if ( Glu->num_expansions ) ++Glu->num_expansions;
602 
603     return (void *) expanders[type].mem;
604 
605 } /* cexpand */
606 
607 
608 /*! \brief Compress the work[] array to remove fragmentation.
609  */
610 void
cStackCompress(GlobalLU_t * Glu)611 cStackCompress(GlobalLU_t *Glu)
612 {
613     register int iword, dword, ndim;
614     char    *last, *fragment;
615     int      *ifrom, *ito;
616     complex   *dfrom, *dto;
617     int      *xlsub, *lsub, *xusub, *usub, *xlusup;
618     complex   *ucol, *lusup;
619 
620     iword = sizeof(int);
621     dword = sizeof(complex);
622     ndim = Glu->n;
623 
624     xlsub  = Glu->xlsub;
625     lsub   = Glu->lsub;
626     xusub  = Glu->xusub;
627     usub   = Glu->usub;
628     xlusup = Glu->xlusup;
629     ucol   = Glu->ucol;
630     lusup  = Glu->lusup;
631 
632     dfrom = ucol;
633     dto = (complex *)((char*)lusup + xlusup[ndim] * dword);
634     copy_mem_complex(xusub[ndim], dfrom, dto);
635     ucol = dto;
636 
637     ifrom = lsub;
638     ito = (int *) ((char*)ucol + xusub[ndim] * iword);
639     copy_mem_int(xlsub[ndim], ifrom, ito);
640     lsub = ito;
641 
642     ifrom = usub;
643     ito = (int *) ((char*)lsub + xlsub[ndim] * iword);
644     copy_mem_int(xusub[ndim], ifrom, ito);
645     usub = ito;
646 
647     last = (char*)usub + xusub[ndim] * iword;
648     fragment = (char*) (((char*)Glu->stack.array + Glu->stack.top1) - last);
649     Glu->stack.used -= (long int) fragment;
650     Glu->stack.top1 -= (long int) fragment;
651 
652     Glu->ucol = ucol;
653     Glu->lsub = lsub;
654     Glu->usub = usub;
655 
656 #ifdef DEBUG
657     printf("cStackCompress: fragment %d\n", fragment);
658     /* for (last = 0; last < ndim; ++last)
659 	print_lu_col("After compress:", last, 0);*/
660 #endif
661 
662 }
663 
664 /*! \brief Allocate storage for original matrix A
665  */
666 void
callocateA(int n,int nnz,complex ** a,int ** asub,int ** xa)667 callocateA(int n, int nnz, complex **a, int **asub, int **xa)
668 {
669     *a    = (complex *) complexMalloc(nnz);
670     *asub = (int *) intMalloc(nnz);
671     *xa   = (int *) intMalloc(n+1);
672 }
673 
674 
complexMalloc(int n)675 complex *complexMalloc(int n)
676 {
677     complex *buf;
678     buf = (complex *) SUPERLU_MALLOC((size_t)n * sizeof(complex));
679     if ( !buf ) {
680 	ABORT("SUPERLU_MALLOC failed for buf in complexMalloc()\n");
681     }
682     return (buf);
683 }
684 
complexCalloc(int n)685 complex *complexCalloc(int n)
686 {
687     complex *buf;
688     register int i;
689     complex zero = {0.0, 0.0};
690     buf = (complex *) SUPERLU_MALLOC((size_t)n * sizeof(complex));
691     if ( !buf ) {
692 	ABORT("SUPERLU_MALLOC failed for buf in complexCalloc()\n");
693     }
694     for (i = 0; i < n; ++i) buf[i] = zero;
695     return (buf);
696 }
697 
698 
cmemory_usage(const int nzlmax,const int nzumax,const int nzlumax,const int n)699 int cmemory_usage(const int nzlmax, const int nzumax,
700 		  const int nzlumax, const int n)
701 {
702     register int iword, dword;
703 
704     iword   = sizeof(int);
705     dword   = sizeof(complex);
706 
707     return (10 * n * iword +
708 	    nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);
709 
710 }
711