1 
2 /*! @file cmemory.c
3  * \brief Memory details
4  *
5  * <pre>
6  * -- SuperLU routine (version 4.0) --
7  * Lawrence Berkeley National Laboratory.
8  * June 30, 2009
9  * </pre>
10  */
11 #include "slu_cdefs.h"
12 
13 
14 /* Internal prototypes */
15 void  *cexpand (int *, MemType,int, int, GlobalLU_t *);
16 int   cLUWorkInit (int, int, int, int **, complex **, GlobalLU_t *);
17 void  copy_mem_complex (int, void *, void *);
18 void  cStackCompress (GlobalLU_t *);
19 void  cSetupSpace (void *, int, GlobalLU_t *);
20 void  *cuser_malloc (int, int, GlobalLU_t *);
21 void  cuser_free (int, int, GlobalLU_t *);
22 
23 /* External prototypes (in memory.c - prec-independent) */
24 extern void    copy_mem_int    (int, void *, void *);
25 extern void    user_bcopy      (char *, char *, int);
26 
27 
28 /* Macros to manipulate stack */
29 #define StackFull(x)         ( x + Glu->stack.used >= Glu->stack.size )
30 #define NotDoubleAlign(addr) ( (long int)addr & 7 )
31 #define DoubleAlign(addr)    ( ((long int)addr + 7) & ~7L )
32 #define TempSpace(m, w)      ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
33 			      (w + 1) * m * sizeof(complex) )
34 #define Reduce(alpha)        ((alpha + 1) / 2)  /* i.e. (alpha-1)/2 + 1 */
35 
36 
37 
38 
39 /*! \brief Setup the memory model to be used for factorization.
40  *
41  *    lwork = 0: use system malloc;
42  *    lwork > 0: use user-supplied work[] space.
43  */
cSetupSpace(void * work,int lwork,GlobalLU_t * Glu)44 void cSetupSpace(void *work, int lwork, GlobalLU_t *Glu)
45 {
46     if ( lwork == 0 ) {
47 	Glu->MemModel = SYSTEM; /* malloc/free */
48     } else if ( lwork > 0 ) {
49 	Glu->MemModel = USER;   /* user provided space */
50 	Glu->stack.used = 0;
51 	Glu->stack.top1 = 0;
52 	Glu->stack.top2 = (lwork/4)*4; /* must be word addressable */
53 	Glu->stack.size = Glu->stack.top2;
54 	Glu->stack.array = (void *) work;
55     }
56 }
57 
58 
59 
cuser_malloc(int bytes,int which_end,GlobalLU_t * Glu)60 void *cuser_malloc(int bytes, int which_end, GlobalLU_t *Glu)
61 {
62     void *buf;
63 
64     if ( StackFull(bytes) ) return (NULL);
65 
66     if ( which_end == HEAD ) {
67 	buf = (char*) Glu->stack.array + Glu->stack.top1;
68 	Glu->stack.top1 += bytes;
69     } else {
70 	Glu->stack.top2 -= bytes;
71 	buf = (char*) Glu->stack.array + Glu->stack.top2;
72     }
73 
74     Glu->stack.used += bytes;
75     return buf;
76 }
77 
78 
cuser_free(int bytes,int which_end,GlobalLU_t * Glu)79 void cuser_free(int bytes, int which_end, GlobalLU_t *Glu)
80 {
81     if ( which_end == HEAD ) {
82 	Glu->stack.top1 -= bytes;
83     } else {
84 	Glu->stack.top2 += bytes;
85     }
86     Glu->stack.used -= bytes;
87 }
88 
89 
90 
91 /*! \brief
92  *
93  * <pre>
94  * mem_usage consists of the following fields:
95  *    - for_lu (float)
96  *      The amount of space used in bytes for the L\U data structures.
97  *    - total_needed (float)
98  *      The amount of space needed in bytes to perform factorization.
99  * </pre>
100  */
cQuerySpace(SuperMatrix * L,SuperMatrix * U,mem_usage_t * mem_usage)101 int cQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
102 {
103     SCformat *Lstore;
104     NCformat *Ustore;
105     register int n, iword, dword, panel_size = sp_ienv(1);
106 
107     Lstore = L->Store;
108     Ustore = U->Store;
109     n = L->ncol;
110     iword = sizeof(int);
111     dword = sizeof(complex);
112 
113     /* For LU factors */
114     mem_usage->for_lu = (float)( (4.0*n + 3.0) * iword +
115                                  Lstore->nzval_colptr[n] * dword +
116                                  Lstore->rowind_colptr[n] * iword );
117     mem_usage->for_lu += (float)( (n + 1.0) * iword +
118 				 Ustore->colptr[n] * (dword + iword) );
119 
120     /* Working storage to support factorization */
121     mem_usage->total_needed = mem_usage->for_lu +
122 	(float)( (2.0 * panel_size + 4.0 + NO_MARKER) * n * iword +
123 		(panel_size + 1.0) * n * dword );
124 
125     return 0;
126 } /* cQuerySpace */
127 
128 
129 /*! \brief
130  *
131  * <pre>
132  * mem_usage consists of the following fields:
133  *    - for_lu (float)
134  *      The amount of space used in bytes for the L\U data structures.
135  *    - total_needed (float)
136  *      The amount of space needed in bytes to perform factorization.
137  * </pre>
138  */
ilu_cQuerySpace(SuperMatrix * L,SuperMatrix * U,mem_usage_t * mem_usage)139 int ilu_cQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
140 {
141     SCformat *Lstore;
142     NCformat *Ustore;
143     register int n, panel_size = sp_ienv(1);
144     register float iword, dword;
145 
146     Lstore = L->Store;
147     Ustore = U->Store;
148     n = L->ncol;
149     iword = sizeof(int);
150     dword = sizeof(double);
151 
152     /* For LU factors */
153     mem_usage->for_lu = (float)( (4.0f * n + 3.0f) * iword +
154 				 Lstore->nzval_colptr[n] * dword +
155 				 Lstore->rowind_colptr[n] * iword );
156     mem_usage->for_lu += (float)( (n + 1.0f) * iword +
157 				 Ustore->colptr[n] * (dword + iword) );
158 
159     /* Working storage to support factorization.
160        ILU needs 5*n more integers than LU */
161     mem_usage->total_needed = mem_usage->for_lu +
162 	(float)( (2.0f * panel_size + 9.0f + NO_MARKER) * n * iword +
163 		(panel_size + 1.0f) * n * dword );
164 
165     return 0;
166 } /* ilu_cQuerySpace */
167 
168 
169 /*! \brief Allocate storage for the data structures common to all factor routines.
170  *
171  * <pre>
172  * For those unpredictable size, estimate as fill_ratio * nnz(A).
173  * Return value:
174  *     If lwork = -1, return the estimated amount of space required, plus n;
175  *     otherwise, return the amount of space actually allocated when
176  *     memory allocation failure occurred.
177  * </pre>
178  */
179 int
cLUMemInit(fact_t fact,void * work,int lwork,int m,int n,int annz,int panel_size,float fill_ratio,SuperMatrix * L,SuperMatrix * U,GlobalLU_t * Glu,int ** iwork,complex ** dwork)180 cLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
181 	  int panel_size, float fill_ratio, SuperMatrix *L, SuperMatrix *U,
182           GlobalLU_t *Glu, int **iwork, complex **dwork)
183 {
184     int      info, iword, dword;
185     SCformat *Lstore;
186     NCformat *Ustore;
187     int      *xsup, *supno;
188     int      *lsub, *xlsub;
189     complex   *lusup;
190     int      *xlusup;
191     complex   *ucol;
192     int      *usub, *xusub;
193     int      nzlmax, nzumax, nzlumax;
194 
195     iword     = sizeof(int);
196     dword     = sizeof(complex);
197     Glu->n    = n;
198     Glu->num_expansions = 0;
199 
200     if ( !Glu->expanders )
201         Glu->expanders = (ExpHeader*)SUPERLU_MALLOC( NO_MEMTYPE *
202                                                      sizeof(ExpHeader) );
203     if ( !Glu->expanders ) ABORT("SUPERLU_MALLOC fails for expanders");
204 
205     if ( fact != SamePattern_SameRowPerm ) {
206 	/* Guess for L\U factors */
207 	nzumax = nzlumax = fill_ratio * annz;
208 	nzlmax = SUPERLU_MAX(1, fill_ratio/4.) * annz;
209 
210 	if ( lwork == -1 ) {
211 	    return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
212 		    + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
213         } else {
214 	    cSetupSpace(work, lwork, Glu);
215 	}
216 
217 #if ( PRNTlevel >= 1 )
218 	printf("cLUMemInit() called: fill_ratio %.0f, nzlmax %ld, nzumax %ld\n",
219 	       fill_ratio, nzlmax, nzumax);
220 	fflush(stdout);
221 #endif
222 
223 	/* Integer pointers for L\U factors */
224 	if ( Glu->MemModel == SYSTEM ) {
225 	    xsup   = intMalloc(n+1);
226 	    supno  = intMalloc(n+1);
227 	    xlsub  = intMalloc(n+1);
228 	    xlusup = intMalloc(n+1);
229 	    xusub  = intMalloc(n+1);
230 	} else {
231 	    xsup   = (int *)cuser_malloc((n+1) * iword, HEAD, Glu);
232 	    supno  = (int *)cuser_malloc((n+1) * iword, HEAD, Glu);
233 	    xlsub  = (int *)cuser_malloc((n+1) * iword, HEAD, Glu);
234 	    xlusup = (int *)cuser_malloc((n+1) * iword, HEAD, Glu);
235 	    xusub  = (int *)cuser_malloc((n+1) * iword, HEAD, Glu);
236 	}
237 
238 	lusup = (complex *) cexpand( &nzlumax, LUSUP, 0, 0, Glu );
239 	ucol  = (complex *) cexpand( &nzumax, UCOL, 0, 0, Glu );
240 	lsub  = (int *)    cexpand( &nzlmax, LSUB, 0, 0, Glu );
241 	usub  = (int *)    cexpand( &nzumax, USUB, 0, 1, Glu );
242 
243 	while ( !lusup || !ucol || !lsub || !usub ) {
244 	    if ( Glu->MemModel == SYSTEM ) {
245 		SUPERLU_FREE(lusup);
246 		SUPERLU_FREE(ucol);
247 		SUPERLU_FREE(lsub);
248 		SUPERLU_FREE(usub);
249 	    } else {
250 		cuser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword,
251                             HEAD, Glu);
252 	    }
253 	    nzlumax /= 2;
254 	    nzumax /= 2;
255 	    nzlmax /= 2;
256 	    if ( nzlumax < annz ) {
257 		printf("Not enough memory to perform factorization.\n");
258 		return (cmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
259 	    }
260 #if ( PRNTlevel >= 1)
261 	    printf("cLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n",
262 		   nzlmax, nzumax);
263 	    fflush(stdout);
264 #endif
265 	    lusup = (complex *) cexpand( &nzlumax, LUSUP, 0, 0, Glu );
266 	    ucol  = (complex *) cexpand( &nzumax, UCOL, 0, 0, Glu );
267 	    lsub  = (int *)    cexpand( &nzlmax, LSUB, 0, 0, Glu );
268 	    usub  = (int *)    cexpand( &nzumax, USUB, 0, 1, Glu );
269 	}
270 
271     } else {
272 	/* fact == SamePattern_SameRowPerm */
273 	Lstore   = L->Store;
274 	Ustore   = U->Store;
275 	xsup     = Lstore->sup_to_col;
276 	supno    = Lstore->col_to_sup;
277 	xlsub    = Lstore->rowind_colptr;
278 	xlusup   = Lstore->nzval_colptr;
279 	xusub    = Ustore->colptr;
280 	nzlmax   = Glu->nzlmax;    /* max from previous factorization */
281 	nzumax   = Glu->nzumax;
282 	nzlumax  = Glu->nzlumax;
283 
284 	if ( lwork == -1 ) {
285 	    return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
286 		    + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
287         } else if ( lwork == 0 ) {
288 	    Glu->MemModel = SYSTEM;
289 	} else {
290 	    Glu->MemModel = USER;
291 	    Glu->stack.top2 = (lwork/4)*4; /* must be word-addressable */
292 	    Glu->stack.size = Glu->stack.top2;
293 	}
294 
295 	lsub  = Glu->expanders[LSUB].mem  = Lstore->rowind;
296 	lusup = Glu->expanders[LUSUP].mem = Lstore->nzval;
297 	usub  = Glu->expanders[USUB].mem  = Ustore->rowind;
298 	ucol  = Glu->expanders[UCOL].mem  = Ustore->nzval;;
299 	Glu->expanders[LSUB].size         = nzlmax;
300 	Glu->expanders[LUSUP].size        = nzlumax;
301 	Glu->expanders[USUB].size         = nzumax;
302 	Glu->expanders[UCOL].size         = nzumax;
303     }
304 
305     Glu->xsup    = xsup;
306     Glu->supno   = supno;
307     Glu->lsub    = lsub;
308     Glu->xlsub   = xlsub;
309     Glu->lusup   = lusup;
310     Glu->xlusup  = xlusup;
311     Glu->ucol    = ucol;
312     Glu->usub    = usub;
313     Glu->xusub   = xusub;
314     Glu->nzlmax  = nzlmax;
315     Glu->nzumax  = nzumax;
316     Glu->nzlumax = nzlumax;
317 
318     info = cLUWorkInit(m, n, panel_size, iwork, dwork, Glu);
319     if ( info )
320 	return ( info + cmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
321 
322     ++Glu->num_expansions;
323     return 0;
324 
325 } /* cLUMemInit */
326 
327 /*! \brief Allocate known working storage. Returns 0 if success, otherwise
328    returns the number of bytes allocated so far when failure occurred. */
329 int
cLUWorkInit(int m,int n,int panel_size,int ** iworkptr,complex ** dworkptr,GlobalLU_t * Glu)330 cLUWorkInit(int m, int n, int panel_size, int **iworkptr,
331             complex **dworkptr, GlobalLU_t *Glu)
332 {
333     int    isize, dsize, extra;
334     complex *old_ptr;
335     int    maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) ),
336            rowblk   = sp_ienv(4);
337 
338     isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
339     dsize = (m * panel_size +
340 	     NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(complex);
341 
342     if ( Glu->MemModel == SYSTEM )
343 	*iworkptr = (int *) intCalloc(isize/sizeof(int));
344     else
345 	*iworkptr = (int *) cuser_malloc(isize, TAIL, Glu);
346     if ( ! *iworkptr ) {
347 	fprintf(stderr, "cLUWorkInit: malloc fails for local iworkptr[]\n");
348 	return (isize + n);
349     }
350 
351     if ( Glu->MemModel == SYSTEM )
352 	*dworkptr = (complex *) SUPERLU_MALLOC(dsize);
353     else {
354 	*dworkptr = (complex *) cuser_malloc(dsize, TAIL, Glu);
355 	if ( NotDoubleAlign(*dworkptr) ) {
356 	    old_ptr = *dworkptr;
357 	    *dworkptr = (complex*) DoubleAlign(*dworkptr);
358 	    *dworkptr = (complex*) ((double*)*dworkptr - 1);
359 	    extra = (char*)old_ptr - (char*)*dworkptr;
360 #ifdef DEBUG
361 	    printf("cLUWorkInit: not aligned, extra %d\n", extra);
362 #endif
363 	    Glu->stack.top2 -= extra;
364 	    Glu->stack.used += extra;
365 	}
366     }
367     if ( ! *dworkptr ) {
368 	fprintf(stderr, "malloc fails for local dworkptr[].");
369 	return (isize + dsize + n);
370     }
371 
372     return 0;
373 }
374 
375 
376 /*! \brief Set up pointers for real working arrays.
377  */
378 void
cSetRWork(int m,int panel_size,complex * dworkptr,complex ** dense,complex ** tempv)379 cSetRWork(int m, int panel_size, complex *dworkptr,
380 	 complex **dense, complex **tempv)
381 {
382     complex zero = {0.0, 0.0};
383 
384     int maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) ),
385         rowblk   = sp_ienv(4);
386     *dense = dworkptr;
387     *tempv = *dense + panel_size*m;
388     cfill (*dense, m * panel_size, zero);
389     cfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);
390 }
391 
392 /*! \brief Free the working storage used by factor routines.
393  */
cLUWorkFree(int * iwork,complex * dwork,GlobalLU_t * Glu)394 void cLUWorkFree(int *iwork, complex *dwork, GlobalLU_t *Glu)
395 {
396     if ( Glu->MemModel == SYSTEM ) {
397 	SUPERLU_FREE (iwork);
398 	SUPERLU_FREE (dwork);
399     } else {
400 	Glu->stack.used -= (Glu->stack.size - Glu->stack.top2);
401 	Glu->stack.top2 = Glu->stack.size;
402 /*	cStackCompress(Glu);  */
403     }
404 
405     SUPERLU_FREE (Glu->expanders);
406     Glu->expanders = NULL;
407 }
408 
409 /*! \brief Expand the data structures for L and U during the factorization.
410  *
411  * <pre>
412  * Return value:   0 - successful return
413  *               > 0 - number of bytes allocated when run out of space
414  * </pre>
415  */
416 int
cLUMemXpand(int jcol,int next,MemType mem_type,int * maxlen,GlobalLU_t * Glu)417 cLUMemXpand(int jcol,
418 	   int next,          /* number of elements currently in the factors */
419 	   MemType mem_type,  /* which type of memory to expand  */
420 	   int *maxlen,       /* modified - maximum length of a data structure */
421 	   GlobalLU_t *Glu    /* modified - global LU data structures */
422 	   )
423 {
424     void   *new_mem;
425 
426 #ifdef DEBUG
427     printf("cLUMemXpand(): jcol %d, next %d, maxlen %d, MemType %d\n",
428 	   jcol, next, *maxlen, mem_type);
429 #endif
430 
431     if (mem_type == USUB)
432     	new_mem = cexpand(maxlen, mem_type, next, 1, Glu);
433     else
434 	new_mem = cexpand(maxlen, mem_type, next, 0, Glu);
435 
436     if ( !new_mem ) {
437 	int    nzlmax  = Glu->nzlmax;
438 	int    nzumax  = Glu->nzumax;
439 	int    nzlumax = Glu->nzlumax;
440     	fprintf(stderr, "Can't expand MemType %d: jcol %d\n", mem_type, jcol);
441     	return (cmemory_usage(nzlmax, nzumax, nzlumax, Glu->n) + Glu->n);
442     }
443 
444     switch ( mem_type ) {
445       case LUSUP:
446 	Glu->lusup   = (complex *) new_mem;
447 	Glu->nzlumax = *maxlen;
448 	break;
449       case UCOL:
450 	Glu->ucol   = (complex *) new_mem;
451 	Glu->nzumax = *maxlen;
452 	break;
453       case LSUB:
454 	Glu->lsub   = (int *) new_mem;
455 	Glu->nzlmax = *maxlen;
456 	break;
457       case USUB:
458 	Glu->usub   = (int *) new_mem;
459 	Glu->nzumax = *maxlen;
460 	break;
461     }
462 
463     return 0;
464 
465 }
466 
467 
468 
469 void
copy_mem_complex(int howmany,void * old,void * new)470 copy_mem_complex(int howmany, void *old, void *new)
471 {
472     register int i;
473     complex *dold = old;
474     complex *dnew = new;
475     for (i = 0; i < howmany; i++) dnew[i] = dold[i];
476 }
477 
478 /*! \brief Expand the existing storage to accommodate more fill-ins.
479  */
480 void
cexpand(int * prev_len,MemType type,int len_to_copy,int keep_prev,GlobalLU_t * Glu)481 *cexpand (
482 	 int *prev_len,   /* length used from previous call */
483 	 MemType type,    /* which part of the memory to expand */
484 	 int len_to_copy, /* size of the memory to be copied to new store */
485 	 int keep_prev,   /* = 1: use prev_len;
486 			     = 0: compute new_len to expand */
487 	 GlobalLU_t *Glu  /* modified - global LU data structures */
488 	)
489 {
490     float    EXPAND = 1.5;
491     float    alpha;
492     void     *new_mem, *old_mem;
493     int      new_len, tries, lword, extra, bytes_to_copy;
494     ExpHeader *expanders = Glu->expanders; /* Array of 4 types of memory */
495 
496     alpha = EXPAND;
497 
498     if ( Glu->num_expansions == 0 || keep_prev ) {
499         /* First time allocate requested */
500         new_len = *prev_len;
501     } else {
502 	new_len = alpha * *prev_len;
503     }
504 
505     if ( type == LSUB || type == USUB ) lword = sizeof(int);
506     else lword = sizeof(complex);
507 
508     if ( Glu->MemModel == SYSTEM ) {
509 	new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
510 	if ( Glu->num_expansions != 0 ) {
511 	    tries = 0;
512 	    if ( keep_prev ) {
513 		if ( !new_mem ) return (NULL);
514 	    } else {
515 		while ( !new_mem ) {
516 		    if ( ++tries > 10 ) return (NULL);
517 		    alpha = Reduce(alpha);
518 		    new_len = alpha * *prev_len;
519 		    new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
520 		}
521 	    }
522 	    if ( type == LSUB || type == USUB ) {
523 		copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
524 	    } else {
525 		copy_mem_complex(len_to_copy, expanders[type].mem, new_mem);
526 	    }
527 	    SUPERLU_FREE (expanders[type].mem);
528 	}
529 	expanders[type].mem = (void *) new_mem;
530 
531     } else { /* MemModel == USER */
532 	if ( Glu->num_expansions == 0 ) {
533 	    new_mem = cuser_malloc(new_len * lword, HEAD, Glu);
534 	    if ( NotDoubleAlign(new_mem) &&
535 		(type == LUSUP || type == UCOL) ) {
536 		old_mem = new_mem;
537 		new_mem = (void *)DoubleAlign(new_mem);
538 		extra = (char*)new_mem - (char*)old_mem;
539 #ifdef DEBUG
540 		printf("expand(): not aligned, extra %d\n", extra);
541 #endif
542 		Glu->stack.top1 += extra;
543 		Glu->stack.used += extra;
544 	    }
545 	    expanders[type].mem = (void *) new_mem;
546 	} else {
547 	    tries = 0;
548 	    extra = (new_len - *prev_len) * lword;
549 	    if ( keep_prev ) {
550 		if ( StackFull(extra) ) return (NULL);
551 	    } else {
552 		while ( StackFull(extra) ) {
553 		    if ( ++tries > 10 ) return (NULL);
554 		    alpha = Reduce(alpha);
555 		    new_len = alpha * *prev_len;
556 		    extra = (new_len - *prev_len) * lword;
557 		}
558 	    }
559 
560 	    if ( type != USUB ) {
561 		new_mem = (void*)((char*)expanders[type + 1].mem + extra);
562 		bytes_to_copy = (char*)Glu->stack.array + Glu->stack.top1
563 		    - (char*)expanders[type + 1].mem;
564 		user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy);
565 
566 		if ( type < USUB ) {
567 		    Glu->usub = expanders[USUB].mem =
568 			(void*)((char*)expanders[USUB].mem + extra);
569 		}
570 		if ( type < LSUB ) {
571 		    Glu->lsub = expanders[LSUB].mem =
572 			(void*)((char*)expanders[LSUB].mem + extra);
573 		}
574 		if ( type < UCOL ) {
575 		    Glu->ucol = expanders[UCOL].mem =
576 			(void*)((char*)expanders[UCOL].mem + extra);
577 		}
578 		Glu->stack.top1 += extra;
579 		Glu->stack.used += extra;
580 		if ( type == UCOL ) {
581 		    Glu->stack.top1 += extra;   /* Add same amount for USUB */
582 		    Glu->stack.used += extra;
583 		}
584 
585 	    } /* if ... */
586 
587 	} /* else ... */
588     }
589 
590     expanders[type].size = new_len;
591     *prev_len = new_len;
592     if ( Glu->num_expansions ) ++Glu->num_expansions;
593 
594     return (void *) expanders[type].mem;
595 
596 } /* cexpand */
597 
598 
599 /*! \brief Compress the work[] array to remove fragmentation.
600  */
601 void
cStackCompress(GlobalLU_t * Glu)602 cStackCompress(GlobalLU_t *Glu)
603 {
604     register int iword, dword, ndim;
605     char    *last, *fragment;
606     int      *ifrom, *ito;
607     complex   *dfrom, *dto;
608     int      *xlsub, *lsub, *xusub, *usub, *xlusup;
609     complex   *ucol, *lusup;
610 
611     iword = sizeof(int);
612     dword = sizeof(complex);
613     ndim = Glu->n;
614 
615     xlsub  = Glu->xlsub;
616     lsub   = Glu->lsub;
617     xusub  = Glu->xusub;
618     usub   = Glu->usub;
619     xlusup = Glu->xlusup;
620     ucol   = Glu->ucol;
621     lusup  = Glu->lusup;
622 
623     dfrom = ucol;
624     dto = (complex *)((char*)lusup + xlusup[ndim] * dword);
625     copy_mem_complex(xusub[ndim], dfrom, dto);
626     ucol = dto;
627 
628     ifrom = lsub;
629     ito = (int *) ((char*)ucol + xusub[ndim] * iword);
630     copy_mem_int(xlsub[ndim], ifrom, ito);
631     lsub = ito;
632 
633     ifrom = usub;
634     ito = (int *) ((char*)lsub + xlsub[ndim] * iword);
635     copy_mem_int(xusub[ndim], ifrom, ito);
636     usub = ito;
637 
638     last = (char*)usub + xusub[ndim] * iword;
639     fragment = (char*) (((char*)Glu->stack.array + Glu->stack.top1) - last);
640     Glu->stack.used -= (long int) fragment;
641     Glu->stack.top1 -= (long int) fragment;
642 
643     Glu->ucol = ucol;
644     Glu->lsub = lsub;
645     Glu->usub = usub;
646 
647 #ifdef DEBUG
648     printf("cStackCompress: fragment %d\n", fragment);
649     /* for (last = 0; last < ndim; ++last)
650 	print_lu_col("After compress:", last, 0);*/
651 #endif
652 
653 }
654 
655 /*! \brief Allocate storage for original matrix A
656  */
657 void
callocateA(int n,int nnz,complex ** a,int ** asub,int ** xa)658 callocateA(int n, int nnz, complex **a, int **asub, int **xa)
659 {
660     *a    = (complex *) complexMalloc(nnz);
661     *asub = (int *) intMalloc(nnz);
662     *xa   = (int *) intMalloc(n+1);
663 }
664 
665 
complexMalloc(int n)666 complex *complexMalloc(int n)
667 {
668     complex *buf;
669     buf = (complex *) SUPERLU_MALLOC((size_t)n * sizeof(complex));
670     if ( !buf ) {
671 	ABORT("SUPERLU_MALLOC failed for buf in complexMalloc()\n");
672     }
673     return (buf);
674 }
675 
complexCalloc(int n)676 complex *complexCalloc(int n)
677 {
678     complex *buf;
679     register int i;
680     complex zero = {0.0, 0.0};
681     buf = (complex *) SUPERLU_MALLOC((size_t)n * sizeof(complex));
682     if ( !buf ) {
683 	ABORT("SUPERLU_MALLOC failed for buf in complexCalloc()\n");
684     }
685     for (i = 0; i < n; ++i) buf[i] = zero;
686     return (buf);
687 }
688 
689 
cmemory_usage(const int nzlmax,const int nzumax,const int nzlumax,const int n)690 int cmemory_usage(const int nzlmax, const int nzumax,
691 		  const int nzlumax, const int n)
692 {
693     register int iword, dword;
694 
695     iword   = sizeof(int);
696     dword   = sizeof(complex);
697 
698     return (10 * n * iword +
699 	    nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);
700 
701 }
702