1 /* ========================================================================== */
2 /* === UMF_start_front ====================================================== */
3 /* ========================================================================== */
4
5 /* -------------------------------------------------------------------------- */
6 /* Copyright (c) 2005-2012 by Timothy A. Davis, http://www.suitesparse.com. */
7 /* All Rights Reserved. See ../Doc/License.txt for License. */
8 /* -------------------------------------------------------------------------- */
9
10 /* Allocate the initial frontal matrix working array for a single chain. The
11 * front does not have to be big enough, since if it's too small it will get
12 * reallocated. The size computed here is just an estimate. */
13
14 #include "umf_internal.h"
15 #include "umf_start_front.h"
16 #include "umf_grow_front.h"
17
UMF_start_front(Int chain,NumericType * Numeric,WorkType * Work,SymbolicType * Symbolic)18 GLOBAL Int UMF_start_front /* returns TRUE if successful, FALSE otherwise */
19 (
20 Int chain,
21 NumericType *Numeric,
22 WorkType *Work,
23 SymbolicType *Symbolic
24 )
25 {
26 double maxbytes ;
27 Int fnrows_max, fncols_max, fnr2, fnc2, fsize, fcurr_size, maxfrsize,
28 overflow, nb, f, cdeg ;
29
30 nb = Symbolic->nb ;
31 fnrows_max = Symbolic->Chain_maxrows [chain] ;
32 fncols_max = Symbolic->Chain_maxcols [chain] ;
33
34 DEBUGm2 (("Start Front for chain "ID". fnrows_max "ID" fncols_max "ID"\n",
35 chain, fnrows_max, fncols_max)) ;
36
37 Work->fnrows_max = fnrows_max ;
38 Work->fncols_max = fncols_max ;
39 Work->any_skip = FALSE ;
40
41 maxbytes = sizeof (Entry) *
42 (double) (fnrows_max + nb) * (double) (fncols_max + nb) ;
43 fcurr_size = Work->fcurr_size ;
44
45 if (Symbolic->prefer_diagonal)
46 {
47 /* Get a rough upper bound on the degree of the first pivot column in
48 * this front. Note that Col_degree is not maintained if diagonal
49 * pivoting is preferred. For most matrices, the first pivot column
50 * of the first frontal matrix of a new chain has only one tuple in
51 * it anyway, so this bound is exact in that case. */
52 Int col, tpi, e, *E, *Col_tuples, *Col_tlen, *Cols ;
53 Tuple *tp, *tpend ;
54 Unit *Memory, *p ;
55 Element *ep ;
56 E = Work->E ;
57 Memory = Numeric->Memory ;
58 Col_tuples = Numeric->Lip ;
59 Col_tlen = Numeric->Lilen ;
60 col = Work->nextcand ;
61 tpi = Col_tuples [col] ;
62 tp = (Tuple *) Memory + tpi ;
63 tpend = tp + Col_tlen [col] ;
64 cdeg = 0 ;
65 DEBUGm3 (("\n=============== start front: col "ID" tlen "ID"\n",
66 col, Col_tlen [col])) ;
67 for ( ; tp < tpend ; tp++)
68 {
69 DEBUG1 (("Tuple ("ID","ID")\n", tp->e, tp->f)) ;
70 e = tp->e ;
71 if (!E [e]) continue ;
72 f = tp->f ;
73 p = Memory + E [e] ;
74 ep = (Element *) p ;
75 p += UNITS (Element, 1) ;
76 Cols = (Int *) p ;
77 if (Cols [f] == EMPTY) continue ;
78 DEBUG1 ((" nrowsleft "ID"\n", ep->nrowsleft)) ;
79 cdeg += ep->nrowsleft ;
80 }
81 #ifndef NDEBUG
82 DEBUGm3 (("start front cdeg: "ID" col "ID"\n", cdeg, col)) ;
83 UMF_dump_rowcol (1, Numeric, Work, col, FALSE) ;
84 #endif
85
86 /* cdeg is now the rough upper bound on the degree of the next pivot
87 * column. */
88
89 /* If AMD was called, we know the maximum number of nonzeros in any
90 * column of L. Use this as an upper bound for cdeg, but add 2 to
91 * account for a small amount of off-diagonal pivoting. */
92 if (Symbolic->amd_dmax > 0)
93 {
94 cdeg = MIN (cdeg, Symbolic->amd_dmax) ;
95 }
96
97 /* Increase it to account for larger columns later on.
98 * Also ensure that it's larger than zero. */
99 cdeg += 2 ;
100
101 /* cdeg cannot be larger than fnrows_max */
102 cdeg = MIN (cdeg, fnrows_max) ;
103
104 }
105 else
106 {
107 /* don't do the above cdeg computation */
108 cdeg = 0 ;
109 }
110
111 DEBUGm2 (("fnrows max "ID" fncols_max "ID"\n", fnrows_max, fncols_max)) ;
112
113 /* the current frontal matrix is empty */
114 ASSERT (Work->fnrows == 0 && Work->fncols == 0 && Work->fnpiv == 0) ;
115
116 /* maximum row dimension is always odd, to avoid bad cache effects */
117 ASSERT (fnrows_max >= 0) ;
118 ASSERT (fnrows_max % 2 == 1) ;
119
120 /* ----------------------------------------------------------------------
121 * allocate working array for current frontal matrix:
122 * minimum size: 1-by-1
123 * maximum size: fnrows_max-by-fncols_max
124 * desired size:
125 *
126 * if Numeric->front_alloc_init >= 0:
127 *
128 * for unsymmetric matrices:
129 * Numeric->front_alloc_init * (fnrows_max-by-fncols_max)
130 *
131 * for symmetric matrices (diagonal pivoting preference, actually):
132 * Numeric->front_alloc_init * (fnrows_max-by-fncols_max), or
133 * cdeg*cdeg, whichever is smaller.
134 *
135 * if Numeric->front_alloc_init < 0:
136 * allocate a front of size -Numeric->front_alloc_init.
137 *
138 * Allocate the whole thing if it's small (less than 2*nb^2). Make sure the
139 * leading dimension of the frontal matrix is odd.
140 *
141 * Also allocate the nb-by-nb LU block, the dr-by-nb L block, and the
142 * nb-by-dc U block.
143 * ---------------------------------------------------------------------- */
144
145 /* get the maximum front size, avoiding integer overflow */
146 overflow = INT_OVERFLOW (maxbytes) ;
147 if (overflow)
148 {
149 /* :: int overflow, max front size :: */
150 maxfrsize = Int_MAX / sizeof (Entry) ;
151 }
152 else
153 {
154 maxfrsize = (fnrows_max + nb) * (fncols_max + nb) ;
155 }
156 ASSERT (!INT_OVERFLOW ((double) maxfrsize * sizeof (Entry))) ;
157
158 if (Numeric->front_alloc_init < 0)
159 {
160 /* allocate a front of -Numeric->front_alloc_init entries */
161 fsize = -Numeric->front_alloc_init ;
162 fsize = MAX (1, fsize) ;
163 }
164 else
165 {
166 if (INT_OVERFLOW (Numeric->front_alloc_init * maxbytes))
167 {
168 /* :: int overflow, requested front size :: */
169 fsize = Int_MAX / sizeof (Entry) ;
170 }
171 else
172 {
173 fsize = Numeric->front_alloc_init * maxfrsize ;
174 }
175
176 if (cdeg > 0)
177 {
178 /* diagonal pivoting is in use. cdeg was computed above */
179 Int fsize2 ;
180
181 /* add the L and U blocks */
182 cdeg += nb ;
183
184 if (INT_OVERFLOW (((double) cdeg * (double) cdeg) * sizeof (Entry)))
185 {
186 /* :: int overflow, symmetric front size :: */
187 fsize2 = Int_MAX / sizeof (Entry) ;
188 }
189 else
190 {
191 fsize2 = MAX (cdeg * cdeg, fcurr_size) ;
192 }
193 fsize = MIN (fsize, fsize2) ;
194 }
195 }
196
197 fsize = MAX (fsize, 2*nb*nb) ;
198
199 /* fsize and maxfrsize are now safe from integer overflow. They both
200 * include the size of the pivot blocks. */
201 ASSERT (!INT_OVERFLOW ((double) fsize * sizeof (Entry))) ;
202
203 Work->fnrows_new = 0 ;
204 Work->fncols_new = 0 ;
205
206 /* desired size is fnr2-by-fnc2 (includes L and U blocks): */
207 DEBUGm2 ((" fsize "ID" fcurr_size "ID"\n", fsize, fcurr_size)) ;
208 DEBUGm2 ((" maxfrsize "ID" fnr_curr "ID" fnc_curr "ID"\n", maxfrsize,
209 Work->fnr_curr, Work->fnc_curr)) ;
210
211 if (fsize >= maxfrsize && !overflow)
212 {
213 /* max working array is small, allocate all of it */
214 fnr2 = fnrows_max + nb ;
215 fnc2 = fncols_max + nb ;
216 fsize = maxfrsize ;
217 DEBUGm1 ((" sufficient for ("ID"+"ID")-by-("ID"+"ID")\n",
218 fnrows_max, nb, fncols_max, nb)) ;
219 }
220 else
221 {
222 /* allocate a smaller working array */
223 if (fnrows_max <= fncols_max)
224 {
225 fnr2 = (Int) sqrt ((double) fsize) ;
226 /* make sure fnr2 is odd */
227 fnr2 = MAX (fnr2, 1) ;
228 if (fnr2 % 2 == 0) fnr2++ ;
229 fnr2 = MIN (fnr2, fnrows_max + nb) ;
230 fnc2 = fsize / fnr2 ;
231 }
232 else
233 {
234 fnc2 = (Int) sqrt ((double) fsize) ;
235 fnc2 = MIN (fnc2, fncols_max + nb) ;
236 fnr2 = fsize / fnc2 ;
237 /* make sure fnr2 is odd */
238 fnr2 = MAX (fnr2, 1) ;
239 if (fnr2 % 2 == 0)
240 {
241 fnr2++ ;
242 fnc2 = fsize / fnr2 ;
243 }
244 }
245 DEBUGm1 ((" smaller "ID"-by-"ID"\n", fnr2, fnc2)) ;
246 }
247 fnr2 = MIN (fnr2, fnrows_max + nb) ;
248 fnc2 = MIN (fnc2, fncols_max + nb) ;
249 ASSERT (fnr2 % 2 == 1) ;
250 ASSERT (fnr2 * fnc2 <= fsize) ;
251
252 fnr2 -= nb ;
253 fnc2 -= nb ;
254 ASSERT (fnr2 >= 0) ;
255 ASSERT (fnc2 >= 0) ;
256
257 if (fsize > fcurr_size)
258 {
259 DEBUGm1 ((" Grow front \n")) ;
260 Work->do_grow = TRUE ;
261 if (!UMF_grow_front (Numeric, fnr2, fnc2, Work, -1))
262 {
263 /* since the minimum front size is 1-by-1, it would be nearly
264 * impossible to run out of memory here. */
265 DEBUGm4 (("out of memory: start front\n")) ;
266 return (FALSE) ;
267 }
268 }
269 else
270 {
271 /* use the existing front */
272 DEBUGm1 ((" existing front ok\n")) ;
273 Work->fnr_curr = fnr2 ;
274 Work->fnc_curr = fnc2 ;
275 Work->Flblock = Work->Flublock + nb * nb ;
276 Work->Fublock = Work->Flblock + nb * fnr2 ;
277 Work->Fcblock = Work->Fublock + nb * fnc2 ;
278 }
279 ASSERT (Work->Flblock == Work->Flublock + Work->nb*Work->nb) ;
280 ASSERT (Work->Fublock == Work->Flblock + Work->fnr_curr*Work->nb) ;
281 ASSERT (Work->Fcblock == Work->Fublock + Work->nb*Work->fnc_curr) ;
282 return (TRUE) ;
283 }
284