1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2 
3 /*
4  *  (C) 2001 by Argonne National Laboratory.
5  *      See COPYRIGHT in top-level directory.
6  */
7 
8 #include <string.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 
12 #include "./dataloop.h"
13 
14 #undef DEBUG_DLOOP_SIZE
15 #undef DLOOP_DEBUG_MEMORY
16 
17 /* Dataloops
18  *
19  * The functions here are used for the creation, copying, update, and display
20  * of DLOOP_Dataloop structures and trees of these structures.
21  *
22  * Currently we store trees of dataloops in contiguous regions of memory.  They
23  * are stored in such a way that subtrees are also stored contiguously.  This
24  * makes it somewhat easier to copy these subtrees around.  Keep this in mind
25  * when looking at the functions below.
26  *
27  * The structures used in this file are defined in mpid_datatype.h.  There is
28  * no separate mpid_dataloop.h at this time.
29  *
30  * OPTIMIZATIONS:
31  *
32  * There are spots in the code with OPT tags that indicate where we could
33  * optimize particular calculations or avoid certain checks.
34  *
35  * NOTES:
36  *
37  * Don't have locks in place at this time!
38  */
39 
40 /* Some functions in this file are responsible for allocation of space for
41  * dataloops.  These structures include the dataloop structure itself
42  * followed by a sequence of variable-sized arrays, depending on the loop
43  * kind.  For example, a dataloop of kind DLOOP_KIND_INDEXED has a
44  * dataloop structure followed by an array of block sizes and then an array
45  * of offsets.
46  *
47  * For efficiency and ease of cleanup (preserving a single free at
48  * deallocation), we want to allocate this memory as a single large chunk.
49  * However, we must perform some alignment of the components of this chunk
50  * in order to obtain correct and efficient operation across all platforms.
51  */
52 
53 
54 /*@
55   Dataloop_free - deallocate the resources used to store a dataloop
56 
57   Input Parameters:
58 . dataloop - pointer to dataloop structure
59 @*/
PREPEND_PREFIX(Dataloop_free)60 void PREPEND_PREFIX(Dataloop_free)(DLOOP_Dataloop **dataloop)
61 {
62 
63     if (*dataloop == NULL) return;
64 
65 #ifdef DLOOP_DEBUG_MEMORY
66     DLOOP_dbg_printf("DLOOP_Dataloop_free: freeing loop @ %x.\n",
67 		     (int) *dataloop);
68 #endif
69 
70     memset(*dataloop, 0, sizeof(DLOOP_Dataloop_common));
71     DLOOP_Free(*dataloop);
72     *dataloop = NULL;
73     return;
74 }
75 /*@
76   Dataloop_copy - Copy an arbitrary dataloop structure, updating
77   pointers as necessary
78 
79   Input Parameters:
80 + dest   - pointer to destination region
81 . src    - pointer to original dataloop structure
82 - size   - size of dataloop structure
83 
84   This routine parses the dataloop structure as it goes in order to
85   determine what exactly it needs to update.
86 
87   Notes:
88   It assumes that the source dataloop was allocated in our usual way;
89   this means that the entire dataloop is in a contiguous region and that
90   the root of the tree is first in the array.
91 
92   This has some implications:
93 + we can use a contiguous copy mechanism to copy the majority of the
94   structure
95 - all pointers in the region are relative to the start of the data region
96   the first dataloop in the array is the root of the tree
97 @*/
PREPEND_PREFIX(Dataloop_copy)98 void PREPEND_PREFIX(Dataloop_copy)(void *dest,
99 				   void *src,
100 				   int size)
101 {
102     DLOOP_Offset ptrdiff;
103 
104 #ifdef DLOOP_DEBUG_MEMORY
105     DLOOP_dbg_printf("DLOOP_Dataloop_copy: copying from %x to %x (%d bytes).\n",
106 		     (int) src, (int) dest, size);
107 #endif
108 
109     /* copy region first */
110     DLOOP_Memcpy(dest, src, size);
111 
112     /* Calculate difference in starting locations. DLOOP_Dataloop_update()
113      * then traverses the new structure and updates internal pointers by
114      * adding this difference to them. This way we can just copy the
115      * structure, including pointers, in one big block.
116      */
117     ptrdiff = (DLOOP_Offset) ((char *) dest - (char *) src);
118 
119     /* traverse structure updating pointers */
120     PREPEND_PREFIX(Dataloop_update)(dest, ptrdiff);
121 
122     return;
123 }
124 
125 
126 /*@
127   Dataloop_update - update pointers after a copy operation
128 
129   Input Parameters:
130 + dataloop - pointer to loop to update
131 - ptrdiff - value indicating offset between old and new pointer values
132 
133   This function is used to recursively update all the pointers in a
134   dataloop tree.
135 @*/
PREPEND_PREFIX(Dataloop_update)136 void PREPEND_PREFIX(Dataloop_update)(DLOOP_Dataloop *dataloop,
137 				     DLOOP_Offset ptrdiff)
138 {
139     /* OPT: only declare these variables down in the Struct case */
140     int i;
141     DLOOP_Dataloop **looparray;
142 
143     switch(dataloop->kind & DLOOP_KIND_MASK) {
144 	case DLOOP_KIND_CONTIG:
145 	case DLOOP_KIND_VECTOR:
146 	    /*
147 	     * All these really ugly assignments are really of the form:
148 	     *
149 	     * ((char *) dataloop->loop_params.c_t.loop) += ptrdiff;
150 	     *
151 	     * However, some compilers spit out warnings about casting on the
152 	     * LHS, so we get this much nastier form instead (using common
153 	     * struct for contig and vector):
154 	     */
155 
156 	    if (!(dataloop->kind & DLOOP_FINAL_MASK)) {
157 		DLOOP_Assert(dataloop->loop_params.cm_t.dataloop);
158 
159 		DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.cm_t.dataloop + ptrdiff);
160 
161 		dataloop->loop_params.cm_t.dataloop =
162 		    (DLOOP_Dataloop *) DLOOP_OFFSET_CAST_TO_VOID_PTR
163 		    (DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.cm_t.dataloop + ptrdiff);
164 
165 		PREPEND_PREFIX(Dataloop_update)(dataloop->loop_params.cm_t.dataloop, ptrdiff);
166 	    }
167 	    break;
168 
169 	case DLOOP_KIND_BLOCKINDEXED:
170 	    DLOOP_Assert(dataloop->loop_params.bi_t.offset_array);
171 
172 	    DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.bi_t.offset_array + ptrdiff);
173 
174 	    dataloop->loop_params.bi_t.offset_array =
175 		(DLOOP_Offset *) DLOOP_OFFSET_CAST_TO_VOID_PTR
176 		(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.bi_t.offset_array + ptrdiff);
177 
178 	    if (!(dataloop->kind & DLOOP_FINAL_MASK)) {
179 		DLOOP_Assert(dataloop->loop_params.bi_t.dataloop);
180 
181 		DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.bi_t.dataloop + ptrdiff);
182 
183 		dataloop->loop_params.bi_t.dataloop =
184 		    (DLOOP_Dataloop *) DLOOP_OFFSET_CAST_TO_VOID_PTR
185 		    (DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.bi_t.dataloop + ptrdiff);
186 
187 		PREPEND_PREFIX(Dataloop_update)(dataloop->loop_params.bi_t.dataloop, ptrdiff);
188 	    }
189 	    break;
190 
191 	case DLOOP_KIND_INDEXED:
192 	    DLOOP_Assert(dataloop->loop_params.i_t.blocksize_array);
193 
194 	    DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.i_t.blocksize_array + ptrdiff);
195 
196 	    dataloop->loop_params.i_t.blocksize_array =
197 		(DLOOP_Count *) DLOOP_OFFSET_CAST_TO_VOID_PTR
198 		(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.i_t.blocksize_array + ptrdiff);
199 
200 	    DLOOP_Assert(dataloop->loop_params.i_t.offset_array);
201 
202 	    DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.i_t.offset_array + ptrdiff);
203 
204 	    dataloop->loop_params.i_t.offset_array =
205 		(DLOOP_Offset *) DLOOP_OFFSET_CAST_TO_VOID_PTR
206 		(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.i_t.offset_array + ptrdiff);
207 
208 	    if (!(dataloop->kind & DLOOP_FINAL_MASK)) {
209 		DLOOP_Assert(dataloop->loop_params.i_t.dataloop);
210 
211 		DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.i_t.dataloop + ptrdiff);
212 
213 		dataloop->loop_params.i_t.dataloop =
214 		    (DLOOP_Dataloop *) DLOOP_OFFSET_CAST_TO_VOID_PTR
215 		    (DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.i_t.dataloop + ptrdiff);
216 
217 		PREPEND_PREFIX(Dataloop_update)(dataloop->loop_params.i_t.dataloop, ptrdiff);
218 	    }
219 	    break;
220 
221 	case DLOOP_KIND_STRUCT:
222 	    DLOOP_Assert(dataloop->loop_params.s_t.blocksize_array);
223 
224 	    DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.s_t.blocksize_array + ptrdiff);
225 
226 	    dataloop->loop_params.s_t.blocksize_array =
227 		(DLOOP_Count *) DLOOP_OFFSET_CAST_TO_VOID_PTR
228 		(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.s_t.blocksize_array + ptrdiff);
229 
230 	    DLOOP_Assert(dataloop->loop_params.s_t.offset_array);
231 
232 	    DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.s_t.offset_array + ptrdiff);
233 
234 	    dataloop->loop_params.s_t.offset_array =
235 		(DLOOP_Offset *) DLOOP_OFFSET_CAST_TO_VOID_PTR
236 		(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.s_t.offset_array + ptrdiff);
237 
238 	    if (dataloop->kind & DLOOP_FINAL_MASK) break;
239 
240 	    DLOOP_Assert(dataloop->loop_params.s_t.dataloop_array);
241 
242 	    DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.s_t.dataloop_array + ptrdiff);
243 
244 	    dataloop->loop_params.s_t.dataloop_array =
245 		(DLOOP_Dataloop **) DLOOP_OFFSET_CAST_TO_VOID_PTR
246 		(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) dataloop->loop_params.s_t.dataloop_array + ptrdiff);
247 
248 	    /* fix the N dataloop pointers too */
249 	    looparray = dataloop->loop_params.s_t.dataloop_array;
250 	    for (i=0; i < dataloop->loop_params.s_t.count; i++) {
251 		DLOOP_Assert(looparray[i]);
252 
253 		DLOOP_Ensure_Offset_fits_in_pointer(DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) looparray[i] + ptrdiff);
254 
255 		looparray[i] = (DLOOP_Dataloop *) DLOOP_OFFSET_CAST_TO_VOID_PTR
256 		    (DLOOP_VOID_PTR_CAST_TO_OFFSET (char *) looparray[i] + ptrdiff);
257 	    }
258 
259 	    for (i=0; i < dataloop->loop_params.s_t.count; i++) {
260 		PREPEND_PREFIX(Dataloop_update)(looparray[i], ptrdiff);
261 	    }
262 	    break;
263 	default:
264 	    /* --BEGIN ERROR HANDLING-- */
265 	    DLOOP_Assert(0);
266 	    break;
267 	    /* --END ERROR HANDLING-- */
268     }
269     return;
270 }
271 
272 /*@
273   Dataloop_alloc - allocate the resources used to store a dataloop with
274                    no old loops associated with it.
275 
276   Input Parameters:
277 + kind          - kind of dataloop to allocate
278 . count         - number of elements in dataloop (kind dependent)
279 . new_loop_p    - address at which to store new dataloop pointer
280 - new_loop_sz_p - pointer to integer in which to store new loop size
281 
282   Notes:
283   The count parameter passed into this function will often be different
284   from the count passed in at the MPI layer due to optimizations.
285 @*/
PREPEND_PREFIX(Dataloop_alloc)286 void PREPEND_PREFIX(Dataloop_alloc)(int kind,
287 				    DLOOP_Count count,
288 				    DLOOP_Dataloop **new_loop_p,
289 				    int *new_loop_sz_p)
290 {
291     PREPEND_PREFIX(Dataloop_alloc_and_copy)(kind,
292 					    count,
293 					    NULL,
294 					    0,
295 					    new_loop_p,
296 					    new_loop_sz_p);
297     return;
298 }
299 
300 /*@
301   Dataloop_alloc_and_copy - allocate the resources used to store a
302                             dataloop and copy in old dataloop as
303 			    appropriate
304 
305   Input Parameters:
306 + kind          - kind of dataloop to allocate
307 . count         - number of elements in dataloop (kind dependent)
308 . old_loop      - pointer to old dataloop (or NULL for none)
309 . old_loop_sz   - size of old dataloop (should be zero if old_loop is NULL)
310 . new_loop_p    - address at which to store new dataloop pointer
311 - new_loop_sz_p - pointer to integer in which to store new loop size
312 
313   Notes:
314   The count parameter passed into this function will often be different
315   from the count passed in at the MPI layer.
316 @*/
PREPEND_PREFIX(Dataloop_alloc_and_copy)317 void PREPEND_PREFIX(Dataloop_alloc_and_copy)(int kind,
318 					     DLOOP_Count count,
319 					     DLOOP_Dataloop *old_loop,
320 					     int old_loop_sz,
321 					     DLOOP_Dataloop **new_loop_p,
322 					     int *new_loop_sz_p)
323 {
324     int new_loop_sz = 0;
325     int align_sz = 8; /* default aligns everything to 8-byte boundaries */
326     int epsilon;
327     int loop_sz = sizeof(DLOOP_Dataloop);
328     int off_sz = 0, blk_sz = 0, ptr_sz = 0, extent_sz = 0;
329 
330     char *pos;
331     DLOOP_Dataloop *new_loop;
332 
333 #ifdef HAVE_MAX_STRUCT_ALIGNMENT
334     if (align_sz > HAVE_MAX_STRUCT_ALIGNMENT) {
335 	align_sz = HAVE_MAX_STRUCT_ALIGNMENT;
336     }
337 #endif
338 
339     if (old_loop != NULL) {
340 	DLOOP_Assert((old_loop_sz % align_sz) == 0);
341     }
342 
343     /* calculate the space that we actually need for everything */
344     switch (kind) {
345 	case DLOOP_KIND_STRUCT:
346 	    /* need space for dataloop pointers and extents */
347 	    ptr_sz = count * sizeof(DLOOP_Dataloop *);
348 	    extent_sz = count * sizeof(DLOOP_Offset);
349 	case DLOOP_KIND_INDEXED:
350 	    /* need space for block sizes */
351 	    blk_sz = count * sizeof(DLOOP_Count);
352 	case DLOOP_KIND_BLOCKINDEXED:
353 	    /* need space for block offsets */
354 	    off_sz = count * sizeof(DLOOP_Offset);
355 	case DLOOP_KIND_CONTIG:
356 	case DLOOP_KIND_VECTOR:
357 	    break;
358 	default:
359 	    DLOOP_Assert(0);
360     }
361 
362     /* pad everything that we're going to allocate */
363     epsilon = loop_sz % align_sz;
364     if (epsilon) loop_sz += align_sz - epsilon;
365 
366     epsilon = off_sz % align_sz;
367     if (epsilon) off_sz += align_sz - epsilon;
368 
369     epsilon = blk_sz % align_sz;
370     if (epsilon) blk_sz += align_sz - epsilon;
371 
372     epsilon = ptr_sz % align_sz;
373     if (epsilon) ptr_sz += align_sz - epsilon;
374 
375     epsilon = extent_sz % align_sz;
376     if (epsilon) extent_sz += align_sz - epsilon;
377 
378     new_loop_sz += loop_sz + off_sz + blk_sz + ptr_sz +
379 	extent_sz + old_loop_sz;
380 
381     /* allocate space */
382     new_loop = (DLOOP_Dataloop *) DLOOP_Malloc(new_loop_sz);
383     if (new_loop == NULL) {
384 	*new_loop_p = NULL;
385 	return;
386     }
387 
388 #ifdef DLOOP_DEBUG_MEMORY
389     DLOOP_dbg_printf("DLOOP_Dataloop_alloc_and_copy: new loop @ %x (tot sz = %d, loop = %d, off = %d, blk = %d, ptr = %d, extent = %d, old = %d)\n",
390 		     (int) new_loop,
391 		     new_loop_sz,
392 		     loop_sz,
393 		     off_sz,
394 		     blk_sz,
395 		     ptr_sz,
396 		     extent_sz,
397 		     old_loop_sz);
398 #endif
399 
400     /* set all the pointers in the new dataloop structure */
401     switch (kind) {
402 	case DLOOP_KIND_STRUCT:
403 	    /* order is:
404 	     * - pointers
405 	     * - blocks
406 	     * - offsets
407 	     * - extents
408 	     */
409 	    new_loop->loop_params.s_t.dataloop_array =
410 		(DLOOP_Dataloop **) (((char *) new_loop) + loop_sz);
411 	    new_loop->loop_params.s_t.blocksize_array =
412 		(DLOOP_Count *) (((char *) new_loop) + loop_sz + ptr_sz);
413 	    new_loop->loop_params.s_t.offset_array =
414 		(DLOOP_Offset *) (((char *) new_loop) + loop_sz +
415 				  ptr_sz + blk_sz);
416 	    new_loop->loop_params.s_t.el_extent_array =
417 		(DLOOP_Offset *) (((char *) new_loop) + loop_sz +
418 				  ptr_sz + blk_sz + off_sz);
419 	    break;
420 	case DLOOP_KIND_INDEXED:
421 	    /* order is:
422 	     * - blocks
423 	     * - offsets
424 	     */
425 	    new_loop->loop_params.i_t.blocksize_array =
426 		(DLOOP_Count *) (((char *) new_loop) + loop_sz);
427 	    new_loop->loop_params.i_t.offset_array =
428 		(DLOOP_Offset *) (((char *) new_loop) + loop_sz + blk_sz);
429 	    if (old_loop == NULL) {
430 		new_loop->loop_params.i_t.dataloop = NULL;
431 	    }
432 	    else {
433 		new_loop->loop_params.i_t.dataloop =
434 		    (DLOOP_Dataloop *) (((char *) new_loop) +
435 					(new_loop_sz - old_loop_sz));
436 	    }
437 	    break;
438 	case DLOOP_KIND_BLOCKINDEXED:
439 	    new_loop->loop_params.bi_t.offset_array =
440 		(DLOOP_Offset *) (((char *) new_loop) + loop_sz);
441 	    if (old_loop == NULL) {
442 		new_loop->loop_params.bi_t.dataloop = NULL;
443 	    }
444 	    else {
445 		new_loop->loop_params.bi_t.dataloop =
446 		    (DLOOP_Dataloop *) (((char *) new_loop) +
447 					(new_loop_sz - old_loop_sz));
448 	    }
449 	    break;
450 	case DLOOP_KIND_CONTIG:
451 	    if (old_loop == NULL) {
452 		new_loop->loop_params.c_t.dataloop = NULL;
453 	    }
454 	    else {
455 		new_loop->loop_params.c_t.dataloop =
456 		    (DLOOP_Dataloop *) (((char *) new_loop) +
457 					(new_loop_sz - old_loop_sz));
458 	    }
459 	    break;
460 	case DLOOP_KIND_VECTOR:
461 	    if (old_loop == NULL) {
462 		new_loop->loop_params.v_t.dataloop = NULL;
463 	    }
464 	    else {
465 		new_loop->loop_params.v_t.dataloop =
466 		    (DLOOP_Dataloop *) (((char *) new_loop) +
467 					(new_loop_sz - old_loop_sz));
468 	    }
469 	    break;
470 	default:
471 	    DLOOP_Assert(0);
472     }
473 
474     pos = ((char *) new_loop) + (new_loop_sz - old_loop_sz);
475     if (old_loop != NULL) {
476 	PREPEND_PREFIX(Dataloop_copy)(pos, old_loop, old_loop_sz);
477     }
478 
479     *new_loop_p    = new_loop;
480     *new_loop_sz_p = new_loop_sz;
481     return;
482 }
483 
484 /*@
485   Dataloop_struct_alloc - allocate the resources used to store a dataloop and
486                           copy in old dataloop as appropriate.  this version
487                           is specifically for use when a struct dataloop is
488                           being created; the space to hold old dataloops in
489                           this case must be described back to the
490                           implementation in order for efficient copying.
491 
492   Input Parameters:
493 + count         - number of elements in dataloop (kind dependent)
494 . old_loop_sz   - size of old dataloop (should be zero if old_loop is NULL)
495 . basic_ct      - number of basic types for which new dataloops are needed
496 . old_loop_p    - address at which to store pointer to old loops
497 . new_loop_p    - address at which to store new struct dataloop pointer
498 - new_loop_sz_p - address at which to store new loop size
499 
500   Notes:
501   The count parameter passed into this function will often be different
502   from the count passed in at the MPI layer due to optimizations.
503 
504   The caller is responsible for filling in the region pointed to by
505   old_loop_p (count elements).
506 @*/
PREPEND_PREFIX(Dataloop_struct_alloc)507 void PREPEND_PREFIX(Dataloop_struct_alloc)(DLOOP_Count count,
508 					   int old_loop_sz,
509 					   int basic_ct,
510 					   DLOOP_Dataloop **old_loop_p,
511 					   DLOOP_Dataloop **new_loop_p,
512 					   int *new_loop_sz_p)
513 {
514     int new_loop_sz = 0;
515     int align_sz = 8; /* default aligns everything to 8-byte boundaries */
516     int epsilon;
517     int loop_sz = sizeof(DLOOP_Dataloop);
518     int off_sz, blk_sz, ptr_sz, extent_sz, basic_sz;
519 
520     DLOOP_Dataloop *new_loop;
521 
522 #ifdef HAVE_MAX_STRUCT_ALIGNMENT
523     if (align_sz > HAVE_MAX_STRUCT_ALIGNMENT) {
524 	align_sz = HAVE_MAX_STRUCT_ALIGNMENT;
525     }
526 #endif
527 
528     /* calculate the space that we actually need for everything */
529     ptr_sz    = count * sizeof(DLOOP_Dataloop *);
530     extent_sz = count * sizeof(DLOOP_Offset);
531     blk_sz    = count * sizeof(DLOOP_Count);
532     off_sz    = count * sizeof(DLOOP_Offset);
533     basic_sz  = sizeof(DLOOP_Dataloop);
534 
535     /* pad everything that we're going to allocate */
536     epsilon = loop_sz % align_sz;
537     if (epsilon) loop_sz += align_sz - epsilon;
538 
539     epsilon = off_sz % align_sz;
540     if (epsilon) off_sz += align_sz - epsilon;
541 
542     epsilon = blk_sz % align_sz;
543     if (epsilon) blk_sz += align_sz - epsilon;
544 
545     epsilon = ptr_sz % align_sz;
546     if (epsilon) ptr_sz += align_sz - epsilon;
547 
548     epsilon = extent_sz % align_sz;
549     if (epsilon) extent_sz += align_sz - epsilon;
550 
551     epsilon = basic_sz % align_sz;
552     if (epsilon) basic_sz += align_sz - epsilon;
553 
554     /* note: we pad *each* basic type dataloop, because the
555      * code used to create them assumes that we're going to
556      * do that.
557      */
558 
559     new_loop_sz += loop_sz + off_sz + blk_sz + ptr_sz +
560 	extent_sz + (basic_ct * basic_sz) + old_loop_sz;
561 
562     /* allocate space */
563     new_loop = (DLOOP_Dataloop *) DLOOP_Malloc(new_loop_sz);
564     if (new_loop == NULL) {
565 	*new_loop_p = NULL;
566 	return;
567     }
568 
569 #ifdef DLOOP_DEBUG_MEMORY
570     DLOOP_dbg_printf("DLOOP_Dataloop_struct_alloc: new loop @ %x (tot sz = %d, loop = %d, off = %d, blk = %d, ptr = %d, extent = %d, basics = %d, old = %d)\n",
571 		     (int) new_loop,
572 		     new_loop_sz,
573 		     loop_sz,
574 		     off_sz,
575 		     blk_sz,
576 		     ptr_sz,
577 		     extent_sz,
578 		     basic_sz,
579 		     old_loop_sz);
580 #endif
581 
582     /* set all the pointers in the new dataloop structure */
583     new_loop->loop_params.s_t.dataloop_array = (DLOOP_Dataloop **)
584 	(((char *) new_loop) + loop_sz);
585     new_loop->loop_params.s_t.blocksize_array =	(DLOOP_Count *)
586 	(((char *) new_loop) + loop_sz + ptr_sz);
587     new_loop->loop_params.s_t.offset_array = (DLOOP_Offset *)
588 	(((char *) new_loop) + loop_sz + ptr_sz + blk_sz);
589     new_loop->loop_params.s_t.el_extent_array =	(DLOOP_Offset *)
590 	(((char *) new_loop) + loop_sz + ptr_sz + blk_sz + off_sz);
591 
592     *old_loop_p = (DLOOP_Dataloop *)
593 	(((char *) new_loop) + loop_sz + ptr_sz + blk_sz + off_sz + extent_sz);
594     *new_loop_p = new_loop;
595     *new_loop_sz_p = new_loop_sz;
596 
597     return;
598 }
599 
600 /*@
601   Dataloop_dup - make a copy of a dataloop
602 
603   Returns 0 on success, -1 on failure.
604 @*/
PREPEND_PREFIX(Dataloop_dup)605 void PREPEND_PREFIX(Dataloop_dup)(DLOOP_Dataloop *old_loop,
606 				  int old_loop_sz,
607 				  DLOOP_Dataloop **new_loop_p)
608 {
609     DLOOP_Dataloop *new_loop;
610 
611     DLOOP_Assert(old_loop != NULL);
612     DLOOP_Assert(old_loop_sz > 0);
613 
614     new_loop = (DLOOP_Dataloop *) DLOOP_Malloc(old_loop_sz);
615     if (new_loop == NULL) {
616 	*new_loop_p = NULL;
617 	return;
618     }
619 
620     PREPEND_PREFIX(Dataloop_copy)(new_loop, old_loop, old_loop_sz);
621     *new_loop_p = new_loop;
622     return;
623 }
624 
625 /*@
626   Dataloop_stream_size - return the size of the data described by the dataloop
627 
628   Input Parameters:
629 + dl_p   - pointer to dataloop for which we will return the size
630 - sizefn - function for determining size of types in the corresponding stream
631            (passing NULL will instead result in el_size values being used)
632 
633 @*/
634 DLOOP_Offset
PREPEND_PREFIX(Dataloop_stream_size)635 PREPEND_PREFIX(Dataloop_stream_size)(struct DLOOP_Dataloop *dl_p,
636 				     DLOOP_Offset (*sizefn)(DLOOP_Type el_type))
637 {
638     DLOOP_Offset tmp_sz, tmp_ct = 1;
639 
640     for (;;)
641     {
642         if ((dl_p->kind & DLOOP_KIND_MASK) == DLOOP_KIND_STRUCT)
643         {
644             int i;
645 
646             tmp_sz = 0;
647             for (i = 0; i < dl_p->loop_params.s_t.count; i++)
648             {
649                 tmp_sz += (DLOOP_Offset)(dl_p->loop_params.s_t.blocksize_array[i]) *
650                     PREPEND_PREFIX(Dataloop_stream_size)(dl_p->loop_params.s_t.dataloop_array[i], sizefn);
651             }
652             return tmp_sz * tmp_ct;
653         }
654 
655         switch (dl_p->kind & DLOOP_KIND_MASK) {
656         case DLOOP_KIND_CONTIG:
657             tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.c_t.count);
658 #ifdef DLOOP_DEBUG_SIZE
659             DLOOP_dbg_printf("stream_size: contig: ct = %d; new tot_ct = " DLOOP_OFFSET_FMT_DEC_SPEC "\n",
660                              (int) dl_p->loop_params.c_t.count, (DLOOP_Offset) tmp_ct);
661 #endif
662             break;
663         case DLOOP_KIND_VECTOR:
664             tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.v_t.count) *
665 		      (DLOOP_Offset)(dl_p->loop_params.v_t.blocksize);
666 #ifdef DLOOP_DEBUG_SIZE
667             DLOOP_dbg_printf("stream_size: vector: ct = %d; blk = %d; new tot_ct = " DLOOP_OFFSET_FMT_DEC_SPEC "\n",
668                              (int) dl_p->loop_params.v_t.count,
669                              (int) dl_p->loop_params.v_t.blocksize,
670                              (DLOOP_Offset) tmp_ct);
671 #endif
672             break;
673         case DLOOP_KIND_BLOCKINDEXED:
674             tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.bi_t.count) *
675 		      (DLOOP_Offset)(dl_p->loop_params.bi_t.blocksize);
676 #ifdef DLOOP_DEBUG_SIZE
677             DLOOP_dbg_printf("stream_size: blkindexed: blks = %d; new tot_ct = " DLOOP_OFFSET_FMT_DEC_SPEC "\n",
678                              (int) dl_p->loop_params.bi_t.count *
679                              (int) dl_p->loop_params.bi_t.blocksize,
680                              (DLOOP_Offset) tmp_ct);
681 #endif
682             break;
683         case DLOOP_KIND_INDEXED:
684             tmp_ct *= (DLOOP_Offset)(dl_p->loop_params.i_t.total_blocks);
685 #ifdef DLOOP_DEBUG_SIZE
686             DLOOP_dbg_printf("stream_size: contig: blks = %d; new tot_ct = " DLOOP_OFFSET_FMT_DEC_SPEC "\n",
687                              (int) dl_p->loop_params.i_t.total_blocks,
688                              (DLOOP_Offset) tmp_ct);
689 #endif
690             break;
691         default:
692             /* --BEGIN ERROR HANDLING-- */
693             DLOOP_Assert(0);
694             break;
695             /* --END ERROR HANDLING-- */
696         }
697 
698         if (dl_p->kind & DLOOP_FINAL_MASK) break;
699         else {
700             DLOOP_Assert(dl_p->loop_params.cm_t.dataloop != NULL);
701             dl_p = dl_p->loop_params.cm_t.dataloop;
702         }
703     }
704 
705     /* call fn for size using bottom type, or use size if fnptr is NULL */
706     tmp_sz = ((sizefn) ? sizefn(dl_p->el_type) : dl_p->el_size);
707 
708     return tmp_sz * tmp_ct;
709 }
710 
711 /* --BEGIN ERROR HANDLING-- */
712 /*@
713   Dataloop_print - dump a dataloop tree to stdout for debugging
714   purposes
715 
716   Input Parameters:
717 + dataloop - root of tree to dump
718 - depth - starting depth; used to help keep up with where we are in the tree
719 @*/
PREPEND_PREFIX(Dataloop_print)720 void PREPEND_PREFIX(Dataloop_print)(struct DLOOP_Dataloop *dataloop,
721 				    int depth)
722 {
723     int i;
724 
725     if (dataloop == NULL)
726     {
727         DLOOP_dbg_printf("dataloop is NULL (probably basic type)\n");
728         return;
729     }
730 
731     DLOOP_dbg_printf("loc=%p, treedepth=%d, kind=%d, el_extent=" DLOOP_OFFSET_FMT_DEC_SPEC "\n",
732 		     dataloop, (int) depth, (int) dataloop->kind, (DLOOP_Offset) dataloop->el_extent);
733     switch(dataloop->kind & DLOOP_KIND_MASK) {
734 	case DLOOP_KIND_CONTIG:
735 	    DLOOP_dbg_printf("\tCONTIG: count=%d, datatype=%p\n",
736 			     (int) dataloop->loop_params.c_t.count,
737 			     dataloop->loop_params.c_t.dataloop);
738 	    if (!(dataloop->kind & DLOOP_FINAL_MASK))
739 		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.c_t.dataloop, depth+1);
740 	    break;
741 	case DLOOP_KIND_VECTOR:
742 	    DLOOP_dbg_printf("\tVECTOR: count=%d, blksz=%d, stride=" DLOOP_OFFSET_FMT_DEC_SPEC ", datatype=%p\n",
743 			     (int) dataloop->loop_params.v_t.count,
744 			     (int) dataloop->loop_params.v_t.blocksize,
745 			     (DLOOP_Offset) dataloop->loop_params.v_t.stride,
746 			     dataloop->loop_params.v_t.dataloop);
747 	    if (!(dataloop->kind & DLOOP_FINAL_MASK))
748 		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.v_t.dataloop, depth+1);
749 	    break;
750 	case DLOOP_KIND_BLOCKINDEXED:
751 	    DLOOP_dbg_printf("\tBLOCKINDEXED: count=%d, blksz=%d, datatype=%p\n",
752 			     (int) dataloop->loop_params.bi_t.count,
753 			     (int) dataloop->loop_params.bi_t.blocksize,
754 			     dataloop->loop_params.bi_t.dataloop);
755 	    /* print out offsets later */
756 	    if (!(dataloop->kind & DLOOP_FINAL_MASK))
757 		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.bi_t.dataloop, depth+1);
758 	    break;
759 	case DLOOP_KIND_INDEXED:
760 	    DLOOP_dbg_printf("\tINDEXED: count=%d, datatype=%p\n",
761 			     (int) dataloop->loop_params.i_t.count,
762 			     dataloop->loop_params.i_t.dataloop);
763 	    /* print out blocksizes and offsets later */
764 	    if (!(dataloop->kind & DLOOP_FINAL_MASK))
765 		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.i_t.dataloop, depth+1);
766 	    break;
767 	case DLOOP_KIND_STRUCT:
768 	    DLOOP_dbg_printf("\tSTRUCT: count=%d\n", (int) dataloop->loop_params.s_t.count);
769 	    DLOOP_dbg_printf("\tblocksizes:\n");
770 	    for (i=0; i < dataloop->loop_params.s_t.count; i++)
771 		DLOOP_dbg_printf("\t\t%d\n", (int) dataloop->loop_params.s_t.blocksize_array[i]);
772 	    DLOOP_dbg_printf("\toffsets:\n");
773 	    for (i=0; i < dataloop->loop_params.s_t.count; i++)
774 		DLOOP_dbg_printf("\t\t" DLOOP_OFFSET_FMT_DEC_SPEC "\n", (DLOOP_Offset) dataloop->loop_params.s_t.offset_array[i]);
775 	    DLOOP_dbg_printf("\tdatatypes:\n");
776 	    for (i=0; i < dataloop->loop_params.s_t.count; i++)
777 		DLOOP_dbg_printf("\t\t%p\n", dataloop->loop_params.s_t.dataloop_array[i]);
778 	    if (dataloop->kind & DLOOP_FINAL_MASK) break;
779 
780 	    for (i=0; i < dataloop->loop_params.s_t.count; i++) {
781 		PREPEND_PREFIX(Dataloop_print)(dataloop->loop_params.s_t.dataloop_array[i],depth+1);
782 	    }
783 	    break;
784 	default:
785 	    DLOOP_Assert(0);
786 	    break;
787     }
788     return;
789 }
790 /* --END ERROR HANDLING-- */
791