1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2
3 /*
4 * (C) 2001 by Argonne National Laboratory.
5 * See COPYRIGHT in top-level directory.
6 */
7
8 #include <stdio.h>
9 #include <stdlib.h>
10
11 #include "./dataloop.h"
12
13 #undef DLOOP_DEBUG_MANIPULATE
14
15 #ifndef PREPEND_PREFIX
16 #error "You must explicitly include a header that sets the PREPEND_PREFIX and includes dataloop_parts.h"
17 #endif
18
19 /* Notes on functions:
20 *
21 * There are a few different sets of functions here:
22 * - DLOOP_Segment_manipulate() - uses a "piece" function to perform operations
23 * using segments (piece functions defined elsewhere)
24 * - PREPEND_PREFIX functions - these define the externally visible interface
25 * to segment functionality
26 */
27
28 static inline DLOOP_Count DLOOP_Stackelm_blocksize(struct DLOOP_Dataloop_stackelm *elmp);
29 static inline DLOOP_Offset DLOOP_Stackelm_offset(struct DLOOP_Dataloop_stackelm *elmp);
30 static inline void DLOOP_Stackelm_load(struct DLOOP_Dataloop_stackelm *elmp,
31 struct DLOOP_Dataloop *dlp,
32 int branch_flag);
33 /* Segment_init
34 *
35 * buf - datatype buffer location
36 * count - number of instances of the datatype in the buffer
37 * handle - handle for datatype (could be derived or not)
38 * segp - pointer to previously allocated segment structure
39 * flag - flag indicating which optimizations are valid
40 * should be one of DLOOP_DATALOOP_HOMOGENEOUS, _HETEROGENEOUS,
41 * of _ALL_BYTES.
42 *
43 * Notes:
44 * - Assumes that the segment has been allocated.
45 * - Older MPICH2 code may pass "0" to indicate HETEROGENEOUS or "1" to
46 * indicate HETEROGENEOUS.
47 *
48 */
PREPEND_PREFIX(Segment_init)49 int PREPEND_PREFIX(Segment_init)(const DLOOP_Buffer buf,
50 DLOOP_Count count,
51 DLOOP_Handle handle,
52 struct DLOOP_Segment *segp,
53 int flag)
54 {
55 DLOOP_Offset elmsize = 0;
56 int i, depth = 0;
57 int branch_detected = 0;
58
59 struct DLOOP_Dataloop_stackelm *elmp;
60 struct DLOOP_Dataloop *dlp = 0, *sblp = &segp->builtin_loop;
61
62 DLOOP_Assert(flag == DLOOP_DATALOOP_HETEROGENEOUS ||
63 flag == DLOOP_DATALOOP_HOMOGENEOUS ||
64 flag == DLOOP_DATALOOP_ALL_BYTES);
65
66 #ifdef DLOOP_DEBUG_MANIPULATE
67 DLOOP_dbg_printf("DLOOP_Segment_init: count = %d, buf = %x\n",
68 count,
69 buf);
70 #endif
71
72 if (!DLOOP_Handle_hasloop_macro(handle)) {
73 /* simplest case; datatype has no loop (basic) */
74
75 DLOOP_Handle_get_size_macro(handle, elmsize);
76
77 sblp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK;
78 sblp->loop_params.c_t.count = count;
79 sblp->loop_params.c_t.dataloop = 0;
80 sblp->el_size = elmsize;
81 DLOOP_Handle_get_basic_type_macro(handle, sblp->el_type);
82 DLOOP_Handle_get_extent_macro(handle, sblp->el_extent);
83
84 dlp = sblp;
85 depth = 1;
86 }
87 else if (count == 0) {
88 /* only use the builtin */
89 sblp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK;
90 sblp->loop_params.c_t.count = 0;
91 sblp->loop_params.c_t.dataloop = 0;
92 sblp->el_size = 0;
93 sblp->el_extent = 0;
94
95 dlp = sblp;
96 depth = 1;
97 }
98 else if (count == 1) {
99 /* don't use the builtin */
100 DLOOP_Handle_get_loopptr_macro(handle, dlp, flag);
101 DLOOP_Handle_get_loopdepth_macro(handle, depth, flag);
102 }
103 else {
104 /* default: need to use builtin to handle contig; must check
105 * loop depth first
106 */
107 DLOOP_Dataloop *oldloop; /* loop from original type, before new count */
108 DLOOP_Offset type_size, type_extent;
109 DLOOP_Type el_type;
110
111 DLOOP_Handle_get_loopdepth_macro(handle, depth, flag);
112
113 DLOOP_Handle_get_loopptr_macro(handle, oldloop, flag);
114 DLOOP_Assert(oldloop != NULL);
115 DLOOP_Handle_get_size_macro(handle, type_size);
116 DLOOP_Handle_get_extent_macro(handle, type_extent);
117 DLOOP_Handle_get_basic_type_macro(handle, el_type);
118
119 if (depth == 1 && ((oldloop->kind & DLOOP_KIND_MASK) == DLOOP_KIND_CONTIG))
120 {
121 if (type_size == type_extent)
122 {
123 /* use a contig */
124 sblp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK;
125 sblp->loop_params.c_t.count = count * oldloop->loop_params.c_t.count;
126 sblp->loop_params.c_t.dataloop = NULL;
127 sblp->el_size = oldloop->el_size;
128 sblp->el_extent = oldloop->el_extent;
129 sblp->el_type = oldloop->el_type;
130 }
131 else
132 {
133 /* use a vector, with extent of original type becoming the stride */
134 sblp->kind = DLOOP_KIND_VECTOR | DLOOP_FINAL_MASK;
135 sblp->loop_params.v_t.count = count;
136 sblp->loop_params.v_t.blocksize = oldloop->loop_params.c_t.count;
137 sblp->loop_params.v_t.stride = type_extent;
138 sblp->loop_params.v_t.dataloop = NULL;
139 sblp->el_size = oldloop->el_size;
140 sblp->el_extent = oldloop->el_extent;
141 sblp->el_type = oldloop->el_type;
142 }
143 }
144 else
145 {
146 /* general case */
147 sblp->kind = DLOOP_KIND_CONTIG;
148 sblp->loop_params.c_t.count = count;
149 sblp->loop_params.c_t.dataloop = oldloop;
150 sblp->el_size = type_size;
151 sblp->el_extent = type_extent;
152 sblp->el_type = el_type;
153
154 depth++; /* we're adding to the depth with the builtin */
155 DLOOP_Assert(depth < (DLOOP_MAX_DATATYPE_DEPTH));
156 }
157
158 dlp = sblp;
159 }
160
161 /* assert instead of return b/c dtype/dloop errorhandling code is inconsistent */
162 DLOOP_Assert(depth < (DLOOP_MAX_DATATYPE_DEPTH));
163
164 /* initialize the rest of the segment values */
165 segp->handle = handle;
166 segp->ptr = (DLOOP_Buffer) buf;
167 segp->stream_off = 0;
168 segp->cur_sp = 0;
169 segp->valid_sp = 0;
170
171 /* initialize the first stackelm in its entirety */
172 elmp = &(segp->stackelm[0]);
173 DLOOP_Stackelm_load(elmp, dlp, 0);
174 branch_detected = elmp->may_require_reloading;
175
176 /* Fill in parameters not set by DLOOP_Stackelm_load */
177 elmp->orig_offset = 0;
178 elmp->curblock = elmp->orig_block;
179 /* DLOOP_Stackelm_offset assumes correct orig_count, curcount, loop_p */
180 elmp->curoffset = /* elmp->orig_offset + */ DLOOP_Stackelm_offset(elmp);
181
182 i = 1;
183 while(!(dlp->kind & DLOOP_FINAL_MASK))
184 {
185 /* get pointer to next dataloop */
186 switch (dlp->kind & DLOOP_KIND_MASK)
187 {
188 case DLOOP_KIND_CONTIG:
189 case DLOOP_KIND_VECTOR:
190 case DLOOP_KIND_BLOCKINDEXED:
191 case DLOOP_KIND_INDEXED:
192 dlp = dlp->loop_params.cm_t.dataloop;
193 break;
194 case DLOOP_KIND_STRUCT:
195 dlp = dlp->loop_params.s_t.dataloop_array[0];
196 break;
197 default:
198 /* --BEGIN ERROR HANDLING-- */
199 DLOOP_Assert(0);
200 break;
201 /* --END ERROR HANDLING-- */
202 }
203
204 DLOOP_Assert(i < DLOOP_MAX_DATATYPE_DEPTH);
205
206 /* loop_p, orig_count, orig_block, and curcount are all filled by us now.
207 * the rest are filled in at processing time.
208 */
209 elmp = &(segp->stackelm[i]);
210
211 DLOOP_Stackelm_load(elmp, dlp, branch_detected);
212 branch_detected = elmp->may_require_reloading;
213 i++;
214
215 }
216
217 segp->valid_sp = depth-1;
218
219 return 0;
220 }
221
222 /* Segment_alloc
223 *
224 */
PREPEND_PREFIX(Segment_alloc)225 struct DLOOP_Segment * PREPEND_PREFIX(Segment_alloc)(void)
226 {
227 return (struct DLOOP_Segment *) DLOOP_Malloc(sizeof(struct DLOOP_Segment));
228 }
229
230 /* Segment_free
231 *
232 * Input Parameters:
233 * segp - pointer to segment
234 */
PREPEND_PREFIX(Segment_free)235 void PREPEND_PREFIX(Segment_free)(struct DLOOP_Segment *segp)
236 {
237 DLOOP_Free(segp);
238 return;
239 }
240
241 /* DLOOP_Segment_manipulate - do something to a segment
242 *
243 * If you think of all the data to be manipulated (packed, unpacked, whatever),
244 * as a stream of bytes, it's easier to understand how first and last fit in.
245 *
246 * This function does all the work, calling the piecefn passed in when it
247 * encounters a datatype element which falls into the range of first..(last-1).
248 *
249 * piecefn can be NULL, in which case this function doesn't do anything when it
250 * hits a region. This is used internally for repositioning within this stream.
251 *
252 * last is a byte offset to the byte just past the last byte in the stream
253 * to operate on. this makes the calculations all over MUCH cleaner.
254 *
255 * stream_off, stream_el_size, first, and last are all working in terms of the
256 * types and sizes for the stream, which might be different from the local sizes
257 * (in the heterogeneous case).
258 *
259 * This is a horribly long function. Too bad; it's complicated :)! -- Rob
260 *
261 * NOTE: THIS IMPLEMENTATION CANNOT HANDLE STRUCT DATALOOPS.
262 */
/* Helper macros for the segment manipulation routine.
 *
 * The statement-like macros operate on names that must be in scope where
 * they are used: segp, lastp, last, cur_sp, valid_sp, stream_off and
 * cur_elmp.  Each is wrapped in do { ... } while (0) so that "MACRO;" forms
 * exactly one statement and can be used as the body of an if/else without
 * breaking the else.
 */

/* write the local position back into the segment and report it via *lastp */
#define DLOOP_SEGMENT_SAVE_LOCAL_VALUES         \
    do {                                        \
        segp->cur_sp     = cur_sp;              \
        segp->valid_sp   = valid_sp;            \
        segp->stream_off = stream_off;          \
        *lastp           = stream_off;          \
    } while (0)

/* read the requested end (*lastp) and the segment position into locals */
#define DLOOP_SEGMENT_LOAD_LOCAL_VALUES         \
    do {                                        \
        last       = *lastp;                    \
        cur_sp     = segp->cur_sp;              \
        valid_sp   = segp->valid_sp;            \
        stream_off = segp->stream_off;          \
        cur_elmp   = &(segp->stackelm[cur_sp]); \
    } while (0)

/* rewind the segment to the start of the stream; only the segment fields
 * and stack element 0 are touched (the local stream_off is not)
 */
#define DLOOP_SEGMENT_RESET_VALUES                                      \
    do {                                                                \
        segp->stream_off     = 0;                                       \
        segp->cur_sp         = 0;                                       \
        cur_elmp             = &(segp->stackelm[0]);                    \
        cur_elmp->curcount   = cur_elmp->orig_count;                    \
        cur_elmp->orig_block = DLOOP_Stackelm_blocksize(cur_elmp);      \
        cur_elmp->curblock   = cur_elmp->orig_block;                    \
        cur_elmp->curoffset  = cur_elmp->orig_offset +                  \
            DLOOP_Stackelm_offset(cur_elmp);                            \
    } while (0)

/* step up one stack level; when the stack is exhausted, save the local
 * values and return from the enclosing (void) function
 */
#define DLOOP_SEGMENT_POP_AND_MAYBE_EXIT                \
    do {                                                \
        cur_sp--;                                       \
        if (cur_sp >= 0) {                              \
            cur_elmp = &segp->stackelm[cur_sp];         \
        }                                               \
        else {                                          \
            DLOOP_SEGMENT_SAVE_LOCAL_VALUES;            \
            return;                                     \
        }                                               \
    } while (0)

/* step down one stack level */
#define DLOOP_SEGMENT_PUSH                      \
    do {                                        \
        cur_sp++;                               \
        cur_elmp = &segp->stackelm[cur_sp];     \
    } while (0)

/* Accessors for the per-block arrays of the dataloop referenced by a stack
 * element; curcount_ is used directly as the array index.
 */
#define DLOOP_STACKELM_BLOCKINDEXED_OFFSET(elmp_, curcount_) \
    ((elmp_)->loop_p->loop_params.bi_t.offset_array[(curcount_)])

#define DLOOP_STACKELM_INDEXED_OFFSET(elmp_, curcount_) \
    ((elmp_)->loop_p->loop_params.i_t.offset_array[(curcount_)])

#define DLOOP_STACKELM_INDEXED_BLOCKSIZE(elmp_, curcount_) \
    ((elmp_)->loop_p->loop_params.i_t.blocksize_array[(curcount_)])

#define DLOOP_STACKELM_STRUCT_OFFSET(elmp_, curcount_) \
    ((elmp_)->loop_p->loop_params.s_t.offset_array[(curcount_)])

#define DLOOP_STACKELM_STRUCT_BLOCKSIZE(elmp_, curcount_) \
    ((elmp_)->loop_p->loop_params.s_t.blocksize_array[(curcount_)])

#define DLOOP_STACKELM_STRUCT_EL_EXTENT(elmp_, curcount_) \
    ((elmp_)->loop_p->loop_params.s_t.el_extent_array[(curcount_)])

#define DLOOP_STACKELM_STRUCT_DATALOOP(elmp_, curcount_) \
    ((elmp_)->loop_p->loop_params.s_t.dataloop_array[(curcount_)])
328
PREPEND_PREFIX(Segment_manipulate)329 void PREPEND_PREFIX(Segment_manipulate)(struct DLOOP_Segment *segp,
330 DLOOP_Offset first,
331 DLOOP_Offset *lastp,
332 int (*contigfn) (DLOOP_Offset *blocks_p,
333 DLOOP_Type el_type,
334 DLOOP_Offset rel_off,
335 DLOOP_Buffer bufp,
336 void *v_paramp),
337 int (*vectorfn) (DLOOP_Offset *blocks_p,
338 DLOOP_Count count,
339 DLOOP_Count blklen,
340 DLOOP_Offset stride,
341 DLOOP_Type el_type,
342 DLOOP_Offset rel_off,
343 DLOOP_Buffer bufp,
344 void *v_paramp),
345 int (*blkidxfn) (DLOOP_Offset *blocks_p,
346 DLOOP_Count count,
347 DLOOP_Count blklen,
348 DLOOP_Offset *offsetarray,
349 DLOOP_Type el_type,
350 DLOOP_Offset rel_off,
351 DLOOP_Buffer bufp,
352 void *v_paramp),
353 int (*indexfn) (DLOOP_Offset *blocks_p,
354 DLOOP_Count count,
355 DLOOP_Count *blockarray,
356 DLOOP_Offset *offsetarray,
357 DLOOP_Type el_type,
358 DLOOP_Offset rel_off,
359 DLOOP_Buffer bufp,
360 void *v_paramp),
361 DLOOP_Offset (*sizefn) (DLOOP_Type el_type),
362 void *pieceparams)
363 {
364 /* these four are the "local values": cur_sp, valid_sp, last, stream_off */
365 int cur_sp, valid_sp;
366 DLOOP_Offset last, stream_off;
367
368 struct DLOOP_Dataloop_stackelm *cur_elmp;
369 enum { PF_NULL, PF_CONTIG, PF_VECTOR, PF_BLOCKINDEXED, PF_INDEXED } piecefn_type = PF_NULL;
370
371 DLOOP_SEGMENT_LOAD_LOCAL_VALUES;
372
373 if (first == *lastp) {
374 /* nothing to do */
375 DLOOP_dbg_printf("dloop_segment_manipulate: warning: first == last (" DLOOP_OFFSET_FMT_DEC_SPEC ")\n", first);
376 return;
377 }
378
379 /* first we ensure that stream_off and first are in the same spot */
380 if (first != stream_off) {
381 #ifdef DLOOP_DEBUG_MANIPULATE
382 DLOOP_dbg_printf("first=" DLOOP_OFFSET_FMT_DEC_SPEC "; stream_off=" DLOOP_OFFSET_FMT_DEC_SPEC "; resetting.\n",
383 first, stream_off);
384 #endif
385
386 if (first < stream_off) {
387 DLOOP_SEGMENT_RESET_VALUES;
388 stream_off = 0;
389 }
390
391 if (first != stream_off) {
392 DLOOP_Offset tmp_last = first;
393
394 /* use manipulate function with a NULL piecefn to advance
395 * stream offset
396 */
397 PREPEND_PREFIX(Segment_manipulate)(segp,
398 stream_off,
399 &tmp_last,
400 NULL, /* contig fn */
401 NULL, /* vector fn */
402 NULL, /* blkidx fn */
403 NULL, /* index fn */
404 sizefn,
405 NULL);
406
407 /* --BEGIN ERROR HANDLING-- */
408 /* verify that we're in the right location */
409 DLOOP_Assert(tmp_last == first);
410 /* --END ERROR HANDLING-- */
411 }
412
413 DLOOP_SEGMENT_LOAD_LOCAL_VALUES;
414
415 #ifdef DLOOP_DEBUG_MANIPULATE
416 DLOOP_dbg_printf("done repositioning stream_off; first=" DLOOP_OFFSET_FMT_DEC_SPEC ", stream_off=" DLOOP_OFFSET_FMT_DEC_SPEC ", last=" DLOOP_OFFSET_FMT_DEC_SPEC "\n",
417 first, stream_off, last);
418 #endif
419 }
420
421 for (;;) {
422 #ifdef DLOOP_DEBUG_MANIPULATE
423 #if 0
424 DLOOP_dbg_printf("looptop; cur_sp=%d, cur_elmp=%x\n",
425 cur_sp, (unsigned) cur_elmp);
426 #endif
427 #endif
428
429 if (cur_elmp->loop_p->kind & DLOOP_FINAL_MASK) {
430 int piecefn_indicated_exit = -1;
431 DLOOP_Offset myblocks, local_el_size, stream_el_size;
432 DLOOP_Type el_type;
433
434 /* structs are never finals (leaves) */
435 DLOOP_Assert((cur_elmp->loop_p->kind & DLOOP_KIND_MASK) !=
436 DLOOP_KIND_STRUCT);
437
438 /* pop immediately on zero count */
439 if (cur_elmp->curcount == 0) DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
440
441 /* size on this system of the int, double, etc. that is
442 * the elementary type.
443 */
444 local_el_size = cur_elmp->loop_p->el_size;
445 el_type = cur_elmp->loop_p->el_type;
446 stream_el_size = (sizefn) ? sizefn(el_type) : local_el_size;
447
448 /* calculate number of elem. types to work on and function to use.
449 * default is to use the contig piecefn (if there is one).
450 */
451 myblocks = cur_elmp->curblock;
452 piecefn_type = (contigfn ? PF_CONTIG : PF_NULL);
453
454 /* check for opportunities to use other piecefns */
455 switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
456 case DLOOP_KIND_CONTIG:
457 break;
458 case DLOOP_KIND_BLOCKINDEXED:
459 /* only use blkidx piecefn if at start of blkidx type */
460 if (blkidxfn &&
461 cur_elmp->orig_block == cur_elmp->curblock &&
462 cur_elmp->orig_count == cur_elmp->curcount)
463 {
464 /* TODO: RELAX CONSTRAINTS */
465 myblocks = cur_elmp->curblock * cur_elmp->curcount;
466 piecefn_type = PF_BLOCKINDEXED;
467 }
468 break;
469 case DLOOP_KIND_INDEXED:
470 /* only use index piecefn if at start of the index type.
471 * count test checks that we're on first block.
472 * block test checks that we haven't made progress on first block.
473 */
474 if (indexfn &&
475 cur_elmp->orig_count == cur_elmp->curcount &&
476 cur_elmp->curblock == DLOOP_STACKELM_INDEXED_BLOCKSIZE(cur_elmp, 0))
477 {
478 /* TODO: RELAX CONSTRAINT ON COUNT? */
479 myblocks = cur_elmp->loop_p->loop_params.i_t.total_blocks;
480 piecefn_type = PF_INDEXED;
481 }
482 break;
483 case DLOOP_KIND_VECTOR:
484 /* only use the vector piecefn if at the start of a
485 * contiguous block.
486 */
487 if (vectorfn && cur_elmp->orig_block == cur_elmp->curblock)
488 {
489 myblocks = cur_elmp->curblock * cur_elmp->curcount;
490 piecefn_type = PF_VECTOR;
491 }
492 break;
493 default:
494 /* --BEGIN ERROR HANDLING-- */
495 DLOOP_Assert(0);
496 break;
497 /* --END ERROR HANDLING-- */
498 }
499
500 #ifdef DLOOP_DEBUG_MANIPULATE
501 DLOOP_dbg_printf("\thit leaf; cur_sp=%d, elmp=%x, piece_sz=" DLOOP_OFFSET_FMT_DEC_SPEC "\n",
502 cur_sp,
503 (unsigned) cur_elmp, myblocks * local_el_size);
504 #endif
505
506 /* enforce the last parameter if necessary by reducing myblocks */
507 if (last != SEGMENT_IGNORE_LAST &&
508 (stream_off + (myblocks * stream_el_size) > last))
509 {
510 myblocks = ((last - stream_off) / stream_el_size);
511 #ifdef DLOOP_DEBUG_MANIPULATE
512 DLOOP_dbg_printf("\tpartial block count=" DLOOP_OFFSET_FMT_DEC_SPEC " (" DLOOP_OFFSET_FMT_DEC_SPEC " bytes)\n",
513 myblocks,
514 myblocks * stream_el_size);
515 #endif
516 if (myblocks == 0) {
517 DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
518 return;
519 }
520 }
521
522 /* call piecefn to perform data manipulation */
523 switch (piecefn_type) {
524 case PF_NULL:
525 piecefn_indicated_exit = 0;
526 #ifdef DLOOP_DEBUG_MANIPULATE
527 DLOOP_dbg_printf("\tNULL piecefn for this piece\n");
528 #endif
529 break;
530 case PF_CONTIG:
531 DLOOP_Assert(myblocks <= cur_elmp->curblock);
532 piecefn_indicated_exit =
533 contigfn(&myblocks,
534 el_type,
535 cur_elmp->curoffset, /* relative to segp->ptr */
536 segp->ptr, /* start of buffer (from segment) */
537 pieceparams);
538 break;
539 case PF_VECTOR:
540 piecefn_indicated_exit =
541 vectorfn(&myblocks,
542 cur_elmp->curcount,
543 cur_elmp->orig_block,
544 cur_elmp->loop_p->loop_params.v_t.stride,
545 el_type,
546 cur_elmp->curoffset,
547 segp->ptr,
548 pieceparams);
549 break;
550 case PF_BLOCKINDEXED:
551 piecefn_indicated_exit =
552 blkidxfn(&myblocks,
553 cur_elmp->curcount,
554 cur_elmp->orig_block,
555 cur_elmp->loop_p->loop_params.bi_t.offset_array,
556 el_type,
557 cur_elmp->orig_offset, /* blkidxfn adds offset */
558 segp->ptr,
559 pieceparams);
560 break;
561 case PF_INDEXED:
562 piecefn_indicated_exit =
563 indexfn(&myblocks,
564 cur_elmp->curcount,
565 cur_elmp->loop_p->loop_params.i_t.blocksize_array,
566 cur_elmp->loop_p->loop_params.i_t.offset_array,
567 el_type,
568 cur_elmp->orig_offset, /* indexfn adds offset value */
569 segp->ptr,
570 pieceparams);
571 break;
572 }
573
574 /* update local values based on piecefn returns (myblocks and
575 * piecefn_indicated_exit)
576 */
577 DLOOP_Assert(piecefn_indicated_exit >= 0);
578 DLOOP_Assert(myblocks >= 0);
579 stream_off += myblocks * stream_el_size;
580
581 /* myblocks of 0 or less than cur_elmp->curblock indicates
582 * that we should stop processing and return.
583 */
584 if (myblocks == 0) {
585 DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
586 return;
587 }
588 else if (myblocks < (DLOOP_Offset)(cur_elmp->curblock)) {
589 cur_elmp->curoffset += myblocks * local_el_size;
590 cur_elmp->curblock -= myblocks;
591
592 DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
593 return;
594 }
595 else /* myblocks >= cur_elmp->curblock */ {
596 int count_index = 0;
597
598 /* this assumes we're either *just* processing the last parts
599 * of the current block, or we're processing as many blocks as
600 * we like starting at the beginning of one.
601 */
602
603 switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
604 case DLOOP_KIND_INDEXED:
605 while (myblocks > 0 && myblocks >= (DLOOP_Offset)(cur_elmp->curblock)) {
606 myblocks -= (DLOOP_Offset)(cur_elmp->curblock);
607 cur_elmp->curcount--;
608 DLOOP_Assert(cur_elmp->curcount >= 0);
609
610 count_index = cur_elmp->orig_count -
611 cur_elmp->curcount;
612 cur_elmp->curblock =
613 DLOOP_STACKELM_INDEXED_BLOCKSIZE(cur_elmp,
614 count_index);
615 }
616
617 if (cur_elmp->curcount == 0) {
618 /* don't bother to fill in values; we're popping anyway */
619 DLOOP_Assert(myblocks == 0);
620 DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
621 }
622 else {
623 cur_elmp->orig_block = cur_elmp->curblock;
624 cur_elmp->curoffset = cur_elmp->orig_offset +
625 DLOOP_STACKELM_INDEXED_OFFSET(cur_elmp,
626 count_index);
627
628 cur_elmp->curblock -= myblocks;
629 cur_elmp->curoffset += myblocks * local_el_size;
630 }
631 break;
632 case DLOOP_KIND_VECTOR:
633 /* this math relies on assertions at top of code block */
634 cur_elmp->curcount -= myblocks / (DLOOP_Offset)(cur_elmp->curblock);
635 if (cur_elmp->curcount == 0) {
636 DLOOP_Assert(myblocks % ((DLOOP_Offset)(cur_elmp->curblock)) == 0);
637 DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
638 }
639 else {
640 /* this math relies on assertions at top of code
641 * block
642 */
643 cur_elmp->curblock = cur_elmp->orig_block -
644 (myblocks % (DLOOP_Offset)(cur_elmp->curblock));
645 /* new offset = original offset +
646 * stride * whole blocks +
647 * leftover bytes
648 */
649 cur_elmp->curoffset = cur_elmp->orig_offset +
650 (((DLOOP_Offset)(cur_elmp->orig_count - cur_elmp->curcount)) *
651 cur_elmp->loop_p->loop_params.v_t.stride) +
652 (((DLOOP_Offset)(cur_elmp->orig_block - cur_elmp->curblock)) *
653 local_el_size);
654 }
655 break;
656 case DLOOP_KIND_CONTIG:
657 /* contigs that reach this point have always been
658 * completely processed
659 */
660 DLOOP_Assert(myblocks == (DLOOP_Offset)(cur_elmp->curblock) &&
661 cur_elmp->curcount == 1);
662 DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
663 break;
664 case DLOOP_KIND_BLOCKINDEXED:
665 while (myblocks > 0 && myblocks >= (DLOOP_Offset)(cur_elmp->curblock))
666 {
667 myblocks -= (DLOOP_Offset)(cur_elmp->curblock);
668 cur_elmp->curcount--;
669 DLOOP_Assert(cur_elmp->curcount >= 0);
670
671 count_index = cur_elmp->orig_count -
672 cur_elmp->curcount;
673 cur_elmp->curblock = cur_elmp->orig_block;
674 }
675 if (cur_elmp->curcount == 0) {
676 /* popping */
677 DLOOP_Assert(myblocks == 0);
678 DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
679 }
680 else {
681 /* cur_elmp->orig_block = cur_elmp->curblock; */
682 cur_elmp->curoffset = cur_elmp->orig_offset +
683 DLOOP_STACKELM_BLOCKINDEXED_OFFSET(cur_elmp,
684 count_index);
685 cur_elmp->curblock -= myblocks;
686 cur_elmp->curoffset += myblocks * local_el_size;
687 }
688 break;
689 }
690 }
691
692 if (piecefn_indicated_exit) {
693 /* piece function indicated that we should quit processing */
694 DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
695 return;
696 }
697 } /* end of if leaf */
698 else if (cur_elmp->curblock == 0) {
699 #ifdef DLOOP_DEBUG_MANIPULATE
700 DLOOP_dbg_printf("\thit end of block; elmp=%x [%d]\n",
701 (unsigned) cur_elmp, cur_sp);
702 #endif
703 cur_elmp->curcount--;
704
705 /* new block. for indexed and struct reset orig_block.
706 * reset curblock for all types
707 */
708 switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
709 case DLOOP_KIND_CONTIG:
710 case DLOOP_KIND_VECTOR:
711 case DLOOP_KIND_BLOCKINDEXED:
712 break;
713 case DLOOP_KIND_INDEXED:
714 cur_elmp->orig_block =
715 DLOOP_STACKELM_INDEXED_BLOCKSIZE(cur_elmp, cur_elmp->curcount ? cur_elmp->orig_count - cur_elmp->curcount : 0);
716 break;
717 case DLOOP_KIND_STRUCT:
718 cur_elmp->orig_block =
719 DLOOP_STACKELM_STRUCT_BLOCKSIZE(cur_elmp, cur_elmp->curcount ? cur_elmp->orig_count - cur_elmp->curcount : 0);
720 break;
721 default:
722 /* --BEGIN ERROR HANDLING-- */
723 DLOOP_Assert(0);
724 break;
725 /* --END ERROR HANDLING-- */
726 }
727 cur_elmp->curblock = cur_elmp->orig_block;
728
729 if (cur_elmp->curcount == 0) {
730 #ifdef DLOOP_DEBUG_MANIPULATE
731 DLOOP_dbg_printf("\talso hit end of count; elmp=%x [%d]\n",
732 (unsigned) cur_elmp, cur_sp);
733 #endif
734 DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
735 }
736 }
737 else /* push the stackelm */ {
738 DLOOP_Dataloop_stackelm *next_elmp;
739 int count_index, block_index;
740
741 count_index = cur_elmp->orig_count - cur_elmp->curcount;
742 block_index = cur_elmp->orig_block - cur_elmp->curblock;
743
744 /* reload the next stackelm if necessary */
745 next_elmp = &(segp->stackelm[cur_sp + 1]);
746 if (cur_elmp->may_require_reloading) {
747 DLOOP_Dataloop *load_dlp = NULL;
748 switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
749 case DLOOP_KIND_CONTIG:
750 case DLOOP_KIND_VECTOR:
751 case DLOOP_KIND_BLOCKINDEXED:
752 case DLOOP_KIND_INDEXED:
753 load_dlp = cur_elmp->loop_p->loop_params.cm_t.dataloop;
754 break;
755 case DLOOP_KIND_STRUCT:
756 load_dlp = DLOOP_STACKELM_STRUCT_DATALOOP(cur_elmp,
757 count_index);
758 break;
759 default:
760 /* --BEGIN ERROR HANDLING-- */
761 DLOOP_Assert(0);
762 break;
763 /* --END ERROR HANDLING-- */
764 }
765
766 #ifdef DLOOP_DEBUG_MANIPULATE
767 DLOOP_dbg_printf("\tloading dlp=%x, elmp=%x [%d]\n",
768 (unsigned) load_dlp,
769 (unsigned) next_elmp,
770 cur_sp+1);
771 #endif
772
773 DLOOP_Stackelm_load(next_elmp,
774 load_dlp,
775 1);
776 }
777
778 #ifdef DLOOP_DEBUG_MANIPULATE
779 DLOOP_dbg_printf("\tpushing type, elmp=%x [%d], count=%d, block=%d\n",
780 (unsigned) cur_elmp, cur_sp, count_index,
781 block_index);
782 #endif
783 /* set orig_offset and all cur values for new stackelm.
784 * this is done in two steps: first set orig_offset based on
785 * current stackelm, then set cur values based on new stackelm.
786 */
787 switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
788 case DLOOP_KIND_CONTIG:
789 next_elmp->orig_offset = cur_elmp->curoffset +
790 (DLOOP_Offset) block_index * cur_elmp->loop_p->el_extent;
791 break;
792 case DLOOP_KIND_VECTOR:
793 /* note: stride is in bytes */
794 next_elmp->orig_offset = cur_elmp->orig_offset +
795 (DLOOP_Offset) count_index * cur_elmp->loop_p->loop_params.v_t.stride +
796 (DLOOP_Offset) block_index * cur_elmp->loop_p->el_extent;
797 break;
798 case DLOOP_KIND_BLOCKINDEXED:
799 next_elmp->orig_offset = cur_elmp->orig_offset +
800 (DLOOP_Offset) block_index * cur_elmp->loop_p->el_extent +
801 DLOOP_STACKELM_BLOCKINDEXED_OFFSET(cur_elmp,
802 count_index);
803 break;
804 case DLOOP_KIND_INDEXED:
805 next_elmp->orig_offset = cur_elmp->orig_offset +
806 (DLOOP_Offset) block_index * cur_elmp->loop_p->el_extent +
807 DLOOP_STACKELM_INDEXED_OFFSET(cur_elmp, count_index);
808 break;
809 case DLOOP_KIND_STRUCT:
810 next_elmp->orig_offset = cur_elmp->orig_offset +
811 (DLOOP_Offset) block_index * DLOOP_STACKELM_STRUCT_EL_EXTENT(cur_elmp, count_index) +
812 DLOOP_STACKELM_STRUCT_OFFSET(cur_elmp, count_index);
813 break;
814 default:
815 /* --BEGIN ERROR HANDLING-- */
816 DLOOP_Assert(0);
817 break;
818 /* --END ERROR HANDLING-- */
819 }
820
821 #ifdef DLOOP_DEBUG_MANIPULATE
822 DLOOP_dbg_printf("\tstep 1: next orig_offset = " DLOOP_OFFSET_FMT_DEC_SPEC " (0x" DLOOP_OFFSET_FMT_HEX_SPEC ")\n",
823 next_elmp->orig_offset,
824 next_elmp->orig_offset);
825 #endif
826
827 switch (next_elmp->loop_p->kind & DLOOP_KIND_MASK) {
828 case DLOOP_KIND_CONTIG:
829 case DLOOP_KIND_VECTOR:
830 next_elmp->curcount = next_elmp->orig_count;
831 next_elmp->curblock = next_elmp->orig_block;
832 next_elmp->curoffset = next_elmp->orig_offset;
833 break;
834 case DLOOP_KIND_BLOCKINDEXED:
835 next_elmp->curcount = next_elmp->orig_count;
836 next_elmp->curblock = next_elmp->orig_block;
837 next_elmp->curoffset = next_elmp->orig_offset +
838 DLOOP_STACKELM_BLOCKINDEXED_OFFSET(next_elmp, 0);
839 break;
840 case DLOOP_KIND_INDEXED:
841 next_elmp->curcount = next_elmp->orig_count;
842 next_elmp->curblock =
843 DLOOP_STACKELM_INDEXED_BLOCKSIZE(next_elmp, 0);
844 next_elmp->curoffset = next_elmp->orig_offset +
845 DLOOP_STACKELM_INDEXED_OFFSET(next_elmp, 0);
846 break;
847 case DLOOP_KIND_STRUCT:
848 next_elmp->curcount = next_elmp->orig_count;
849 next_elmp->curblock =
850 DLOOP_STACKELM_STRUCT_BLOCKSIZE(next_elmp, 0);
851 next_elmp->curoffset = next_elmp->orig_offset +
852 DLOOP_STACKELM_STRUCT_OFFSET(next_elmp, 0);
853 break;
854 default:
855 /* --BEGIN ERROR HANDLING-- */
856 DLOOP_Assert(0);
857 break;
858 /* --END ERROR HANDLING-- */
859 }
860
861 #ifdef DLOOP_DEBUG_MANIPULATE
862 DLOOP_dbg_printf("\tstep 2: next curoffset = " DLOOP_OFFSET_FMT_DEC_SPEC " (0x" DLOOP_OFFSET_FMT_HEX_SPEC ")\n",
863 next_elmp->curoffset,
864 next_elmp->curoffset);
865 #endif
866
867 cur_elmp->curblock--;
868 DLOOP_SEGMENT_PUSH;
869 } /* end of else push the stackelm */
870 } /* end of for (;;) */
871
872 #ifdef DLOOP_DEBUG_MANIPULATE
873 DLOOP_dbg_printf("hit end of datatype\n");
874 #endif
875
876 DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
877 return;
878 }
879
880 /* DLOOP_Stackelm_blocksize - returns block size for stackelm based on current
881 * count in stackelm.
882 *
883 * NOTE: loop_p, orig_count, and curcount members of stackelm MUST be correct
884 * before this is called!
885 *
886 */
DLOOP_Stackelm_blocksize(struct DLOOP_Dataloop_stackelm * elmp)887 static inline DLOOP_Count DLOOP_Stackelm_blocksize(struct DLOOP_Dataloop_stackelm *elmp)
888 {
889 struct DLOOP_Dataloop *dlp = elmp->loop_p;
890
891 switch(dlp->kind & DLOOP_KIND_MASK) {
892 case DLOOP_KIND_CONTIG:
893 /* NOTE: we're dropping the count into the
894 * blksize field for contigs, as described
895 * in the init call.
896 */
897 return dlp->loop_params.c_t.count;
898 break;
899 case DLOOP_KIND_VECTOR:
900 return dlp->loop_params.v_t.blocksize;
901 break;
902 case DLOOP_KIND_BLOCKINDEXED:
903 return dlp->loop_params.bi_t.blocksize;
904 break;
905 case DLOOP_KIND_INDEXED:
906 return dlp->loop_params.i_t.blocksize_array[elmp->orig_count - elmp->curcount];
907 break;
908 case DLOOP_KIND_STRUCT:
909 return dlp->loop_params.s_t.blocksize_array[elmp->orig_count - elmp->curcount];
910 break;
911 default:
912 /* --BEGIN ERROR HANDLING-- */
913 DLOOP_Assert(0);
914 break;
915 /* --END ERROR HANDLING-- */
916 }
917 return -1;
918 }
919
920 /* DLOOP_Stackelm_offset - returns starting offset (displacement) for stackelm
921 * based on current count in stackelm.
922 *
923 * NOTE: loop_p, orig_count, and curcount members of stackelm MUST be correct
924 * before this is called!
925 *
926 * also, this really is only good at init time for vectors and contigs
927 * (all the time for indexed) at the moment.
928 *
929 */
DLOOP_Stackelm_offset(struct DLOOP_Dataloop_stackelm * elmp)930 static inline DLOOP_Offset DLOOP_Stackelm_offset(struct DLOOP_Dataloop_stackelm *elmp)
931 {
932 struct DLOOP_Dataloop *dlp = elmp->loop_p;
933
934 switch(dlp->kind & DLOOP_KIND_MASK) {
935 case DLOOP_KIND_VECTOR:
936 case DLOOP_KIND_CONTIG:
937 return 0;
938 break;
939 case DLOOP_KIND_BLOCKINDEXED:
940 return dlp->loop_params.bi_t.offset_array[elmp->orig_count - elmp->curcount];
941 break;
942 case DLOOP_KIND_INDEXED:
943 return dlp->loop_params.i_t.offset_array[elmp->orig_count - elmp->curcount];
944 break;
945 case DLOOP_KIND_STRUCT:
946 return dlp->loop_params.s_t.offset_array[elmp->orig_count - elmp->curcount];
947 break;
948 default:
949 /* --BEGIN ERROR HANDLING-- */
950 DLOOP_Assert(0);
951 break;
952 /* --END ERROR HANDLING-- */
953 }
954 return -1;
955 }
956
957 /* DLOOP_Stackelm_load
958 * loop_p, orig_count, orig_block, and curcount are all filled by us now.
959 * the rest are filled in at processing time.
960 */
DLOOP_Stackelm_load(struct DLOOP_Dataloop_stackelm * elmp,struct DLOOP_Dataloop * dlp,int branch_flag)961 static inline void DLOOP_Stackelm_load(struct DLOOP_Dataloop_stackelm *elmp,
962 struct DLOOP_Dataloop *dlp,
963 int branch_flag)
964 {
965 elmp->loop_p = dlp;
966
967 if ((dlp->kind & DLOOP_KIND_MASK) == DLOOP_KIND_CONTIG) {
968 elmp->orig_count = 1; /* put in blocksize instead */
969 }
970 else {
971 elmp->orig_count = dlp->loop_params.count;
972 }
973
974 if (branch_flag || (dlp->kind & DLOOP_KIND_MASK) == DLOOP_KIND_STRUCT)
975 {
976 elmp->may_require_reloading = 1;
977 }
978 else {
979 elmp->may_require_reloading = 0;
980 }
981
982 /* required by DLOOP_Stackelm_blocksize */
983 elmp->curcount = elmp->orig_count;
984
985 elmp->orig_block = DLOOP_Stackelm_blocksize(elmp);
986 /* TODO: GO AHEAD AND FILL IN CURBLOCK? */
987 }
988
989 /*
990 * Local variables:
991 * c-indent-tabs-mode: nil
992 * End:
993 */
994