1 /*
2  * Copyright (C) by Argonne National Laboratory
3  *     See COPYRIGHT in top-level directory
4  */
5 
6 #include "mpiimpl.h"
7 #include "dataloop_internal.h"
8 #include "datatype.h"
9 #include "mpir_typerep.h"
10 #include "looputil.h"
11 #include "veccpy.h"
12 
13 #define M2M_TO_USERBUF   0
14 #define M2M_FROM_USERBUF 1
15 
16 /* piece_params
17  *
18  * This structure is used to pass function-specific parameters into our
19  * segment processing function.  This allows us to get additional parameters
20  * to the functions it calls without changing the prototype.
21  */
struct piece_params {
    /* One member per kind of piece function; a given traversal uses only the
     * member that its own piece function reads. */
    union {
        /* Used by contig_pack_external32_to_buf: current write position in
         * the pack buffer, advanced after every piece. */
        struct {
            char *pack_buffer;
        } pack;
        /* NOTE(review): not referenced in the visible part of this file;
         * presumably the iovec array being filled plus the next free slot
         * and the array capacity -- confirm against the to-iov code. */
        struct {
            struct iovec *vectorp;
            int index;
            int length;
        } pack_vector;
        /* NOTE(review): not referenced in the visible part of this file;
         * presumably parallel offset/size output arrays for flattening. */
        struct {
            int64_t *offp;
            MPI_Aint *sizep;    /* see notes in Segment_flatten header */
            int index;
            int length;
        } flatten;
        /* NOTE(review): not referenced in the visible part of this file;
         * presumably state for counting contiguous blocks. */
        struct {
            char *last_loc;
            int count;
        } contig_blocks;
        /* Used by contig_unpack_external32_to_buf: current read position in
         * the unpack buffer, advanced after every piece. */
        struct {
            const char *unpack_buffer;
        } unpack;
        /* NOTE(review): not referenced in the visible part of this file. */
        struct {
            int stream_off;
        } print;
    } u;
};
50 
51 /* #define MPICH_DEBUG_SEGMENT_MOVE */
52 /* TODO: Consider integrating this with the general debug support. */
/* Note: This does not use the CVAR support for the environment variable
   because (a) this is temporary code and (b) it is intended for expert
   developers only */
56 #ifdef MPICH_DEBUG_SEGMENT_MOVE
/* Tracing switch: negative until the environment has been consulted,
 * afterwards 0 (off) or 1 (on). */
static int printSegment = -1;

/* Read MPICH_DATALOOP_PRINT and latch the answer into printSegment.
 * Only the exact values "yes" and "YES" turn tracing on; an unset variable
 * or any other value turns it off. */
static void setPrint(void)
{
    const char *value = getenv("MPICH_DATALOOP_PRINT");
    int enabled = 0;

    if (value != NULL) {
        enabled = (!strcmp(value, "yes") || !strcmp(value, "YES"));
    }

    printSegment = enabled;
}
67 
/* DBG_SEGMENT(_a): execute statement _a only when printSegment is nonzero.
 * While printSegment is still negative, setPrint() is called first to
 * resolve it. */
#define DBG_SEGMENT(_a) do { if (printSegment < 0) setPrint(); \
        if (printSegment) { _a; } } while (0)
#else
/* MPICH_DEBUG_SEGMENT_MOVE is not defined: DBG_SEGMENT expands to nothing. */
#define DBG_SEGMENT(_a)
#endif
73 
74 /* NOTE: bufp values are unused, ripe for removal */
75 
/* Memory-to-memory piece functions.  MPIR_Segment_pack and
 * MPIR_Segment_unpack hand these to MPII_Segment_manipulate; the copy
 * direction is taken from the parameter structure passed as v_paramp. */
static int contig_m2m(MPI_Aint * blocks_p,
                      MPI_Datatype el_type, MPI_Aint rel_off, void *bufp, void *v_paramp);
static int vector_m2m(MPI_Aint * blocks_p,
                      MPI_Aint count,
                      MPI_Aint blksz,
                      MPI_Aint stride,
                      MPI_Datatype el_type, MPI_Aint rel_off, void *bufp, void *v_paramp);
static int blkidx_m2m(MPI_Aint * blocks_p,
                      MPI_Aint count,
                      MPI_Aint blocklen,
                      MPI_Aint * offsetarray,
                      MPI_Datatype el_type, MPI_Aint rel_off, void *bufp, void *v_paramp);
static int index_m2m(MPI_Aint * blocks_p,
                     MPI_Aint count,
                     MPI_Aint * blockarray,
                     MPI_Aint * offsetarray,
                     MPI_Datatype el_type, MPI_Aint rel_off, void *bufp, void *v_paramp);

/* prototypes of internal functions */
/* NOTE(review): the definitions of the two to-iov piece functions below are
 * not in the visible part of this file. */
static int vector_pack_to_iov(MPI_Aint * blocks_p,
                              MPI_Aint count,
                              MPI_Aint blksz,
                              MPI_Aint stride,
                              MPI_Datatype el_type, MPI_Aint rel_off, void *bufp, void *v_paramp);

static int contig_pack_to_iov(MPI_Aint * blocks_p,
                              MPI_Datatype el_type, MPI_Aint rel_off, void *bufp, void *v_paramp);
103 
/* Tell whether el_type is one of the floating-point basic types (real or
 * complex) that the external32 piece functions route through
 * external32_float_convert.
 *
 * Returns 1 for MPI_FLOAT, MPI_DOUBLE, MPI_LONG_DOUBLE,
 * MPI_DOUBLE_PRECISION, MPI_COMPLEX and MPI_DOUBLE_COMPLEX, 0 otherwise.
 * MPI_REAL4, MPI_REAL8 and MPI_REAL16 are not tested (those checks were
 * commented out in the original code). */
static inline int is_float_type(MPI_Datatype el_type)
{
    if (el_type == MPI_FLOAT || el_type == MPI_DOUBLE || el_type == MPI_LONG_DOUBLE) {
        return 1;
    }
    if (el_type == MPI_DOUBLE_PRECISION) {
        return 1;
    }
    if (el_type == MPI_COMPLEX || el_type == MPI_DOUBLE_COMPLEX) {
        return 1;
    }
    return 0;
}
113 
/* Convert `count` non-floating-point basic elements from src_buf into
 * dest_buf, one element at a time, using the BASIC_convert16/32/64 macros.
 *
 * dest_buf     - destination buffer (must not be NULL)
 * src_buf      - source buffer (must not be NULL)
 * dest_el_size - size in bytes of one destination element
 * src_el_size  - size in bytes of one source element
 * count        - number of elements to convert
 *
 * Only equal source/destination element sizes are supported; a mismatch
 * prints an error and aborts.  Always returns 0 (the return value is not an
 * error code).
 *
 * NOTE(review): when the sizes match but are not 2, 4 or 8, no branch below
 * runs and the destination is left untouched.
 */
static int external32_basic_convert(char *dest_buf,
                                    const char *src_buf,
                                    int dest_el_size, int src_el_size, MPI_Aint count)
{
    const char *src_ptr = src_buf;
    char *dest_ptr = dest_buf;
    /* Compute the byte length in MPI_Aint.  Narrowing count to int first
     * would wrap for large counts and put src_end in the wrong place, which
     * the != loop tests below would then never (or prematurely) hit. */
    const char *src_end = src_buf + (count * (MPI_Aint) src_el_size);

    MPIR_Assert(dest_buf && src_buf);

    if (src_el_size == dest_el_size) {
        if (src_el_size == 2) {
            while (src_ptr != src_end) {
                BASIC_convert16((*(const TWO_BYTE_BASIC_TYPE *) src_ptr),
                                (*(TWO_BYTE_BASIC_TYPE *) dest_ptr));

                src_ptr += src_el_size;
                dest_ptr += dest_el_size;
            }
        } else if (src_el_size == 4) {
            while (src_ptr != src_end) {
                BASIC_convert32((*(const FOUR_BYTE_BASIC_TYPE *) src_ptr),
                                (*(FOUR_BYTE_BASIC_TYPE *) dest_ptr));

                src_ptr += src_el_size;
                dest_ptr += dest_el_size;
            }
        } else if (src_el_size == 8) {
            while (src_ptr != src_end) {
                /* the 64-bit macro takes the element addresses, not values */
                BASIC_convert64(src_ptr, dest_ptr);

                src_ptr += src_el_size;
                dest_ptr += dest_el_size;
            }
        }
    } else {
        /* TODO */
        MPL_error_printf
            ("Conversion of types whose size is not the same as the size in external32 is not supported\n");
        MPID_Abort(0, MPI_SUCCESS, 1, "Aborting with internal error");
        /* There is no way to return an error code, so an abort is the
         * only choice (the return value of this routine is not
         * an error code) */
    }
    return 0;
}
160 
external32_float_convert(char * dest_buf,const char * src_buf,int dest_el_size,int src_el_size,int count)161 static int external32_float_convert(char *dest_buf,
162                                     const char *src_buf, int dest_el_size, int src_el_size,
163                                     int count)
164 {
165     const char *src_ptr = src_buf;
166     char *dest_ptr = dest_buf;
167     const char *src_end = src_buf + ((int) count * src_el_size);
168 
169     MPIR_Assert(dest_buf && src_buf);
170 
171     if (src_el_size == dest_el_size) {
172         if (src_el_size == 4) {
173             while (src_ptr != src_end) {
174                 FLOAT_convert((*(const FOUR_BYTE_FLOAT_TYPE *) src_ptr),
175                               (*(FOUR_BYTE_FLOAT_TYPE *) dest_ptr));
176 
177                 src_ptr += src_el_size;
178                 dest_ptr += dest_el_size;
179             }
180         } else if (src_el_size == 8) {
181             while (src_ptr != src_end) {
182                 FLOAT_convert((*(const EIGHT_BYTE_FLOAT_TYPE *) src_ptr),
183                               (*(EIGHT_BYTE_FLOAT_TYPE *) dest_ptr));
184 
185                 src_ptr += src_el_size;
186                 dest_ptr += dest_el_size;
187             }
188         }
189     } else {
190         /* TODO */
191         MPL_error_printf
192             ("Conversion of types whose size is not the same as the size in external32 is not supported\n");
193         MPID_Abort(0, MPI_SUCCESS, 1, "Aborting with internal error");
194         /* There is no way to return an error code, so an abort is the
195          * only choice (the return value of this routine is not
196          * an error code) */
197     }
198     return 0;
199 }
200 
201 /* segment_init
202  *
203  * buf    - datatype buffer location
204  * count  - number of instances of the datatype in the buffer
205  * handle - handle for datatype (could be derived or not)
206  * segp   - pointer to previously allocated segment structure
207  *
208  * Notes:
209  * - Assumes that the segment has been allocated.
210  *
211  */
segment_init(const void * buf,MPI_Aint count,MPI_Datatype handle,struct MPIR_Segment * segp)212 static inline void segment_init(const void *buf,
213                                 MPI_Aint count, MPI_Datatype handle, struct MPIR_Segment *segp)
214 {
215     MPI_Aint elmsize = 0;
216     int i, depth = 0;
217     int branch_detected = 0;
218 
219     struct MPII_Dataloop_stackelm *elmp;
220     MPII_Dataloop *dlp = 0, *sblp = &segp->builtin_loop;
221 
222 #ifdef MPII_DATALOOP_DEBUG_MANIPULATE
223     MPL_DBG_MSG_FMT(MPIR_DBG_DATATYPE, VERBOSE,
224                     (MPL_DBG_FDEST, "segment_init: count = %d, buf = %x\n", count, buf));
225 #endif
226 
227     if (!MPII_DATALOOP_HANDLE_HASLOOP(handle)) {
228         /* simplest case; datatype has no loop (basic) */
229 
230         MPIR_Datatype_get_size_macro(handle, elmsize);
231 
232         sblp->kind = MPII_DATALOOP_KIND_CONTIG | MPII_DATALOOP_FINAL_MASK;
233         sblp->loop_params.c_t.count = count;
234         sblp->loop_params.c_t.dataloop = 0;
235         sblp->el_size = elmsize;
236         MPIR_Datatype_get_basic_type(handle, sblp->el_type);
237         MPIR_Datatype_get_extent_macro(handle, sblp->el_extent);
238 
239         dlp = sblp;
240         depth = 1;
241     } else if (count == 0) {
242         /* only use the builtin */
243         sblp->kind = MPII_DATALOOP_KIND_CONTIG | MPII_DATALOOP_FINAL_MASK;
244         sblp->loop_params.c_t.count = 0;
245         sblp->loop_params.c_t.dataloop = 0;
246         sblp->el_size = 0;
247         sblp->el_extent = 0;
248 
249         dlp = sblp;
250         depth = 1;
251     } else if (count == 1) {
252         /* don't use the builtin */
253         MPIR_DATALOOP_GET_LOOPPTR(handle, dlp);
254     } else {
255         /* default: need to use builtin to handle contig; must check
256          * loop depth first
257          */
258         MPII_Dataloop *oldloop; /* loop from original type, before new count */
259         MPI_Aint type_size, type_extent;
260         MPI_Datatype el_type;
261 
262         MPIR_DATALOOP_GET_LOOPPTR(handle, oldloop);
263         MPIR_Assert(oldloop != NULL);
264         MPIR_Datatype_get_size_macro(handle, type_size);
265         MPIR_Datatype_get_extent_macro(handle, type_extent);
266         MPIR_Datatype_get_basic_type(handle, el_type);
267 
268         if (depth == 1 && ((oldloop->kind & MPII_DATALOOP_KIND_MASK) == MPII_DATALOOP_KIND_CONTIG)) {
269             if (type_size == type_extent) {
270                 /* use a contig */
271                 sblp->kind = MPII_DATALOOP_KIND_CONTIG | MPII_DATALOOP_FINAL_MASK;
272                 sblp->loop_params.c_t.count = count * oldloop->loop_params.c_t.count;
273                 sblp->loop_params.c_t.dataloop = NULL;
274                 sblp->el_size = oldloop->el_size;
275                 sblp->el_extent = oldloop->el_extent;
276                 sblp->el_type = oldloop->el_type;
277             } else {
278                 /* use a vector, with extent of original type becoming the stride */
279                 sblp->kind = MPII_DATALOOP_KIND_VECTOR | MPII_DATALOOP_FINAL_MASK;
280                 sblp->loop_params.v_t.count = count;
281                 sblp->loop_params.v_t.blocksize = oldloop->loop_params.c_t.count;
282                 sblp->loop_params.v_t.stride = type_extent;
283                 sblp->loop_params.v_t.dataloop = NULL;
284                 sblp->el_size = oldloop->el_size;
285                 sblp->el_extent = oldloop->el_extent;
286                 sblp->el_type = oldloop->el_type;
287             }
288         } else {
289             /* general case */
290             sblp->kind = MPII_DATALOOP_KIND_CONTIG;
291             sblp->loop_params.c_t.count = count;
292             sblp->loop_params.c_t.dataloop = oldloop;
293             sblp->el_size = type_size;
294             sblp->el_extent = type_extent;
295             sblp->el_type = el_type;
296 
297             depth++;    /* we're adding to the depth with the builtin */
298             MPIR_Assert(depth < (MPII_DATALOOP_MAX_DATATYPE_DEPTH));
299         }
300 
301         dlp = sblp;
302     }
303 
304     /* assert instead of return b/c dtype/dloop errorhandling code is inconsistent */
305     MPIR_Assert(depth < (MPII_DATALOOP_MAX_DATATYPE_DEPTH));
306 
307     /* initialize the rest of the segment values */
308     segp->handle = handle;
309     segp->ptr = (void *) buf;
310     segp->stream_off = 0;
311     segp->cur_sp = 0;
312     segp->valid_sp = 0;
313 
314     /* initialize the first stackelm in its entirety */
315     elmp = &(segp->stackelm[0]);
316     MPII_Dataloop_stackelm_load(elmp, dlp, 0);
317     branch_detected = elmp->may_require_reloading;
318 
319     /* Fill in parameters not set by MPII_Dataloop_stackelm_load */
320     elmp->orig_offset = 0;
321     elmp->curblock = elmp->orig_block;
322     /* MPII_Dataloop_stackelm_offset assumes correct orig_count, curcount, loop_p */
323     elmp->curoffset = /* elmp->orig_offset + */ MPII_Dataloop_stackelm_offset(elmp);
324 
325     i = 1;
326     while (!(dlp->kind & MPII_DATALOOP_FINAL_MASK)) {
327         /* get pointer to next dataloop */
328         switch (dlp->kind & MPII_DATALOOP_KIND_MASK) {
329             case MPII_DATALOOP_KIND_CONTIG:
330             case MPII_DATALOOP_KIND_VECTOR:
331             case MPII_DATALOOP_KIND_BLOCKINDEXED:
332             case MPII_DATALOOP_KIND_INDEXED:
333                 dlp = dlp->loop_params.cm_t.dataloop;
334                 break;
335             case MPII_DATALOOP_KIND_STRUCT:
336                 dlp = dlp->loop_params.s_t.dataloop_array[0];
337                 break;
338             default:
339                 /* --BEGIN ERROR HANDLING-- */
340                 MPIR_Assert(0);
341                 break;
342                 /* --END ERROR HANDLING-- */
343         }
344 
345         MPIR_Assert(i < MPII_DATALOOP_MAX_DATATYPE_DEPTH);
346 
347         /* loop_p, orig_count, orig_block, and curcount are all filled by us now.
348          * the rest are filled in at processing time.
349          */
350         elmp = &(segp->stackelm[i]);
351 
352         MPII_Dataloop_stackelm_load(elmp, dlp, branch_detected);
353         branch_detected = elmp->may_require_reloading;
354         i++;
355 
356     }
357 
358     segp->valid_sp = depth - 1;
359 }
360 
/* MPIR_Segment_alloc
 *
 * Allocate a segment with MPL_malloc and initialize it with segment_init
 * for `count` instances of datatype `handle` located at `buf`.
 *
 * Returns the new segment, or NULL if the allocation failed (in which case
 * nothing is initialized).
 */
struct MPIR_Segment *MPIR_Segment_alloc(const void *buf, MPI_Aint count, MPI_Datatype handle)
{
    struct MPIR_Segment *new_seg = MPL_malloc(sizeof(*new_seg), MPL_MEM_DATATYPE);

    if (new_seg == NULL) {
        return NULL;
    }

    segment_init(buf, count, handle, new_seg);
    return new_seg;
}
371 
/* MPIR_Segment_free
 *
 * Release a segment by passing it to MPL_free.
 *
 * Input Parameters:
 * segp - pointer to segment
 */
void MPIR_Segment_free(struct MPIR_Segment *segp)
{
    MPL_free(segp);
}
382 
/* MPIR_Segment_pack
 *
 * Copy data from the segment's user buffer (segp->ptr) into streambuf by
 * running the memory-to-memory piece functions through
 * MPII_Segment_manipulate with direction M2M_FROM_USERBUF.
 *
 * segp      - segment describing the user buffer
 * first     - passed through to MPII_Segment_manipulate
 * lastp     - passed through to MPII_Segment_manipulate
 * streambuf - destination of the packed bytes
 */
void MPIR_Segment_pack(MPIR_Segment * segp, MPI_Aint first, MPI_Aint * lastp, void *streambuf)
{
    struct MPII_Dataloop_m2m_params m2m;

    DBG_SEGMENT(printf("Segment_pack...\n"));

    /* The user buffer pointer is still kept in the segment; the idea of
     * moving it into this per-use structure has only been experimented
     * with. */
    m2m.direction = M2M_FROM_USERBUF;
    m2m.userbuf = segp->ptr;
    m2m.streambuf = streambuf;

    MPII_Segment_manipulate(segp, first, lastp,
                            contig_m2m, vector_m2m, blkidx_m2m, index_m2m,
                            NULL /* size fn */ ,
                            &m2m);
}
400 
/* MPIR_Segment_unpack
 *
 * Copy data from streambuf into the segment's user buffer (segp->ptr) by
 * running the memory-to-memory piece functions through
 * MPII_Segment_manipulate with direction M2M_TO_USERBUF.
 *
 * segp      - segment describing the user buffer
 * first     - passed through to MPII_Segment_manipulate
 * lastp     - passed through to MPII_Segment_manipulate
 * streambuf - source of the packed bytes
 */
void MPIR_Segment_unpack(MPIR_Segment * segp, MPI_Aint first, MPI_Aint * lastp,
                         const void *streambuf)
{
    struct MPII_Dataloop_m2m_params m2m;

    DBG_SEGMENT(printf("Segment_unpack...\n"));

    /* The user buffer pointer is still kept in the segment; the idea of
     * moving it into this per-use structure has only been experimented
     * with. */
    m2m.direction = M2M_TO_USERBUF;
    m2m.userbuf = segp->ptr;
    /* the parameter structure is shared with packing, so const is dropped */
    m2m.streambuf = (void *) streambuf;

    MPII_Segment_manipulate(segp, first, lastp,
                            contig_m2m, vector_m2m, blkidx_m2m, index_m2m,
                            NULL /* size fn */ ,
                            &m2m);
}
419 
420 /* PIECE FUNCTIONS BELOW */
421 
/* contig_m2m
 *
 * Move *blocks_p contiguous elements of type el_type between the stream
 * buffer and the user buffer at byte offset rel_off, in the direction given
 * by the parameter structure, then advance the stream buffer pointer by the
 * number of bytes moved.
 *
 * blocks_p - number of elements to move
 * el_type  - element type; its size determines the byte count
 * rel_off  - byte offset into the user buffer
 * bufp     - unused
 * v_paramp - struct MPII_Dataloop_m2m_params for this traversal
 *
 * Always returns 0.
 */
static int contig_m2m(MPI_Aint * blocks_p,
                      MPI_Datatype el_type,
                      MPI_Aint rel_off, void *bufp ATTRIBUTE((unused)), void *v_paramp)
{
    struct MPII_Dataloop_m2m_params *paramp = v_paramp;
    MPI_Aint el_size;
    MPI_Aint nbytes;
    char *user_ptr;

    MPIR_Datatype_get_size_macro(el_type, el_size);
    nbytes = *blocks_p * el_size;

    DBG_SEGMENT(printf("element type = %lx\n", (long) el_type));
    DBG_SEGMENT(printf("contig m2m: elsize = %d, size = %d\n", (int) el_size, (int) nbytes));
#ifdef MPID_SU_VERBOSE
    dbg_printf("\t[contig unpack: do=" MPI_AINT_FMT_DEC_SPEC ", dp=%x, bp=%x, sz="
               MPI_AINT_FMT_DEC_SPEC ", blksz=" MPI_AINT_FMT_DEC_SPEC "]\n", rel_off,
               (unsigned) bufp, (unsigned) paramp->u.unpack.unpack_buffer, el_size, *blocks_p);
#endif

    user_ptr = (char *) paramp->userbuf + rel_off;

    if (paramp->direction != M2M_TO_USERBUF) {
        /* packing: user buffer -> stream */
        MPIR_Memcpy(paramp->streambuf, user_ptr, nbytes);
    } else {
        /* unpacking: stream -> user buffer */
        MPIR_Memcpy(user_ptr, paramp->streambuf, nbytes);
    }

    paramp->streambuf += nbytes;
    return 0;
}
449 
450 /* Segment_vector_m2m
451  *
452  * Note: this combines both packing and unpacking functionality.
453  *
454  * Note: this is only called when the starting position is at the beginning
455  * of a whole block in a vector type.
456  */
vector_m2m(MPI_Aint * blocks_p,MPI_Aint count ATTRIBUTE ((unused)),MPI_Aint blksz,MPI_Aint stride,MPI_Datatype el_type,MPI_Aint rel_off,void * bufp ATTRIBUTE ((unused)),void * v_paramp)457 static int vector_m2m(MPI_Aint * blocks_p, MPI_Aint count ATTRIBUTE((unused)), MPI_Aint blksz, MPI_Aint stride, MPI_Datatype el_type, MPI_Aint rel_off, /* offset into buffer */
458                       void *bufp ATTRIBUTE((unused)), void *v_paramp)
459 {
460     MPI_Aint i;
461     MPI_Aint el_size, whole_count, blocks_left;
462     struct MPII_Dataloop_m2m_params *paramp = v_paramp;
463     char *cbufp;
464 
465     cbufp = (char *) paramp->userbuf + rel_off;
466     MPIR_Datatype_get_size_macro(el_type, el_size);
467     DBG_SEGMENT(printf
468                 ("vector m2m: elsize = %d, count = %d, stride = %d, blocksize = %d\n",
469                  (int) el_size, (int) count, (int) stride, (int) blksz));
470 
471     whole_count = (MPI_Aint) ((blksz > 0) ? (*blocks_p / (MPI_Aint) blksz) : 0);
472     blocks_left = (MPI_Aint) ((blksz > 0) ? (*blocks_p % (MPI_Aint) blksz) : 0);
473 
474     if (paramp->direction == M2M_TO_USERBUF) {
475         if (el_size == 8 MPIR_ALIGN8_TEST(paramp->streambuf, cbufp)) {
476             MPII_COPY_TO_VEC(paramp->streambuf, cbufp, stride, int64_t, blksz, whole_count);
477             MPII_COPY_TO_VEC(paramp->streambuf, cbufp, 0, int64_t, blocks_left, 1);
478         } else if (el_size == 4 MPIR_ALIGN4_TEST(paramp->streambuf, cbufp)) {
479             MPII_COPY_TO_VEC((paramp->streambuf), cbufp, stride, int32_t, blksz, whole_count);
480             MPII_COPY_TO_VEC(paramp->streambuf, cbufp, 0, int32_t, blocks_left, 1);
481         } else if (el_size == 2) {
482             MPII_COPY_TO_VEC(paramp->streambuf, cbufp, stride, int16_t, blksz, whole_count);
483             MPII_COPY_TO_VEC(paramp->streambuf, cbufp, 0, int16_t, blocks_left, 1);
484         } else {
485             for (i = 0; i < whole_count; i++) {
486                 MPIR_Memcpy(cbufp, paramp->streambuf, ((MPI_Aint) blksz) * el_size);
487                 DBG_SEGMENT(printf("vec: memcpy %p %p %d\n", cbufp,
488                                    paramp->streambuf, (int) (blksz * el_size)));
489                 paramp->streambuf += ((MPI_Aint) blksz) * el_size;
490 
491                 cbufp += stride;
492             }
493             if (blocks_left) {
494                 MPIR_Memcpy(cbufp, paramp->streambuf, ((MPI_Aint) blocks_left) * el_size);
495                 DBG_SEGMENT(printf("vec(left): memcpy %p %p %d\n", cbufp,
496                                    paramp->streambuf, (int) (blocks_left * el_size)));
497                 paramp->streambuf += ((MPI_Aint) blocks_left) * el_size;
498             }
499         }
500     } else {    /* M2M_FROM_USERBUF */
501 
502         if (el_size == 8 MPIR_ALIGN8_TEST(cbufp, paramp->streambuf)) {
503             MPII_COPY_FROM_VEC(cbufp, paramp->streambuf, stride, int64_t, blksz, whole_count);
504             MPII_COPY_FROM_VEC(cbufp, paramp->streambuf, 0, int64_t, blocks_left, 1);
505         } else if (el_size == 4 MPIR_ALIGN4_TEST(cbufp, paramp->streambuf)) {
506             MPII_COPY_FROM_VEC(cbufp, paramp->streambuf, stride, int32_t, blksz, whole_count);
507             MPII_COPY_FROM_VEC(cbufp, paramp->streambuf, 0, int32_t, blocks_left, 1);
508         } else if (el_size == 2) {
509             MPII_COPY_FROM_VEC(cbufp, paramp->streambuf, stride, int16_t, blksz, whole_count);
510             MPII_COPY_FROM_VEC(cbufp, paramp->streambuf, 0, int16_t, blocks_left, 1);
511         } else {
512             for (i = 0; i < whole_count; i++) {
513                 MPIR_Memcpy(paramp->streambuf, cbufp, (MPI_Aint) blksz * el_size);
514                 DBG_SEGMENT(printf("vec: memcpy %p %p %d\n",
515                                    paramp->streambuf, cbufp, (int) (blksz * el_size)));
516                 paramp->streambuf += (MPI_Aint) blksz *el_size;
517                 cbufp += stride;
518             }
519             if (blocks_left) {
520                 MPIR_Memcpy(paramp->streambuf, cbufp, (MPI_Aint) blocks_left * el_size);
521                 DBG_SEGMENT(printf("vec(left): memcpy %p %p %d\n",
522                                    paramp->streambuf, cbufp, (int) (blocks_left * el_size)));
523                 paramp->streambuf += (MPI_Aint) blocks_left *el_size;
524             }
525         }
526     }
527 
528     return 0;
529 }
530 
/* blkidx_m2m
 *
 * Move *blocks_p elements of a block-indexed type between the stream buffer
 * and the user buffer.  Blocks are visited in order starting at index 0;
 * block k lives at byte offset rel_off + offsetarray[k] in the user buffer
 * and holds blocklen elements.  The final block is shortened if fewer
 * elements remain.  The stream buffer pointer is advanced past every block
 * moved.
 *
 * Always returns 0.
 */
static int blkidx_m2m(MPI_Aint * blocks_p,
                      MPI_Aint count,
                      MPI_Aint blocklen,
                      MPI_Aint * offsetarray,
                      MPI_Datatype el_type,
                      MPI_Aint rel_off, void *bufp ATTRIBUTE((unused)), void *v_paramp)
{
    struct MPII_Dataloop_m2m_params *paramp = v_paramp;
    MPI_Aint el_size;
    MPI_Aint remaining = *blocks_p;
    MPI_Aint blk;

    MPIR_Datatype_get_size_macro(el_type, el_size);
    DBG_SEGMENT(printf("blkidx m2m: elsize = %ld, count = %ld, blocklen = %ld,"
                       " blocks_left = %ld\n", el_size, count, blocklen, remaining));

    for (blk = 0; remaining != 0; blk++) {
        char *userp;
        char *from, *to;

        MPIR_Assert(blk < count);

        userp = (char *) paramp->userbuf + rel_off + offsetarray[blk];

        /* there was some casting going on here at one time but now all types
         * are promoted to big values */
        if (remaining < blocklen) {
            blocklen = remaining;
        }

        if (paramp->direction != M2M_TO_USERBUF) {
            from = userp;
            to = paramp->streambuf;
        } else {
            from = paramp->streambuf;
            to = userp;
        }

        /* note: the copy macros advance the local from/to pointers, which is
         * why they are recomputed on every pass and the stream pointer is
         * advanced separately below */
        if (el_size == 8 MPIR_ALIGN8_TEST(from, to)) {
            MPII_COPY_FROM_VEC(from, to, 0, int64_t, blocklen, 1);
        } else if (el_size == 4 MPIR_ALIGN4_TEST(from, to)) {
            MPII_COPY_FROM_VEC(from, to, 0, int32_t, blocklen, 1);
        } else if (el_size == 2) {
            MPII_COPY_FROM_VEC(from, to, 0, int16_t, blocklen, 1);
        } else {
            MPIR_Memcpy(to, from, blocklen * el_size);
            DBG_SEGMENT(printf
                        ("blkidx m3m:memcpy(%p,%p,%d)\n", to, from, (int) (blocklen * el_size)));
        }

        paramp->streambuf += blocklen * el_size;
        remaining -= blocklen;
    }

    return 0;
}
588 
/* index_m2m
 *
 * Move *blocks_p elements of an indexed type between the stream buffer and
 * the user buffer.  Blocks are visited in order starting at index 0; block
 * k lives at byte offset rel_off + offsetarray[k] in the user buffer and
 * holds blockarray[k] elements.  The final block is shortened if fewer
 * elements remain.  The stream buffer pointer is advanced past every block
 * moved.
 *
 * blocks_p    - number of elements to move
 * count       - number of entries in blockarray / offsetarray
 * blockarray  - per-block element counts
 * offsetarray - per-block byte offsets
 * el_type     - element type; its size determines the byte counts
 * rel_off     - byte offset into the user buffer
 * bufp        - unused
 * v_paramp    - struct MPII_Dataloop_m2m_params for this traversal
 *
 * Always returns 0.
 */
static int index_m2m(MPI_Aint * blocks_p,
                     MPI_Aint count,
                     MPI_Aint * blockarray,
                     MPI_Aint * offsetarray,
                     MPI_Datatype el_type,
                     MPI_Aint rel_off, void *bufp ATTRIBUTE((unused)), void *v_paramp)
{
    /* MPI_Aint, like count and like the index in blkidx_m2m: an int index
     * could overflow before reaching a large count */
    MPI_Aint curblock = 0;
    MPI_Aint el_size;
    MPI_Aint cur_block_sz, blocks_left = *blocks_p;
    char *cbufp;
    struct MPII_Dataloop_m2m_params *paramp = v_paramp;

    MPIR_Datatype_get_size_macro(el_type, el_size);
    DBG_SEGMENT(printf("index m2m: elsize = %d, count = %d\n", (int) el_size, (int) count));

    while (blocks_left) {
        char *src, *dest;

        MPIR_Assert(curblock < count);
        cur_block_sz = blockarray[curblock];

        cbufp = (char *) paramp->userbuf + rel_off + offsetarray[curblock];

        /* the last block may be only partially requested */
        if (cur_block_sz > blocks_left)
            cur_block_sz = blocks_left;

        if (paramp->direction == M2M_TO_USERBUF) {
            src = paramp->streambuf;
            dest = cbufp;
        } else {
            src = cbufp;
            dest = paramp->streambuf;
        }

        /* note: the copy macros advance the local src/dest pointers, which
         * is why they are recomputed on every pass and the stream pointer is
         * advanced separately below */
        if (el_size == 8 MPIR_ALIGN8_TEST(src, dest)) {
            MPII_COPY_FROM_VEC(src, dest, 0, int64_t, cur_block_sz, 1);
        } else if (el_size == 4 MPIR_ALIGN4_TEST(src, dest)) {
            MPII_COPY_FROM_VEC(src, dest, 0, int32_t, cur_block_sz, 1);
        } else if (el_size == 2) {
            MPII_COPY_FROM_VEC(src, dest, 0, int16_t, cur_block_sz, 1);
        } else {
            MPIR_Memcpy(dest, src, cur_block_sz * el_size);
        }

        paramp->streambuf += cur_block_sz * el_size;
        blocks_left -= cur_block_sz;
        curblock++;
    }

    return 0;
}
642 
/* contig_pack_external32_to_buf
 *
 * Pack *blocks_p contiguous elements of type el_type, read from bufp at
 * byte offset rel_off, into the pack buffer held in the piece parameters,
 * then advance the pack buffer pointer by the external32 size of the data.
 *
 * One-byte elements are copied verbatim; floating-point types go through
 * external32_float_convert and everything else through
 * external32_basic_convert.
 *
 * Always returns 0.
 */
static int contig_pack_external32_to_buf(MPI_Aint * blocks_p,
                                         MPI_Datatype el_type,
                                         MPI_Aint rel_off, void *bufp, void *v_paramp)
{
    struct piece_params *paramp = v_paramp;
    int native_el_size, ext32_el_size;
    char *user_src;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CONTIG_PACK_EXTERNAL32_TO_BUF);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CONTIG_PACK_EXTERNAL32_TO_BUF);

    native_el_size = MPIR_Datatype_get_basic_size(el_type);
    ext32_el_size = MPII_Dataloop_get_basic_size_external32(el_type);
    MPIR_Assert(ext32_el_size);

    /*
     * Legend for the verbose trace below:
     * do = datatype buffer offset
     * dp = datatype buffer pointer
     * bp = pack buffer pointer (current location, incremented as we go)
     */
#ifdef MPID_SP_VERBOSE
    dbg_printf("\t[contig pack [external32]: do=%d, dp=%x, bp=%x, "
               "src_el_sz=%d, dest_el_sz=%d, blksz=%d]\n",
               rel_off,
               (unsigned) bufp,
               (unsigned) paramp->u.pack.pack_buffer,
               native_el_size, ext32_el_size, (int) *blocks_p);
#endif

    user_src = ((char *) bufp) + rel_off;

    /* TODO: DEAL WITH CASE WHERE ALL DATA DOESN'T FIT! */
    if (native_el_size == 1 && ext32_el_size == 1) {
        /* single bytes need no conversion */
        MPIR_Memcpy(paramp->u.pack.pack_buffer, user_src, *blocks_p);
    } else if (is_float_type(el_type)) {
        external32_float_convert(paramp->u.pack.pack_buffer,
                                 user_src, ext32_el_size, native_el_size, *blocks_p);
    } else {
        external32_basic_convert(paramp->u.pack.pack_buffer,
                                 user_src, ext32_el_size, native_el_size, *blocks_p);
    }

    paramp->u.pack.pack_buffer += (ext32_el_size * (*blocks_p));

    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CONTIG_PACK_EXTERNAL32_TO_BUF);
    return 0;
}
688 
/* contig_unpack_external32_to_buf
 *
 * Unpack *blocks_p contiguous elements of type el_type from the unpack
 * buffer held in the piece parameters into bufp at byte offset rel_off,
 * then advance the unpack buffer pointer by the external32 size of the
 * data.
 *
 * One-byte elements are copied verbatim; floating-point types go through
 * external32_float_convert and everything else through
 * external32_basic_convert.
 *
 * Always returns 0.
 */
static int contig_unpack_external32_to_buf(MPI_Aint * blocks_p,
                                           MPI_Datatype el_type,
                                           MPI_Aint rel_off, void *bufp, void *v_paramp)
{
    struct piece_params *paramp = v_paramp;
    int native_el_size, ext32_el_size;
    char *user_dest;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CONTIG_UNPACK_EXTERNAL32_TO_BUF);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CONTIG_UNPACK_EXTERNAL32_TO_BUF);

    native_el_size = MPIR_Datatype_get_basic_size(el_type);
    ext32_el_size = MPII_Dataloop_get_basic_size_external32(el_type);
    MPIR_Assert(ext32_el_size);

    /*
     * Legend for the verbose trace below:
     * do = datatype buffer offset
     * dp = datatype buffer pointer
     * up = unpack buffer pointer (current location, incremented as we go)
     */
#ifdef MPID_SP_VERBOSE
    dbg_printf("\t[contig unpack [external32]: do=%d, dp=%x, up=%x, "
               "src_el_sz=%d, dest_el_sz=%d, blksz=%d]\n",
               rel_off,
               (unsigned) bufp,
               (unsigned) paramp->u.unpack.unpack_buffer,
               native_el_size, ext32_el_size, (int) *blocks_p);
#endif

    user_dest = ((char *) bufp) + rel_off;

    /* TODO: DEAL WITH CASE WHERE ALL DATA DOESN'T FIT! */
    if (native_el_size == 1 && ext32_el_size == 1) {
        /* single bytes need no conversion */
        MPIR_Memcpy(user_dest, paramp->u.unpack.unpack_buffer, *blocks_p);
    } else if (is_float_type(el_type)) {
        /* the size arguments keep their original positions; the converters
         * only accept equal sizes, so the order has no effect on the result */
        external32_float_convert(user_dest,
                                 paramp->u.unpack.unpack_buffer,
                                 ext32_el_size, native_el_size, *blocks_p);
    } else {
        external32_basic_convert(user_dest,
                                 paramp->u.unpack.unpack_buffer,
                                 ext32_el_size, native_el_size, *blocks_p);
    }

    paramp->u.unpack.unpack_buffer += (ext32_el_size * (*blocks_p));

    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CONTIG_UNPACK_EXTERNAL32_TO_BUF);
    return 0;
}
737 
/* MPIR_Segment_pack_external32
 *
 * Drives MPII_Segment_manipulate over the segment with
 * contig_pack_external32_to_buf as the only piece handler, so every piece is
 * handed to that routine together with the caller's pack buffer.
 *
 * Parameters:
 * segp        - pointer to segment structure
 * first       - forwarded unchanged to MPII_Segment_manipulate
 * lastp       - forwarded unchanged to MPII_Segment_manipulate
 * pack_buffer - buffer made available to the handler through
 *               piece_params.u.pack.pack_buffer
 */
void MPIR_Segment_pack_external32(struct MPIR_Segment *segp,
                                  MPI_Aint first, MPI_Aint * lastp, void *pack_buffer)
{
    struct piece_params params;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIR_SEGMENT_PACK_EXTERNAL32);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIR_SEGMENT_PACK_EXTERNAL32);

    params.u.pack.pack_buffer = (char *) pack_buffer;

    /* Only the contiguous handler is supplied; the vector, blkidx and index
     * handler slots are deliberately left NULL.  Element sizes come from
     * MPII_Dataloop_get_basic_size_external32. */
    MPII_Segment_manipulate(segp, first, lastp,
                            contig_pack_external32_to_buf,
                            NULL,       /* vector */
                            NULL,       /* blkidx */
                            NULL,       /* index */
                            MPII_Dataloop_get_basic_size_external32, &params);

    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIR_SEGMENT_PACK_EXTERNAL32);
}
755 
/* MPIR_Segment_unpack_external32
 *
 * Drives MPII_Segment_manipulate over the segment with
 * contig_unpack_external32_to_buf as the only piece handler, so every piece
 * is handed to that routine together with the caller's unpack buffer.
 *
 * Parameters:
 * segp          - pointer to segment structure
 * first         - forwarded unchanged to MPII_Segment_manipulate
 * lastp         - forwarded unchanged to MPII_Segment_manipulate
 * unpack_buffer - read-only buffer made available to the handler through
 *                 piece_params.u.unpack.unpack_buffer
 */
void MPIR_Segment_unpack_external32(struct MPIR_Segment *segp,
                                    MPI_Aint first, MPI_Aint * lastp, const void *unpack_buffer)
{
    struct piece_params unpack_params;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIR_SEGMENT_UNPACK_EXTERNAL32);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIR_SEGMENT_UNPACK_EXTERNAL32);

    unpack_params.u.unpack.unpack_buffer = unpack_buffer;

    /* Only the contiguous handler is supplied; the vector, blkidx and index
     * handler slots are deliberately left NULL.  Element sizes come from
     * MPII_Dataloop_get_basic_size_external32. */
    MPII_Segment_manipulate(segp, first, lastp,
                            contig_unpack_external32_to_buf,
                            NULL,       /* vector */
                            NULL,       /* blkidx */
                            NULL,       /* index */
                            MPII_Dataloop_get_basic_size_external32, &unpack_params);

    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIR_SEGMENT_UNPACK_EXTERNAL32);
}
773 
/* MPIR_Type_access_contents
 *
 * Hands back pointers to the datatype, integer and address arrays that are
 * stored directly behind a datatype's MPIR_Datatype_contents structure.
 *
 * The layout assumed here is hardcoded:
 *
 *   [ MPIR_Datatype_contents | types[nr_types] | ints[nr_ints] | aints[...] ]
 *
 * where the size of each of the first three regions is rounded up to a
 * multiple of MAX_ALIGNMENT before the next region begins.
 *
 * Parameters:
 * type    - datatype whose contents are accessed; both its object pointer
 *           and its contents pointer are asserted to be non-NULL
 * ints_p  - [out] start of the integer array
 * aints_p - [out] start of the address array
 * types_p - [out] start of the datatype array
 *
 * The returned pointers refer into the contents storage itself; nothing is
 * allocated or copied by this routine.
 */
void MPIR_Type_access_contents(MPI_Datatype type,
                               int **ints_p, MPI_Aint ** aints_p, MPI_Datatype ** types_p)
{
    int nr_ints, nr_aints, nr_types, combiner;
    int struct_sz, types_sz, ints_sz, rem;
    MPIR_Datatype *dtp;
    MPIR_Datatype_contents *cp;
    char *cursor;

    /* the envelope supplies the element counts needed to size each region */
    MPIR_Type_get_envelope(type, &nr_ints, &nr_aints, &nr_types, &combiner);

    MPIR_Datatype_get_ptr(type, dtp);
    MPIR_Assert(dtp != NULL);

    cp = dtp->contents;
    MPIR_Assert(cp != NULL);

    /* size of each region, padded up to the next MAX_ALIGNMENT boundary */
    struct_sz = sizeof(MPIR_Datatype_contents);
    rem = struct_sz % MAX_ALIGNMENT;
    if (rem != 0) {
        struct_sz += MAX_ALIGNMENT - rem;
    }

    types_sz = nr_types * sizeof(MPI_Datatype);
    rem = types_sz % MAX_ALIGNMENT;
    if (rem != 0) {
        types_sz += MAX_ALIGNMENT - rem;
    }

    ints_sz = nr_ints * sizeof(int);
    rem = ints_sz % MAX_ALIGNMENT;
    if (rem != 0) {
        ints_sz += MAX_ALIGNMENT - rem;
    }

    /* walk a byte cursor across the regions in storage order */
    cursor = ((char *) cp) + struct_sz;
    *types_p = (MPI_Datatype *) cursor;

    cursor += types_sz;
    *ints_p = (int *) cursor;

    cursor += ints_sz;
    *aints_p = (MPI_Aint *) cursor;
}
811 
812 /* FIXME: Is this routine complete?  Why is it needed? If it is needed, it
813    must have a comment that describes why it is needed and the arguments
814    must have ATTRIBUTE((unused)) */
MPIR_Type_release_contents(MPI_Datatype type,int ** ints_p,MPI_Aint ** aints_p,MPI_Datatype ** types_p)815 void MPIR_Type_release_contents(MPI_Datatype type,
816                                 int **ints_p, MPI_Aint ** aints_p, MPI_Datatype ** types_p)
817 {
818     return;
819 }
820 
821 /* MPIR_Segment_to_iov
822 *
823 * Parameters:
824 * segp    - pointer to segment structure
825 * first   - first byte in segment to pack
826 * lastp   - in/out parameter describing last byte to pack (and afterwards
827 *           the last byte _actually_ packed)
828 *           NOTE: actually returns index of byte _after_ last one packed
829 * vectorp - pointer to (off, len) pairs to fill in
830 * lengthp - in/out parameter describing length of array (and afterwards
831 *           the amount of the array that has actual data)
832 */
MPIR_Segment_to_iov(struct MPIR_Segment * segp,MPI_Aint first,MPI_Aint * lastp,struct iovec * vectorp,int * lengthp)833 void MPIR_Segment_to_iov(struct MPIR_Segment *segp,
834                          MPI_Aint first, MPI_Aint * lastp, struct iovec *vectorp, int *lengthp)
835 {
836     struct piece_params packvec_params;
837     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIR_SEGMENT_TO_IOV);
838 
839     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIR_SEGMENT_TO_IOV);
840 
841     packvec_params.u.pack_vector.vectorp = vectorp;
842     packvec_params.u.pack_vector.index = 0;
843     packvec_params.u.pack_vector.length = *lengthp;
844 
845     MPIR_Assert(*lengthp > 0);
846 
847     MPII_Segment_manipulate(segp, first, lastp, contig_pack_to_iov, vector_pack_to_iov, NULL,   /* blkidx fn */
848                             NULL,       /* index fn */
849                             NULL, &packvec_params);
850 
851     /* last value already handled by MPII_Segment_manipulate */
852     *lengthp = packvec_params.u.pack_vector.index;
853     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIR_SEGMENT_TO_IOV);
854     return;
855 }
856 
857 
858 /*
859 * EVERYTHING BELOW HERE IS USED ONLY WITHIN THIS FILE
860 */
861 
862 /********** FUNCTIONS FOR CREATING AN IOV DESCRIBING BUFFER **********/
863 
/* contig_pack_to_iov
 *
 * Records one contiguous region, starting at bufp + rel_off and covering
 * *blocks_p elements of el_type, in the iovec array carried by v_paramp.
 *
 * If the region begins exactly where the most recently written entry ends,
 * that entry is lengthened instead of consuming a new one.
 *
 * Returns 0 when the region was recorded.  Returns 1, with *blocks_p set to
 * 0, when every entry is already in use and the region cannot be appended
 * to the last one.
 */
static int contig_pack_to_iov(MPI_Aint * blocks_p,
                              MPI_Datatype el_type, MPI_Aint rel_off, void *bufp, void *v_paramp)
{
    struct piece_params *paramp = v_paramp;
    struct iovec *iov;
    int el_size, used, adjoins;
    MPI_Aint nbytes;
    intptr_t prev_end = 0;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_CONTIG_PACK_TO_IOV);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_CONTIG_PACK_TO_IOV);

    el_size = MPIR_Datatype_get_basic_size(el_type);
    nbytes = *blocks_p * (MPI_Aint) el_size;

    MPL_DBG_MSG_FMT(MPIR_DBG_DATATYPE, VERBOSE, (MPL_DBG_FDEST,
                                                 "    contig to vec: do=" MPI_AINT_FMT_DEC_SPEC
                                                 ", dp=%p, ind=%d, sz=%d, blksz="
                                                 MPI_AINT_FMT_DEC_SPEC, (MPI_Aint) rel_off, bufp,
                                                 paramp->u.pack_vector.index, el_size,
                                                 (MPI_Aint) * blocks_p));

    iov = paramp->u.pack_vector.vectorp;
    used = paramp->u.pack_vector.index;

    /* end address of the most recent entry; stays 0 while the vector is empty */
    if (used > 0) {
        prev_end = ((intptr_t) iov[used - 1].iov_base) + iov[used - 1].iov_len;
    }
    adjoins = (prev_end == ((intptr_t) bufp + rel_off));

    if (!adjoins && used == paramp->u.pack_vector.length) {
        /* we have used up all our entries, and this region doesn't fit on
         * the end of the last one.  setting blocks to 0 tells manipulation
         * function that we are done (and that we didn't process any blocks).
         */
        *blocks_p = 0;
        MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CONTIG_PACK_TO_IOV);
        return 1;
    }

    if (adjoins && used > 0) {
        /* add this size to the last vector rather than using up another one */
        iov[used - 1].iov_len += nbytes;
    } else {
        /* start a fresh entry */
        iov[used].iov_base = (void *) ((intptr_t) bufp + rel_off);
        iov[used].iov_len = nbytes;
        paramp->u.pack_vector.index = used + 1;
    }

    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_CONTIG_PACK_TO_IOV);
    return 0;
}
910 
911 /* vector_pack_to_iov
912  *
913  * Input Parameters:
914  * blocks_p - [inout] pointer to a count of blocks (total, for all noncontiguous pieces)
915  * count    - # of noncontiguous regions
916  * blksz    - size of each noncontiguous region
917  * stride   - distance in bytes from start of one region to start of next
918  * el_type - elemental type (e.g. MPI_INT)
919  * ...
920  *
921  * Note: this is only called when the starting position is at the beginning
922  * of a whole block in a vector type.
923  */
vector_pack_to_iov(MPI_Aint * blocks_p,MPI_Aint count,MPI_Aint blksz,MPI_Aint stride,MPI_Datatype el_type,MPI_Aint rel_off,void * bufp,void * v_paramp)924 static int vector_pack_to_iov(MPI_Aint * blocks_p, MPI_Aint count, MPI_Aint blksz, MPI_Aint stride, MPI_Datatype el_type, MPI_Aint rel_off,     /* offset into buffer */
925                               void *bufp,       /* start of buffer */
926                               void *v_paramp)
927 {
928     int i;
929     MPI_Aint size, blocks_left, basic_size;
930     struct piece_params *paramp = v_paramp;
931     MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_VECTOR_PACK_TO_IOV);
932 
933     MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_VECTOR_PACK_TO_IOV);
934 
935     basic_size = (MPI_Aint) MPIR_Datatype_get_basic_size(el_type);
936     blocks_left = *blocks_p;
937 
938     MPL_DBG_MSG_FMT(MPIR_DBG_DATATYPE, VERBOSE, (MPL_DBG_FDEST,
939                                                  "    vector to vec: do=" MPI_AINT_FMT_DEC_SPEC
940                                                  ", dp=%p"
941                                                  ", len=" MPI_AINT_FMT_DEC_SPEC
942                                                  ", ind=" MPI_AINT_FMT_DEC_SPEC
943                                                  ", ct=" MPI_AINT_FMT_DEC_SPEC
944                                                  ", blksz=" MPI_AINT_FMT_DEC_SPEC
945                                                  ", str=" MPI_AINT_FMT_DEC_SPEC
946                                                  ", blks=" MPI_AINT_FMT_DEC_SPEC,
947                                                  (MPI_Aint) rel_off,
948                                                  bufp,
949                                                  (MPI_Aint) paramp->u.pack_vector.length,
950                                                  (MPI_Aint) paramp->u.pack_vector.index,
951                                                  count,
952                                                  blksz, (MPI_Aint) stride, (MPI_Aint) * blocks_p));
953 
954     for (i = 0; i < count && blocks_left > 0; i++) {
955         int last_idx;
956         intptr_t last_end = 0;
957 
958         if (blocks_left > (MPI_Aint) blksz) {
959             size = ((MPI_Aint) blksz) * basic_size;
960             blocks_left -= (MPI_Aint) blksz;
961         } else {
962             /* last pass */
963             size = blocks_left * basic_size;
964             blocks_left = 0;
965         }
966 
967         last_idx = paramp->u.pack_vector.index - 1;
968         if (last_idx >= 0) {
969             last_end = ((intptr_t) paramp->u.pack_vector.vectorp[last_idx].iov_base) +
970                 paramp->u.pack_vector.vectorp[last_idx].iov_len;
971         }
972 
973         if ((last_idx == paramp->u.pack_vector.length - 1) &&
974             (last_end != ((intptr_t) bufp + rel_off))) {
975             /* we have used up all our entries, and this one doesn't fit on
976              * the end of the last one.
977              */
978             *blocks_p -= (blocks_left + (size / basic_size));
979 #ifdef MPID_SP_VERBOSE
980             MPL_DBG_MSG_FMT(MPIR_DBG_DATATYPE, VERBOSE,
981                             (MPL_DBG_FDEST,
982                              "\t[vector to vec exiting (1): next ind = %d, " MPI_AINT_FMT_DEC_SPEC
983                              " blocks processed.\n", paramp->u.pack_vector.index,
984                              (MPI_Aint) * blocks_p));
985 #endif
986             MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_VECTOR_PACK_TO_IOV);
987             return 1;
988         } else if (last_idx >= 0 && (last_end == ((intptr_t) bufp + rel_off))) {
989             /* add this size to the last vector rather than using up new one */
990             paramp->u.pack_vector.vectorp[last_idx].iov_len += size;
991         } else {
992             paramp->u.pack_vector.vectorp[last_idx + 1].iov_base =
993                 (void *) ((intptr_t) bufp + rel_off);
994             paramp->u.pack_vector.vectorp[last_idx + 1].iov_len = size;
995             paramp->u.pack_vector.index++;
996         }
997 
998         rel_off += stride;
999 
1000     }
1001 
1002 #ifdef MPID_SP_VERBOSE
1003     MPL_DBG_MSG_FMT(MPIR_DBG_DATATYPE, VERBOSE,
1004                     (MPL_DBG_FDEST,
1005                      "\t[vector to vec exiting (2): next ind = %d, " MPI_AINT_FMT_DEC_SPEC
1006                      " blocks processed.\n", paramp->u.pack_vector.index, (MPI_Aint) * blocks_p));
1007 #endif
1008 
1009     /* if we get here then we processed ALL the blocks; don't need to update
1010      * blocks_p
1011      */
1012     MPIR_Assert(blocks_left == 0);
1013     MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_VECTOR_PACK_TO_IOV);
1014     return 0;
1015 }
1016