1 /*
2  * Copyright (C) by Argonne National Laboratory
3  *     See COPYRIGHT in top-level directory
4  */
5 
6 #include "adio.h"
7 #include "adio_extern.h"
8 #include "ad_pvfs2.h"
9 
10 #include "ad_pvfs2_common.h"
11 
ADIOI_PVFS2_OldWriteStrided(ADIO_File fd,const void * buf,int count,MPI_Datatype datatype,int file_ptr_type,ADIO_Offset offset,ADIO_Status * status,int * error_code)12 void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, const void *buf, int count,
13                                  MPI_Datatype datatype, int file_ptr_type,
14                                  ADIO_Offset offset, ADIO_Status * status, int *error_code)
15 {
16     /* as with all the other WriteStrided functions, offset is in units of
17      * etype relative to the filetype */
18 
19     /* Since PVFS2 does not support file locking, can't do buffered writes
20      * as on Unix */
21 
22     ADIOI_Flatlist_node *flat_buf, *flat_file;
23     int i, j, k, bwr_size, fwr_size = 0, st_index = 0;
24     int sum, n_etypes_in_filetype, size_in_filetype;
25     MPI_Count bufsize;
26     int n_filetypes, etype_in_filetype;
27     ADIO_Offset abs_off_in_filetype = 0;
28     MPI_Count filetype_size, etype_size, buftype_size;
29     MPI_Aint filetype_extent, buftype_extent;
30     int buf_count, buftype_is_contig, filetype_is_contig;
31     ADIO_Offset off, disp, start_off, initial_off;
32     int flag, st_fwr_size, st_n_filetypes;
33     int err_flag = 0;
34 
35     int mem_list_count, file_list_count;
36     PVFS_size *mem_offsets;
37     int64_t *file_offsets;
38     int *mem_lengths;
39     int32_t *file_lengths;
40     int total_blks_to_write;
41 
42     int max_mem_list, max_file_list;
43 
44     int b_blks_wrote;
45     int f_data_wrote;
46     int size_wrote = 0, n_write_lists, extra_blks;
47 
48     int end_bwr_size, end_fwr_size;
49     int start_k, start_j, new_file_write, new_buffer_write;
50     int start_mem_offset;
51     PVFS_Request mem_req, file_req;
52     ADIOI_PVFS2_fs *pvfs_fs;
53     PVFS_sysresp_io resp_io;
54     MPI_Offset total_bytes_written = 0;
55     static char myname[] = "ADIOI_PVFS2_WRITESTRIDED";
56 
57     /* note: don't increase this: several parts of PVFS2 now
58      * assume this limit*/
59 #define MAX_ARRAY_SIZE 64
60 
61     /* --BEGIN ERROR HANDLING-- */
62     if (fd->atomicity) {
63         *error_code = MPIO_Err_create_code(MPI_SUCCESS,
64                                            MPIR_ERR_RECOVERABLE,
65                                            myname, __LINE__,
66                                            MPI_ERR_ARG,
67                                            "Atomic noncontiguous writes are not supported by PVFS2",
68                                            0);
69         return;
70     }
71     /* --END ERROR HANDLING-- */
72 
73     ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
74     ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
75 
76     /* the HDF5 tests showed a bug in this list processing code (see many many
77      * lines down below).  We added a workaround, but common HDF5 file types
78      * are actually contiguous and do not need the expensive workaround */
79     if (!filetype_is_contig) {
80         flat_file = ADIOI_Flatten_and_find(fd->filetype);
81         if (flat_file->count == 1 && !buftype_is_contig)
82             filetype_is_contig = 1;
83     }
84 
85     MPI_Type_size_x(fd->filetype, &filetype_size);
86     if (!filetype_size) {
87 #ifdef HAVE_STATUS_SET_BYTES
88         MPIR_Status_set_bytes(status, datatype, 0);
89 #endif
90         *error_code = MPI_SUCCESS;
91         return;
92     }
93 
94     MPI_Type_extent(fd->filetype, &filetype_extent);
95     MPI_Type_size_x(datatype, &buftype_size);
96     MPI_Type_extent(datatype, &buftype_extent);
97     etype_size = fd->etype_size;
98 
99     bufsize = buftype_size * count;
100 
101     pvfs_fs = (ADIOI_PVFS2_fs *) fd->fs_ptr;
102 
103     if (!buftype_is_contig && filetype_is_contig) {
104 
105 /* noncontiguous in memory, contiguous in file.  */
106         int64_t file_offset;
107         int32_t file_length;
108 
109         flat_buf = ADIOI_Flatten_and_find(datatype);
110 
111         if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
112             off = fd->disp + etype_size * offset;
113         } else
114             off = fd->fp_ind;
115 
116         file_list_count = 1;
117         file_offset = off;
118         file_length = 0;
119         total_blks_to_write = count * flat_buf->count;
120         b_blks_wrote = 0;
121 
122         /* allocate arrays according to max usage */
123         if (total_blks_to_write > MAX_ARRAY_SIZE)
124             mem_list_count = MAX_ARRAY_SIZE;
125         else
126             mem_list_count = total_blks_to_write;
127         mem_offsets = (PVFS_size *) ADIOI_Malloc(mem_list_count * sizeof(PVFS_size));
128         mem_lengths = (int *) ADIOI_Malloc(mem_list_count * sizeof(int));
129 
130         j = 0;
131         /* step through each block in memory, filling memory arrays */
132         while (b_blks_wrote < total_blks_to_write) {
133             for (i = 0; i < flat_buf->count; i++) {
134                 mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] =
135                     /* TODO: fix this warning by casting to an integer that's
136                      * the same size as a char * and /then/ casting to
137                      * PVFS_size */
138                     ((PVFS_size) buf + j * buftype_extent + flat_buf->indices[i]);
139                 mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] = flat_buf->blocklens[i];
140                 file_length += flat_buf->blocklens[i];
141                 b_blks_wrote++;
142                 if (!(b_blks_wrote % MAX_ARRAY_SIZE) || (b_blks_wrote == total_blks_to_write)) {
143 
144                     /* in the case of the last write list call,
145                      * adjust mem_list_count */
146                     if (b_blks_wrote == total_blks_to_write) {
147                         mem_list_count = total_blks_to_write % MAX_ARRAY_SIZE;
148                         /* in case last write list call fills max arrays */
149                         if (!mem_list_count)
150                             mem_list_count = MAX_ARRAY_SIZE;
151                     }
152                     err_flag = PVFS_Request_hindexed(mem_list_count,
153                                                      mem_lengths, mem_offsets, PVFS_BYTE, &mem_req);
154                     /* --BEGIN ERROR HANDLING-- */
155                     if (err_flag != 0) {
156                         *error_code = MPIO_Err_create_code(MPI_SUCCESS,
157                                                            MPIR_ERR_RECOVERABLE,
158                                                            myname, __LINE__,
159                                                            ADIOI_PVFS2_error_convert(err_flag),
160                                                            "Error in PVFS_Request_hindexed (memory)",
161                                                            0);
162                         break;
163                     }
164                     /* --END ERROR HANDLING-- */
165 
166                     err_flag = PVFS_Request_contiguous(file_length, PVFS_BYTE, &file_req);
167                     /* --BEGIN ERROR HANDLING-- */
168                     if (err_flag != 0) {
169                         *error_code = MPIO_Err_create_code(MPI_SUCCESS,
170                                                            MPIR_ERR_RECOVERABLE,
171                                                            myname, __LINE__,
172                                                            ADIOI_PVFS2_error_convert(err_flag),
173                                                            "Error in PVFS_Request_contiguous (file)",
174                                                            0);
175                         break;
176                     }
177                     /* --END ERROR HANDLING-- */
178 
179 #ifdef ADIOI_MPE_LOGGING
180                     MPE_Log_event(ADIOI_MPE_write_a, 0, NULL);
181 #endif
182                     err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req,
183                                               file_offset, PVFS_BOTTOM,
184                                               mem_req, &(pvfs_fs->credentials), &resp_io);
185 #ifdef ADIOI_MPE_LOGGING
186                     MPE_Log_event(ADIOI_MPE_write_b, 0, NULL);
187 #endif
188                     total_bytes_written += resp_io.total_completed;
189 
190                     /* in the case of error or the last write list call,
191                      * leave here */
192                     /* --BEGIN ERROR HANDLING-- */
193                     if (err_flag) {
194                         *error_code = MPIO_Err_create_code(MPI_SUCCESS,
195                                                            MPIR_ERR_RECOVERABLE,
196                                                            myname, __LINE__,
197                                                            ADIOI_PVFS2_error_convert(err_flag),
198                                                            "Error in PVFS_sys_write", 0);
199                         break;
200                     }
201                     /* --END ERROR HANDLING-- */
202                     if (b_blks_wrote == total_blks_to_write)
203                         break;
204 
205                     file_offset += file_length;
206                     file_length = 0;
207                     PVFS_Request_free(&mem_req);
208                     PVFS_Request_free(&file_req);
209                 }
210             }   /* for (i=0; i<flat_buf->count; i++) */
211             j++;
212         }       /* while (b_blks_wrote < total_blks_to_write) */
213         ADIOI_Free(mem_offsets);
214         ADIOI_Free(mem_lengths);
215 
216         if (file_ptr_type == ADIO_INDIVIDUAL)
217             fd->fp_ind += total_bytes_written;
218 
219         if (!err_flag)
220             *error_code = MPI_SUCCESS;
221 
222         fd->fp_sys_posn = -1;   /* clear this. */
223 
224 #ifdef HAVE_STATUS_SET_BYTES
225         MPIR_Status_set_bytes(status, datatype, bufsize);
226 /* This is a temporary way of filling in status. The right way is to
227    keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
228 #endif
229 
230         return;
231     }
232 
233 
234 
235 
236     /* if (!buftype_is_contig && filetype_is_contig) */
237     /* already know that file is noncontiguous from above */
238     /* noncontiguous in file */
239     /* filetype already flattened in ADIO_Open */
240     flat_file = ADIOI_Flatten_and_find(fd->filetype);
241 
242     disp = fd->disp;
243     initial_off = offset;
244 
245     /* for each case - ADIO_Individual pointer or explicit, find offset
246      * (file offset in bytes), n_filetypes (how many filetypes into file
247      * to start), fwr_size (remaining amount of data in present file
248      * block), and st_index (start point in terms of blocks in starting
249      * filetype) */
250     if (file_ptr_type == ADIO_INDIVIDUAL) {
251         offset = fd->fp_ind;    /* in bytes */
252         n_filetypes = -1;
253         flag = 0;
254         while (!flag) {
255             n_filetypes++;
256             for (i = 0; i < flat_file->count; i++) {
257                 if (disp + flat_file->indices[i] +
258                     ((ADIO_Offset) n_filetypes) * filetype_extent +
259                     flat_file->blocklens[i] >= offset) {
260                     st_index = i;
261                     fwr_size = disp + flat_file->indices[i] +
262                         ((ADIO_Offset) n_filetypes) * filetype_extent
263                         + flat_file->blocklens[i] - offset;
264                     flag = 1;
265                     break;
266                 }
267             }
268         }       /* while (!flag) */
269     } /* if (file_ptr_type == ADIO_INDIVIDUAL) */
270     else {
271         n_etypes_in_filetype = filetype_size / etype_size;
272         n_filetypes = (int) (offset / n_etypes_in_filetype);
273         etype_in_filetype = (int) (offset % n_etypes_in_filetype);
274         size_in_filetype = etype_in_filetype * etype_size;
275 
276         sum = 0;
277         for (i = 0; i < flat_file->count; i++) {
278             sum += flat_file->blocklens[i];
279             if (sum > size_in_filetype) {
280                 st_index = i;
281                 fwr_size = sum - size_in_filetype;
282                 abs_off_in_filetype = flat_file->indices[i] +
283                     size_in_filetype - (sum - flat_file->blocklens[i]);
284                 break;
285             }
286         }
287 
288         /* abs. offset in bytes in the file */
289         offset = disp + ((ADIO_Offset) n_filetypes) * filetype_extent + abs_off_in_filetype;
290     }   /* else [file_ptr_type != ADIO_INDIVIDUAL] */
291 
292     start_off = offset;
293     st_fwr_size = fwr_size;
294     st_n_filetypes = n_filetypes;
295 
296     if (buftype_is_contig && !filetype_is_contig) {
297 
298 /* contiguous in memory, noncontiguous in file. should be the most
299    common case. */
300 
301         int mem_length;
302         intptr_t mem_offset;
303 
304         i = 0;
305         j = st_index;
306         off = offset;
307         n_filetypes = st_n_filetypes;
308 
309         mem_list_count = 1;
310 
311         /* determine how many blocks in file to write */
312         f_data_wrote = MPL_MIN(st_fwr_size, bufsize);
313         total_blks_to_write = 1;
314         if (j < (flat_file->count - 1))
315             j++;
316         else {
317             j = 0;
318             n_filetypes++;
319         }
320         while (f_data_wrote < bufsize) {
321             f_data_wrote += flat_file->blocklens[j];
322             total_blks_to_write++;
323             if (j < (flat_file->count - 1))
324                 j++;
325             else
326                 j = 0;
327         }
328 
329         j = st_index;
330         n_filetypes = st_n_filetypes;
331         n_write_lists = total_blks_to_write / MAX_ARRAY_SIZE;
332         extra_blks = total_blks_to_write % MAX_ARRAY_SIZE;
333 
334         mem_offset = (intptr_t) buf;
335         mem_length = 0;
336 
337         /* if at least one full writelist, allocate file arrays
338          * at max array size and don't free until very end */
339         if (n_write_lists) {
340             file_offsets = (int64_t *) ADIOI_Malloc(MAX_ARRAY_SIZE * sizeof(int64_t));
341             file_lengths = (int32_t *) ADIOI_Malloc(MAX_ARRAY_SIZE * sizeof(int32_t));
342         }
343         /* if there's no full writelist allocate file arrays according
344          * to needed size (extra_blks) */
345         else {
346             file_offsets = (int64_t *) ADIOI_Malloc(extra_blks * sizeof(int64_t));
347             file_lengths = (int32_t *) ADIOI_Malloc(extra_blks * sizeof(int32_t));
348         }
349 
350         /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
351         for (i = 0; i < n_write_lists; i++) {
352             file_list_count = MAX_ARRAY_SIZE;
353             if (!i) {
354                 file_offsets[0] = offset;
355                 file_lengths[0] = st_fwr_size;
356                 mem_length = st_fwr_size;
357             }
358             for (k = 0; k < MAX_ARRAY_SIZE; k++) {
359                 if (i || k) {
360                     file_offsets[k] = disp +
361                         ((ADIO_Offset) n_filetypes) * filetype_extent + flat_file->indices[j];
362                     file_lengths[k] = flat_file->blocklens[j];
363                     mem_length += file_lengths[k];
364                 }
365                 if (j < (flat_file->count - 1))
366                     j++;
367                 else {
368                     j = 0;
369                     n_filetypes++;
370                 }
371             }   /* for (k=0; k<MAX_ARRAY_SIZE; k++) */
372 
373             err_flag = PVFS_Request_contiguous(mem_length, PVFS_BYTE, &mem_req);
374             /* --BEGIN ERROR HANDLING-- */
375             if (err_flag != 0) {
376                 *error_code = MPIO_Err_create_code(MPI_SUCCESS,
377                                                    MPIR_ERR_RECOVERABLE,
378                                                    myname, __LINE__,
379                                                    ADIOI_PVFS2_error_convert(err_flag),
380                                                    "Error in PVFS_Request_contiguous (memory)", 0);
381                 goto error_state;
382             }
383             /* --END ERROR HANDLING-- */
384 
385             err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
386                                              file_offsets, PVFS_BYTE, &file_req);
387             /* --BEGIN ERROR HANDLING-- */
388             if (err_flag != 0) {
389                 *error_code = MPIO_Err_create_code(MPI_SUCCESS,
390                                                    MPIR_ERR_RECOVERABLE,
391                                                    myname, __LINE__,
392                                                    ADIOI_PVFS2_error_convert(err_flag),
393                                                    "Error in PVFS_Request_hindexed (file)", 0);
394                 goto error_state;
395             }
396             /* --END ERROR HANDLING-- */
397 
398             /* PVFS_Request_hindexed already expresses the offsets into the
399              * file, so we should not pass in an offset if we are using
400              * hindexed for the file type */
401 #ifdef ADIOI_MPE_LOGGING
402             MPE_Log_event(ADIOI_MPE_write_a, 0, NULL);
403 #endif
404             err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0,
405                                       (void *) mem_offset, mem_req,
406                                       &(pvfs_fs->credentials), &resp_io);
407 #ifdef ADIOI_MPE_LOGGING
408             MPE_Log_event(ADIOI_MPE_write_b, 0, NULL);
409 #endif
410             /* --BEGIN ERROR HANDLING-- */
411             if (err_flag != 0) {
412                 *error_code = MPIO_Err_create_code(MPI_SUCCESS,
413                                                    MPIR_ERR_RECOVERABLE,
414                                                    myname, __LINE__,
415                                                    ADIOI_PVFS2_error_convert(err_flag),
416                                                    "Error in PVFS_sys_write", 0);
417                 goto error_state;
418             }
419             /* --END ERROR HANDLING-- */
420             total_bytes_written += resp_io.total_completed;
421 
422             mem_offset += mem_length;
423             mem_lengths = 0;
424             PVFS_Request_free(&file_req);
425             PVFS_Request_free(&mem_req);
426 
427         }       /* for (i=0; i<n_write_lists; i++) */
428 
429         /* for file arrays smaller than MAX_ARRAY_SIZE (last write_list call) */
430         if (extra_blks) {
431             file_list_count = extra_blks;
432             if (!i) {
433                 file_offsets[0] = offset;
434                 file_lengths[0] = MPL_MIN(st_fwr_size, bufsize);
435             }
436             for (k = 0; k < extra_blks; k++) {
437                 if (i || k) {
438                     file_offsets[k] = disp +
439                         ((ADIO_Offset) n_filetypes) * filetype_extent + flat_file->indices[j];
440                     if (k == (extra_blks - 1)) {
441                         file_lengths[k] = bufsize - (int32_t) mem_length
442                             - mem_offset + (int32_t) buf;
443                     } else
444                         file_lengths[k] = flat_file->blocklens[j];
445                 }       /* if (i || k) */
446                 mem_length += file_lengths[k];
447                 if (j < (flat_file->count - 1))
448                     j++;
449                 else {
450                     j = 0;
451                     n_filetypes++;
452                 }
453             }   /* for (k=0; k<extra_blks; k++) */
454 
455             err_flag = PVFS_Request_contiguous(mem_length, PVFS_BYTE, &mem_req);
456             /* --BEGIN ERROR HANDLING-- */
457             if (err_flag != 0) {
458                 *error_code = MPIO_Err_create_code(MPI_SUCCESS,
459                                                    MPIR_ERR_RECOVERABLE,
460                                                    myname, __LINE__,
461                                                    ADIOI_PVFS2_error_convert(err_flag),
462                                                    "Error in PVFS_Request_contiguous (memory)", 0);
463                 goto error_state;
464             }
465             /* --END ERROR HANDLING-- */
466 
467             err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
468                                              file_offsets, PVFS_BYTE, &file_req);
469             /* --BEGIN ERROR HANDLING-- */
470             if (err_flag != 0) {
471                 *error_code = MPIO_Err_create_code(MPI_SUCCESS,
472                                                    MPIR_ERR_RECOVERABLE,
473                                                    myname, __LINE__,
474                                                    ADIOI_PVFS2_error_convert(err_flag),
475                                                    "Error in PVFS_Request_hindexed(file)", 0);
476                 goto error_state;
477             }
478             /* --END ERROR HANDLING-- */
479 
480             /* as above, use 0 for 'offset' when using hindexed file type */
481 #ifdef ADIOI_MPE_LOGGING
482             MPE_Log_event(ADIOI_MPE_write_a, 0, NULL);
483 #endif
484             err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0,
485                                       (void *) mem_offset, mem_req,
486                                       &(pvfs_fs->credentials), &resp_io);
487 #ifdef ADIOI_MPE_LOGGING
488             MPE_Log_event(ADIOI_MPE_write_b, 0, NULL);
489 #endif
490             /* --BEGIN ERROR HANDLING-- */
491             if (err_flag != 0) {
492                 *error_code = MPIO_Err_create_code(MPI_SUCCESS,
493                                                    MPIR_ERR_RECOVERABLE,
494                                                    myname, __LINE__,
495                                                    ADIOI_PVFS2_error_convert(err_flag),
496                                                    "Error in PVFS_sys_write", 0);
497                 goto error_state;
498             }
499             /* --END ERROR HANDLING-- */
500             total_bytes_written += resp_io.total_completed;
501             PVFS_Request_free(&mem_req);
502             PVFS_Request_free(&file_req);
503         }
504     } else {
505         /* noncontiguous in memory as well as in file */
506 
507         flat_buf = ADIOI_Flatten_and_find(datatype);
508 
509         size_wrote = 0;
510         n_filetypes = st_n_filetypes;
511         fwr_size = st_fwr_size;
512         bwr_size = flat_buf->blocklens[0];
513         buf_count = 0;
514         start_mem_offset = 0;
515         start_k = k = 0;
516         start_j = st_index;
517         max_mem_list = 0;
518         max_file_list = 0;
519 
520         /* run through and find max_file_list and max_mem_list so that you
521          * can allocate the file and memory arrays less than MAX_ARRAY_SIZE
522          * if possible */
523 
524         while (size_wrote < bufsize) {
525             k = start_k;
526             new_buffer_write = 0;
527             mem_list_count = 0;
528             while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize - size_wrote)) {
529                 /* find mem_list_count and file_list_count such that both are
530                  * less than MAX_ARRAY_SIZE, the sum of their lengths are
531                  * equal, and the sum of all the data written and data to be
532                  * written in the next immediate write list is less than
533                  * bufsize */
534                 if (mem_list_count) {
535                     if ((new_buffer_write + flat_buf->blocklens[k] + size_wrote) > bufsize) {
536                         end_bwr_size = new_buffer_write +
537                             flat_buf->blocklens[k] - (bufsize - size_wrote);
538                         new_buffer_write = bufsize - size_wrote;
539                     } else {
540                         new_buffer_write += flat_buf->blocklens[k];
541                         end_bwr_size = flat_buf->blocklens[k];
542                     }
543                 } else {
544                     if (bwr_size > (bufsize - size_wrote)) {
545                         new_buffer_write = bufsize - size_wrote;
546                         bwr_size = new_buffer_write;
547                     } else
548                         new_buffer_write = bwr_size;
549                 }
550                 mem_list_count++;
551                 k = (k + 1) % flat_buf->count;
552             }   /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
553                  * (new_buffer_write < bufsize-size_wrote)) */
554             j = start_j;
555             new_file_write = 0;
556             file_list_count = 0;
557             while ((file_list_count < MAX_ARRAY_SIZE) && (new_file_write < new_buffer_write)) {
558                 if (file_list_count) {
559                     if ((new_file_write + flat_file->blocklens[j]) > new_buffer_write) {
560                         end_fwr_size = new_buffer_write - new_file_write;
561                         new_file_write = new_buffer_write;
562                         j--;
563                     } else {
564                         new_file_write += flat_file->blocklens[j];
565                         end_fwr_size = flat_file->blocklens[j];
566                     }
567                 } else {
568                     if (fwr_size > new_buffer_write) {
569                         new_file_write = new_buffer_write;
570                         fwr_size = new_file_write;
571                     } else
572                         new_file_write = fwr_size;
573                 }
574                 file_list_count++;
575                 if (j < (flat_file->count - 1))
576                     j++;
577                 else
578                     j = 0;
579 
580                 k = start_k;
581                 if ((new_file_write < new_buffer_write) && (file_list_count == MAX_ARRAY_SIZE)) {
582                     new_buffer_write = 0;
583                     mem_list_count = 0;
584                     while (new_buffer_write < new_file_write) {
585                         if (mem_list_count) {
586                             if ((new_buffer_write + flat_buf->blocklens[k]) > new_file_write) {
587                                 end_bwr_size = new_file_write - new_buffer_write;
588                                 new_buffer_write = new_file_write;
589                                 k--;
590                             } else {
591                                 new_buffer_write += flat_buf->blocklens[k];
592                                 end_bwr_size = flat_buf->blocklens[k];
593                             }
594                         } else {
595                             new_buffer_write = bwr_size;
596                             if (bwr_size > (bufsize - size_wrote)) {
597                                 new_buffer_write = bufsize - size_wrote;
598                                 bwr_size = new_buffer_write;
599                             }
600                         }
601                         mem_list_count++;
602                         k = (k + 1) % flat_buf->count;
603                     }   /* while (new_buffer_write < new_file_write) */
604                 }       /* if ((new_file_write < new_buffer_write) &&
605                          * (file_list_count == MAX_ARRAY_SIZE)) */
606             }   /* while ((file_list_count < MAX_ARRAY_SIZE) &&
607                  * (new_file_write < new_buffer_write)) */
608 
609             /*  fakes filling the writelist arrays of lengths found above  */
610             k = start_k;
611             j = start_j;
612             for (i = 0; i < mem_list_count; i++) {
613                 if (i) {
614                     if (i == (mem_list_count - 1)) {
615                         if (flat_buf->blocklens[k] == end_bwr_size)
616                             bwr_size = flat_buf->blocklens[(k + 1) % flat_buf->count];
617                         else {
618                             bwr_size = flat_buf->blocklens[k] - end_bwr_size;
619                             k--;
620                             buf_count--;
621                         }
622                     }
623                 }
624                 buf_count++;
625                 k = (k + 1) % flat_buf->count;
626             }   /* for (i=0; i<mem_list_count; i++) */
627             for (i = 0; i < file_list_count; i++) {
628                 if (i) {
629                     if (i == (file_list_count - 1)) {
630                         if (flat_file->blocklens[j] == end_fwr_size)
631                             fwr_size = flat_file->blocklens[(j + 1) % flat_file->count];
632                         else {
633                             fwr_size = flat_file->blocklens[j] - end_fwr_size;
634                             j--;
635                         }
636                     }
637                 }
638                 if (j < flat_file->count - 1)
639                     j++;
640                 else {
641                     j = 0;
642                     n_filetypes++;
643                 }
644             }   /* for (i=0; i<file_list_count; i++) */
645             size_wrote += new_buffer_write;
646             start_k = k;
647             start_j = j;
648             if (max_mem_list < mem_list_count)
649                 max_mem_list = mem_list_count;
650             if (max_file_list < file_list_count)
651                 max_file_list = file_list_count;
652         }       /* while (size_wrote < bufsize) */
653 
654         /* one last check before we actually carry out the operation:
655          * this code has hard-to-fix bugs when a noncontiguous file type has
656          * such large pieces that the sum of the lengths of the memory type is
657          * not larger than one of those pieces (and vice versa for large memory
658          * types and many pieces of file types).  In these cases, give up and
659          * fall back to naive reads and writes.  The testphdf5 test created a
660          * type with two very large memory regions and 600 very small file
661          * regions.  The same test also created a type with one very large file
662          * region and many (700) very small memory regions.  both cases caused
663          * problems for this code */
664 
665         if (((file_list_count == 1) &&
666              (new_file_write < flat_file->blocklens[0])) ||
667             ((mem_list_count == 1) &&
668              (new_buffer_write < flat_buf->blocklens[0])) ||
669             ((file_list_count == MAX_ARRAY_SIZE) &&
670              (new_file_write < flat_buf->blocklens[0])) ||
671             ((mem_list_count == MAX_ARRAY_SIZE) && (new_buffer_write < flat_file->blocklens[0]))) {
672             ADIOI_GEN_WriteStrided_naive(fd, buf, count, datatype,
673                                          file_ptr_type, initial_off, status, error_code);
674             return;
675         }
676 
677 
678         mem_offsets = (PVFS_size *) ADIOI_Malloc(max_mem_list * sizeof(PVFS_size));
679         mem_lengths = (int *) ADIOI_Malloc(max_mem_list * sizeof(int));
680         file_offsets = (int64_t *) ADIOI_Malloc(max_file_list * sizeof(int64_t));
681         file_lengths = (int32_t *) ADIOI_Malloc(max_file_list * sizeof(int32_t));
682 
683         size_wrote = 0;
684         n_filetypes = st_n_filetypes;
685         fwr_size = st_fwr_size;
686         bwr_size = flat_buf->blocklens[0];
687         buf_count = 0;
688         start_mem_offset = 0;
689         start_k = k = 0;
690         start_j = st_index;
691 
        /*  this section calculates mem_list_count and file_list_count
         * and also finds the possibly odd sized last array elements
         * in end_fwr_size and end_bwr_size  */
695 
696         while (size_wrote < bufsize) {
697             k = start_k;
698             new_buffer_write = 0;
699             mem_list_count = 0;
700             while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize - size_wrote)) {
701                 /* find mem_list_count and file_list_count such that both are
702                  * less than MAX_ARRAY_SIZE, the sum of their lengths are
703                  * equal, and the sum of all the data written and data to be
704                  * written in the next immediate write list is less than
705                  * bufsize */
706                 if (mem_list_count) {
707                     if ((new_buffer_write + flat_buf->blocklens[k] + size_wrote) > bufsize) {
708                         end_bwr_size = new_buffer_write +
709                             flat_buf->blocklens[k] - (bufsize - size_wrote);
710                         new_buffer_write = bufsize - size_wrote;
711                     } else {
712                         new_buffer_write += flat_buf->blocklens[k];
713                         end_bwr_size = flat_buf->blocklens[k];
714                     }
715                 } else {
716                     if (bwr_size > (bufsize - size_wrote)) {
717                         new_buffer_write = bufsize - size_wrote;
718                         bwr_size = new_buffer_write;
719                     } else
720                         new_buffer_write = bwr_size;
721                 }
722                 mem_list_count++;
723                 k = (k + 1) % flat_buf->count;
724             }   /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
725                  * (new_buffer_write < bufsize-size_wrote)) */
726             j = start_j;
727             new_file_write = 0;
728             file_list_count = 0;
729             while ((file_list_count < MAX_ARRAY_SIZE) && (new_file_write < new_buffer_write)) {
730                 if (file_list_count) {
731                     if ((new_file_write + flat_file->blocklens[j]) > new_buffer_write) {
732                         end_fwr_size = new_buffer_write - new_file_write;
733                         new_file_write = new_buffer_write;
734                         j--;
735                     } else {
736                         new_file_write += flat_file->blocklens[j];
737                         end_fwr_size = flat_file->blocklens[j];
738                     }
739                 } else {
740                     if (fwr_size > new_buffer_write) {
741                         new_file_write = new_buffer_write;
742                         fwr_size = new_file_write;
743                     } else
744                         new_file_write = fwr_size;
745                 }
746                 file_list_count++;
747                 if (j < (flat_file->count - 1))
748                     j++;
749                 else
750                     j = 0;
751 
752                 k = start_k;
753                 if ((new_file_write < new_buffer_write) && (file_list_count == MAX_ARRAY_SIZE)) {
754                     new_buffer_write = 0;
755                     mem_list_count = 0;
756                     while (new_buffer_write < new_file_write) {
757                         if (mem_list_count) {
758                             if ((new_buffer_write + flat_buf->blocklens[k]) > new_file_write) {
759                                 end_bwr_size = new_file_write - new_buffer_write;
760                                 new_buffer_write = new_file_write;
761                                 k--;
762                             } else {
763                                 new_buffer_write += flat_buf->blocklens[k];
764                                 end_bwr_size = flat_buf->blocklens[k];
765                             }
766                         } else {
767                             new_buffer_write = bwr_size;
768                             if (bwr_size > (bufsize - size_wrote)) {
769                                 new_buffer_write = bufsize - size_wrote;
770                                 bwr_size = new_buffer_write;
771                             }
772                         }
773                         mem_list_count++;
774                         k = (k + 1) % flat_buf->count;
775                     }   /* while (new_buffer_write < new_file_write) */
776                 }       /* if ((new_file_write < new_buffer_write) &&
777                          * (file_list_count == MAX_ARRAY_SIZE)) */
            }   /* while ((file_list_count < MAX_ARRAY_SIZE) &&
                 * (new_file_write < new_buffer_write)) */
780 
781             /*  fills the allocated writelist arrays  */
782             k = start_k;
783             j = start_j;
784             for (i = 0; i < mem_list_count; i++) {
785                 /* TODO: fix this warning by casting to an integer that's the
786                  * same size as a char * and /then/ casting to PVFS_size */
787                 mem_offsets[i] = ((PVFS_size) buf + buftype_extent *
788                                   (buf_count / flat_buf->count) + (int) flat_buf->indices[k]);
789 
790                 if (!i) {
791                     mem_lengths[0] = bwr_size;
792                     mem_offsets[0] += flat_buf->blocklens[k] - bwr_size;
793                 } else {
794                     if (i == (mem_list_count - 1)) {
795                         mem_lengths[i] = end_bwr_size;
796                         if (flat_buf->blocklens[k] == end_bwr_size)
797                             bwr_size = flat_buf->blocklens[(k + 1) % flat_buf->count];
798                         else {
799                             bwr_size = flat_buf->blocklens[k] - end_bwr_size;
800                             k--;
801                             buf_count--;
802                         }
803                     } else {
804                         mem_lengths[i] = flat_buf->blocklens[k];
805                     }
806                 }
807                 buf_count++;
808                 k = (k + 1) % flat_buf->count;
809             }   /* for (i=0; i<mem_list_count; i++) */
810             for (i = 0; i < file_list_count; i++) {
811                 file_offsets[i] = disp + flat_file->indices[j] +
812                     ((ADIO_Offset) n_filetypes) * filetype_extent;
813                 if (!i) {
814                     file_lengths[0] = fwr_size;
815                     file_offsets[0] += flat_file->blocklens[j] - fwr_size;
816                 } else {
817                     if (i == (file_list_count - 1)) {
818                         file_lengths[i] = end_fwr_size;
819                         if (flat_file->blocklens[j] == end_fwr_size)
820                             fwr_size = flat_file->blocklens[(j + 1) % flat_file->count];
821                         else {
822                             fwr_size = flat_file->blocklens[j] - end_fwr_size;
823                             j--;
824                         }
825                     } else
826                         file_lengths[i] = flat_file->blocklens[j];
827                 }
828                 if (j < flat_file->count - 1)
829                     j++;
830                 else {
831                     j = 0;
832                     n_filetypes++;
833                 }
834             }   /* for (i=0; i<file_list_count; i++) */
835 
836             err_flag = PVFS_Request_hindexed(mem_list_count, mem_lengths,
837                                              mem_offsets, PVFS_BYTE, &mem_req);
838             /* --BEGIN ERROR HANDLING-- */
839             if (err_flag != 0) {
840                 *error_code = MPIO_Err_create_code(MPI_SUCCESS,
841                                                    MPIR_ERR_RECOVERABLE,
842                                                    myname, __LINE__,
843                                                    ADIOI_PVFS2_error_convert(err_flag),
844                                                    "Error in PVFS_Request_hindexed (memory)", 0);
845                 goto error_state;
846             }
847             /* --END ERROR HANDLING-- */
848 
849             err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
850                                              file_offsets, PVFS_BYTE, &file_req);
851             /* --BEGIN ERROR HANDLING-- */
852             if (err_flag != 0) {
853                 *error_code = MPIO_Err_create_code(MPI_SUCCESS,
854                                                    MPIR_ERR_RECOVERABLE,
855                                                    myname, __LINE__,
856                                                    ADIOI_PVFS2_error_convert(err_flag),
857                                                    "Error in PVFS_Request_hindexed", 0);
858                 goto error_state;
859             }
860             /* --END ERROR HANDLING-- */
861 
862             /* offset will be expressed in memory and file datatypes */
863 
864 #ifdef ADIOI_MPE_LOGGING
865             MPE_Log_event(ADIOI_MPE_write_a, 0, NULL);
866 #endif
867             err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0,
868                                       PVFS_BOTTOM, mem_req, &(pvfs_fs->credentials), &resp_io);
869 #ifdef ADIOI_MPE_LOGGING
870             MPE_Log_event(ADIOI_MPE_write_b, 0, NULL);
871 #endif
872             /* --BEGIN ERROR HANDLING-- */
873             if (err_flag != 0) {
874                 *error_code = MPIO_Err_create_code(MPI_SUCCESS,
875                                                    MPIR_ERR_RECOVERABLE,
876                                                    myname, __LINE__,
877                                                    ADIOI_PVFS2_error_convert(err_flag),
878                                                    "Error in PVFS_sys_write", 0);
879                 goto error_state;
880             }
881             /* --END ERROR HANDLING-- */
882 
883             size_wrote += new_buffer_write;
884             total_bytes_written += resp_io.total_completed;
885             start_k = k;
886             start_j = j;
887             PVFS_Request_free(&mem_req);
888             PVFS_Request_free(&file_req);
889         }       /* while (size_wrote < bufsize) */
890         ADIOI_Free(mem_offsets);
891         ADIOI_Free(mem_lengths);
892     }
    /* when incrementing fp_ind, need to also take into account the file type:
     * consider an N-element 1-d subarray with a lb and ub: (|---xxxxx-----|
     * if we wrote N elements, offset needs to point at beginning of type, not
     * at empty region at offset N+1).
     *
     * As we discussed on mpich-discuss in May/June 2009, the code below might
     * look weird, but by putting fp_ind at the last byte written, the next
     * time we run through the strided code we'll update the fp_ind to the
     * right location. */
902     if (file_ptr_type == ADIO_INDIVIDUAL) {
903         fd->fp_ind = file_offsets[file_list_count - 1] + file_lengths[file_list_count - 1];
904     }
905     ADIOI_Free(file_offsets);
906     ADIOI_Free(file_lengths);
907 
908     *error_code = MPI_SUCCESS;
909 
910   error_state:
911     fd->fp_sys_posn = -1;       /* set it to null. */
912 
913 #ifdef HAVE_STATUS_SET_BYTES
914     MPIR_Status_set_bytes(status, datatype, bufsize);
915 /* This is a temporary way of filling in status. The right way is to
916    keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
917 #endif
918 
919 }
920