/* -*- Mode: C; c-basic-offset:4 ; -*-
 * vim: ts=8 sts=4 sw=4 noexpandtab
 *
 *   Copyright (C) 2008 University of Chicago.
 *   See COPYRIGHT notice in top-level directory.
 */
7 
8 #include "adio.h"
9 #include "adio_extern.h"
10 #include "ad_pvfs2.h"
11 
12 #include "ad_pvfs2_common.h"
13 
ADIOI_PVFS2_OldWriteStrided(ADIO_File fd,void * buf,int count,MPI_Datatype datatype,int file_ptr_type,ADIO_Offset offset,ADIO_Status * status,int * error_code)14 void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, void *buf, int count,
15 			MPI_Datatype datatype, int file_ptr_type,
16 			ADIO_Offset offset, ADIO_Status *status,
17 			int *error_code)
18 {
19     /* as with all the other WriteStrided functions, offset is in units of
20      * etype relative to the filetype */
21 
22     /* Since PVFS2 does not support file locking, can't do buffered writes
23        as on Unix */
24 
25     ADIOI_Flatlist_node *flat_buf, *flat_file;
26     int i, j, k, bwr_size, fwr_size=0, st_index=0;
27     int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
28     int n_filetypes, etype_in_filetype;
29     ADIO_Offset abs_off_in_filetype=0;
30     int filetype_size, etype_size, buftype_size;
31     MPI_Aint filetype_extent, buftype_extent;
32     int buf_count, buftype_is_contig, filetype_is_contig;
33     ADIO_Offset off, disp, start_off, initial_off;
34     int flag, st_fwr_size, st_n_filetypes;
35     int err_flag=0;
36 
37     int mem_list_count, file_list_count;
38     PVFS_size * mem_offsets;
39     int64_t *file_offsets;
40     int *mem_lengths;
41     int32_t *file_lengths;
42     int total_blks_to_write;
43 
44     int max_mem_list, max_file_list;
45 
46     int b_blks_wrote;
47     int f_data_wrote;
48     int size_wrote=0, n_write_lists, extra_blks;
49 
50     int end_bwr_size, end_fwr_size;
51     int start_k, start_j, new_file_write, new_buffer_write;
52     int start_mem_offset;
53     PVFS_Request mem_req, file_req;
54     ADIOI_PVFS2_fs * pvfs_fs;
55     PVFS_sysresp_io resp_io;
56     MPI_Offset total_bytes_written=0;
57     static char myname[] = "ADIOI_PVFS2_WRITESTRIDED";
58 
59     /* note: don't increase this: several parts of PVFS2 now
60      * assume this limit*/
61 #define MAX_ARRAY_SIZE 64
62 
63     /* --BEGIN ERROR HANDLING-- */
64     if (fd->atomicity) {
65 	*error_code = MPIO_Err_create_code(MPI_SUCCESS,
66 					   MPIR_ERR_RECOVERABLE,
67 					   myname, __LINE__,
68 					   MPI_ERR_ARG,
69 					   "Atomic noncontiguous writes are not supported by PVFS2", 0);
70 	return;
71     }
72     /* --END ERROR HANDLING-- */
73 
74     ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
75     ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
76 
77     /* the HDF5 tests showed a bug in this list processing code (see many many
78      * lines down below).  We added a workaround, but common HDF5 file types
79      * are actually contiguous and do not need the expensive workarond */
80     if (!filetype_is_contig) {
81 	flat_file = ADIOI_Flatlist;
82 	while (flat_file->type != fd->filetype) flat_file = flat_file->next;
83 	if (flat_file->count == 1 && !buftype_is_contig)
84 	    filetype_is_contig = 1;
85     }
86 
87     MPI_Type_size(fd->filetype, &filetype_size);
88     if ( ! filetype_size ) {
89 	*error_code = MPI_SUCCESS;
90 	return;
91     }
92 
93     MPI_Type_extent(fd->filetype, &filetype_extent);
94     MPI_Type_size(datatype, &buftype_size);
95     MPI_Type_extent(datatype, &buftype_extent);
96     etype_size = fd->etype_size;
97 
98     bufsize = buftype_size * count;
99 
100     pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;
101 
102     if (!buftype_is_contig && filetype_is_contig) {
103 
104 /* noncontiguous in memory, contiguous in file.  */
105         int64_t file_offsets;
106 	int32_t file_lengths;
107 
108 	ADIOI_Flatten_datatype(datatype);
109 	flat_buf = ADIOI_Flatlist;
110 	while (flat_buf->type != datatype) flat_buf = flat_buf->next;
111 
112 	if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
113 	    off = fd->disp + etype_size * offset;
114 	}
115 	else off = fd->fp_ind;
116 
117 	file_list_count = 1;
118 	file_offsets = off;
119 	file_lengths = 0;
120 	total_blks_to_write = count*flat_buf->count;
121 	b_blks_wrote = 0;
122 
123 	/* allocate arrays according to max usage */
124 	if (total_blks_to_write > MAX_ARRAY_SIZE)
125 	    mem_list_count = MAX_ARRAY_SIZE;
126 	else mem_list_count = total_blks_to_write;
127 	mem_offsets = (PVFS_size*)ADIOI_Malloc(mem_list_count*sizeof(PVFS_size));
128 	mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int));
129 
130 	j = 0;
131 	/* step through each block in memory, filling memory arrays */
132 	while (b_blks_wrote < total_blks_to_write) {
133 	    for (i=0; i<flat_buf->count; i++) {
134 		mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] =
135 		    /* TODO: fix this warning by casting to an integer that's
136 		     * the same size as a char * and /then/ casting to
137 		     * PVFS_size */
138 		    ((PVFS_size)buf + j*buftype_extent + flat_buf->indices[i]);
139 		mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] =
140 		    flat_buf->blocklens[i];
141 		file_lengths += flat_buf->blocklens[i];
142 		b_blks_wrote++;
143 		if (!(b_blks_wrote % MAX_ARRAY_SIZE) ||
144 		    (b_blks_wrote == total_blks_to_write)) {
145 
146 		    /* in the case of the last write list call,
147 		       adjust mem_list_count */
148 		    if (b_blks_wrote == total_blks_to_write) {
149 		        mem_list_count = total_blks_to_write % MAX_ARRAY_SIZE;
150 			/* in case last write list call fills max arrays */
151 			if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
152 		    }
153 		    err_flag = PVFS_Request_hindexed(mem_list_count,
154 						     mem_lengths, mem_offsets,
155 						     PVFS_BYTE, &mem_req);
156 		    /* --BEGIN ERROR HANDLING-- */
157 		    if (err_flag != 0) {
158 			*error_code = MPIO_Err_create_code(MPI_SUCCESS,
159 							   MPIR_ERR_RECOVERABLE,
160 							   myname, __LINE__,
161 							   ADIOI_PVFS2_error_convert(err_flag),
162 							   "Error in PVFS_Request_hindexed (memory)", 0);
163 			break;
164 		    }
165 		    /* --END ERROR HANDLING-- */
166 
167 		    err_flag = PVFS_Request_contiguous(file_lengths,
168 						       PVFS_BYTE, &file_req);
169 		    /* --BEGIN ERROR HANDLING-- */
170 		    if (err_flag != 0) {
171 			*error_code = MPIO_Err_create_code(MPI_SUCCESS,
172 							   MPIR_ERR_RECOVERABLE,
173 							   myname, __LINE__,
174 							   ADIOI_PVFS2_error_convert(err_flag),
175 							   "Error in PVFS_Request_contiguous (file)", 0);
176 			break;
177 		    }
178 		    /* --END ERROR HANDLING-- */
179 
180 #ifdef ADIOI_MPE_LOGGING
181                     MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
182 #endif
183 		    err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req,
184 					      file_offsets, PVFS_BOTTOM,
185 					      mem_req,
186 					      &(pvfs_fs->credentials),
187 					      &resp_io);
188 #ifdef ADIOI_MPE_LOGGING
189                     MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
190 #endif
191 		    total_bytes_written += resp_io.total_completed;
192 
193 		    /* in the case of error or the last write list call,
194 		     * leave here */
195 		    /* --BEGIN ERROR HANDLING-- */
196 		    if (err_flag) {
197 			*error_code = MPIO_Err_create_code(MPI_SUCCESS,
198 							   MPIR_ERR_RECOVERABLE,
199 							   myname, __LINE__,
200 							   ADIOI_PVFS2_error_convert(err_flag),
201 							   "Error in PVFS_sys_write", 0);
202 			break;
203 		    }
204 		    /* --END ERROR HANDLING-- */
205 		    if (b_blks_wrote == total_blks_to_write) break;
206 
207 		    file_offsets += file_lengths;
208 		    file_lengths = 0;
209 		    PVFS_Request_free(&mem_req);
210 		    PVFS_Request_free(&file_req);
211 		}
212 	    } /* for (i=0; i<flat_buf->count; i++) */
213 	    j++;
214 	} /* while (b_blks_wrote < total_blks_to_write) */
215 	ADIOI_Free(mem_offsets);
216 	ADIOI_Free(mem_lengths);
217 
218 	if (file_ptr_type == ADIO_INDIVIDUAL)
219 	    fd->fp_ind += total_bytes_written;
220 
221 	if (!err_flag)  *error_code = MPI_SUCCESS;
222 
223 	fd->fp_sys_posn = -1;   /* clear this. */
224 
225 #ifdef HAVE_STATUS_SET_BYTES
226 	MPIR_Status_set_bytes(status, datatype, bufsize);
227 /* This is a temporary way of filling in status. The right way is to
228    keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
229 #endif
230 
231 	ADIOI_Delete_flattened(datatype);
232 	return;
233     } /* if (!buftype_is_contig && filetype_is_contig) */
234 
235     /* already know that file is noncontiguous from above */
236     /* noncontiguous in file */
237 
238 /* filetype already flattened in ADIO_Open */
239     flat_file = ADIOI_Flatlist;
240     while (flat_file->type != fd->filetype) flat_file = flat_file->next;
241 
242     disp = fd->disp;
243     initial_off = offset;
244 
245     /* for each case - ADIO_Individual pointer or explicit, find offset
246        (file offset in bytes), n_filetypes (how many filetypes into file
247        to start), fwr_size (remaining amount of data in present file
248        block), and st_index (start point in terms of blocks in starting
249        filetype) */
250     if (file_ptr_type == ADIO_INDIVIDUAL) {
251         offset = fd->fp_ind; /* in bytes */
252 	n_filetypes = -1;
253 	flag = 0;
254 	while (!flag) {
255 	    n_filetypes++;
256 	    for (i=0; i<flat_file->count; i++) {
257 	        if (disp + flat_file->indices[i] +
258 		    ((ADIO_Offset) n_filetypes)*filetype_extent +
259 		      flat_file->blocklens[i] >= offset) {
260 		  st_index = i;
261 		  fwr_size = disp + flat_file->indices[i] +
262 		    ((ADIO_Offset) n_filetypes)*filetype_extent
263 		    + flat_file->blocklens[i] - offset;
264 		  flag = 1;
265 		  break;
266 		}
267 	    }
268 	} /* while (!flag) */
269     } /* if (file_ptr_type == ADIO_INDIVIDUAL) */
270     else {
271         n_etypes_in_filetype = filetype_size/etype_size;
272 	n_filetypes = (int) (offset / n_etypes_in_filetype);
273 	etype_in_filetype = (int) (offset % n_etypes_in_filetype);
274 	size_in_filetype = etype_in_filetype * etype_size;
275 
276 	sum = 0;
277 	for (i=0; i<flat_file->count; i++) {
278 	    sum += flat_file->blocklens[i];
279 	    if (sum > size_in_filetype) {
280 	        st_index = i;
281 		fwr_size = sum - size_in_filetype;
282 		abs_off_in_filetype = flat_file->indices[i] +
283 		    size_in_filetype - (sum - flat_file->blocklens[i]);
284 		break;
285 	    }
286 	}
287 
288 	/* abs. offset in bytes in the file */
289 	offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent +
290 	    abs_off_in_filetype;
291     } /* else [file_ptr_type != ADIO_INDIVIDUAL] */
292 
293     start_off = offset;
294     st_fwr_size = fwr_size;
295     st_n_filetypes = n_filetypes;
296 
297     if (buftype_is_contig && !filetype_is_contig) {
298 
299 /* contiguous in memory, noncontiguous in file. should be the most
300    common case. */
301 
302         int mem_lengths;
303 	char *mem_offsets;
304 
305 	i = 0;
306 	j = st_index;
307 	off = offset;
308 	n_filetypes = st_n_filetypes;
309 
310 	mem_list_count = 1;
311 
312 	/* determine how many blocks in file to write */
313 	f_data_wrote = ADIOI_MIN(st_fwr_size, bufsize);
314 	total_blks_to_write = 1;
315 	if (j < (flat_file->count -1)) j++;
316 	else {
317 	    j = 0;
318 	    n_filetypes++;
319 	}
320 	while (f_data_wrote < bufsize) {
321 	    f_data_wrote += flat_file->blocklens[j];
322 	    total_blks_to_write++;
323 	    if (j<(flat_file->count-1)) j++;
324 	    else j = 0;
325 	}
326 
327 	j = st_index;
328 	n_filetypes = st_n_filetypes;
329 	n_write_lists = total_blks_to_write/MAX_ARRAY_SIZE;
330 	extra_blks = total_blks_to_write%MAX_ARRAY_SIZE;
331 
332 	mem_offsets = buf;
333 	mem_lengths = 0;
334 
335 	/* if at least one full writelist, allocate file arrays
336 	   at max array size and don't free until very end */
337 	if (n_write_lists) {
338 	    file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
339 						  sizeof(int64_t));
340 	    file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
341 						  sizeof(int32_t));
342 	}
343 	/* if there's no full writelist allocate file arrays according
344 	   to needed size (extra_blks) */
345 	else {
346 	    file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
347                                                   sizeof(int64_t));
348             file_lengths = (int32_t*)ADIOI_Malloc(extra_blks*
349                                                   sizeof(int32_t));
350         }
351 
352         /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
353         for (i=0; i<n_write_lists; i++) {
354             file_list_count = MAX_ARRAY_SIZE;
355             if(!i) {
356                 file_offsets[0] = offset;
357                 file_lengths[0] = st_fwr_size;
358                 mem_lengths = st_fwr_size;
359             }
360             for (k=0; k<MAX_ARRAY_SIZE; k++) {
361                 if (i || k) {
362                     file_offsets[k] = disp +
363 			((ADIO_Offset)n_filetypes)*filetype_extent
364 			+ flat_file->indices[j];
365                     file_lengths[k] = flat_file->blocklens[j];
366                     mem_lengths += file_lengths[k];
367                 }
368                 if (j<(flat_file->count - 1)) j++;
369                 else {
370                     j = 0;
371                     n_filetypes++;
372                 }
373             } /* for (k=0; k<MAX_ARRAY_SIZE; k++) */
374 
375 	    err_flag = PVFS_Request_contiguous(mem_lengths,
376 					       PVFS_BYTE, &mem_req);
377 	    /* --BEGIN ERROR HANDLING-- */
378 	    if (err_flag != 0) {
379 		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
380 						   MPIR_ERR_RECOVERABLE,
381 						   myname, __LINE__,
382 						   ADIOI_PVFS2_error_convert(err_flag),
383 						   "Error in PVFS_Request_contiguous (memory)", 0);
384 		goto error_state;
385 	    }
386 	    /* --END ERROR HANDLING-- */
387 
388 	    err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
389 					     file_offsets, PVFS_BYTE,
390 					     &file_req);
391 	    /* --BEGIN ERROR HANDLING-- */
392 	    if (err_flag != 0) {
393 		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
394 						   MPIR_ERR_RECOVERABLE,
395 						   myname, __LINE__,
396 						   ADIOI_PVFS2_error_convert(err_flag),
397 						   "Error in PVFS_Request_hindexed (file)", 0);
398 		goto error_state;
399 	    }
400 	    /* --END ERROR HANDLING-- */
401 
402 	    /* PVFS_Request_hindexed already expresses the offsets into the
403 	     * file, so we should not pass in an offset if we are using
404 	     * hindexed for the file type */
405 #ifdef ADIOI_MPE_LOGGING
406             MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
407 #endif
408 	    err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0,
409 				      mem_offsets, mem_req,
410 				      &(pvfs_fs->credentials), &resp_io);
411 #ifdef ADIOI_MPE_LOGGING
412             MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
413 #endif
414 	    /* --BEGIN ERROR HANDLING-- */
415 	    if (err_flag != 0) {
416 		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
417 						   MPIR_ERR_RECOVERABLE,
418 						   myname, __LINE__,
419 						   ADIOI_PVFS2_error_convert(err_flag),
420 						   "Error in PVFS_sys_write", 0);
421 		goto error_state;
422 	    }
423 	    /* --END ERROR HANDLING-- */
424 	    total_bytes_written += resp_io.total_completed;
425 
426             mem_offsets += mem_lengths;
427             mem_lengths = 0;
428 	    PVFS_Request_free(&file_req);
429 	    PVFS_Request_free(&mem_req);
430 
431         } /* for (i=0; i<n_write_lists; i++) */
432 
433         /* for file arrays smaller than MAX_ARRAY_SIZE (last write_list call) */
434         if (extra_blks) {
435             file_list_count = extra_blks;
436             if(!i) {
437                 file_offsets[0] = offset;
438                 file_lengths[0] = ADIOI_MIN(st_fwr_size, bufsize);
439             }
440             for (k=0; k<extra_blks; k++) {
441                 if(i || k) {
442                     file_offsets[k] = disp +
443 			((ADIO_Offset)n_filetypes)*filetype_extent +
444 			flat_file->indices[j];
445                     if (k == (extra_blks - 1)) {
446                         file_lengths[k] = bufsize - (int32_t) mem_lengths
447                           - (int32_t) mem_offsets + (int32_t)  buf;
448                     }
449                     else file_lengths[k] = flat_file->blocklens[j];
450                 } /* if(i || k) */
451                 mem_lengths += file_lengths[k];
452                 if (j<(flat_file->count - 1)) j++;
453                 else {
454                     j = 0;
455                     n_filetypes++;
456                 }
457             } /* for (k=0; k<extra_blks; k++) */
458 
459 	    err_flag = PVFS_Request_contiguous(mem_lengths,
460 					       PVFS_BYTE, &mem_req);
461 	    /* --BEGIN ERROR HANDLING-- */
462 	    if (err_flag != 0) {
463 		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
464 						   MPIR_ERR_RECOVERABLE,
465 						   myname, __LINE__,
466 						   ADIOI_PVFS2_error_convert(err_flag),
467 						   "Error in PVFS_Request_contiguous (memory)", 0);
468 		goto error_state;
469 	    }
470 	    /* --END ERROR HANDLING-- */
471 
472 	    err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
473 					     file_offsets, PVFS_BYTE,
474 					     &file_req);
475 	    /* --BEGIN ERROR HANDLING-- */
476 	    if (err_flag != 0) {
477 		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
478 						   MPIR_ERR_RECOVERABLE,
479 						   myname, __LINE__,
480 						   ADIOI_PVFS2_error_convert(err_flag),
481 						   "Error in PVFS_Request_hindexed(file)", 0);
482 		goto error_state;
483 	    }
484 	    /* --END ERROR HANDLING-- */
485 
486 	    /* as above, use 0 for 'offset' when using hindexed file type*/
487 #ifdef ADIOI_MPE_LOGGING
488             MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
489 #endif
490 	    err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0,
491 				      mem_offsets, mem_req,
492 				      &(pvfs_fs->credentials), &resp_io);
493 #ifdef ADIOI_MPE_LOGGING
494             MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
495 #endif
496 	    /* --BEGIN ERROR HANDLING-- */
497 	    if (err_flag != 0) {
498 		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
499 						   MPIR_ERR_RECOVERABLE,
500 						   myname, __LINE__,
501 						   ADIOI_PVFS2_error_convert(err_flag),
502 						   "Error in PVFS_sys_write", 0);
503 		goto error_state;
504 	    }
505 	    /* --END ERROR HANDLING-- */
506 	    total_bytes_written += resp_io.total_completed;
507 	    PVFS_Request_free(&mem_req);
508 	    PVFS_Request_free(&file_req);
509         }
510     }
511     else {
512         /* noncontiguous in memory as well as in file */
513 
514         ADIOI_Flatten_datatype(datatype);
515 	flat_buf = ADIOI_Flatlist;
516 	while (flat_buf->type != datatype) flat_buf = flat_buf->next;
517 
518 	size_wrote = 0;
519 	n_filetypes = st_n_filetypes;
520 	fwr_size = st_fwr_size;
521 	bwr_size = flat_buf->blocklens[0];
522 	buf_count = 0;
523 	start_mem_offset = 0;
524 	start_k = k = 0;
525 	start_j = st_index;
526 	max_mem_list = 0;
527 	max_file_list = 0;
528 
529 	/* run through and file max_file_list and max_mem_list so that you
530 	   can allocate the file and memory arrays less than MAX_ARRAY_SIZE
531 	   if possible */
532 
533 	while (size_wrote < bufsize) {
534 	    k = start_k;
535 	    new_buffer_write = 0;
536 	    mem_list_count = 0;
537 	    while ((mem_list_count < MAX_ARRAY_SIZE) &&
538 		   (new_buffer_write < bufsize-size_wrote)) {
539 	        /* find mem_list_count and file_list_count such that both are
540 		   less than MAX_ARRAY_SIZE, the sum of their lengths are
541 		   equal, and the sum of all the data written and data to be
542 		   written in the next immediate write list is less than
543 		   bufsize */
544 	        if(mem_list_count) {
545 		    if((new_buffer_write + flat_buf->blocklens[k] +
546 			size_wrote) > bufsize) {
547 		        end_bwr_size = new_buffer_write +
548 			    flat_buf->blocklens[k] - (bufsize - size_wrote);
549 			new_buffer_write = bufsize - size_wrote;
550 		    }
551 		    else {
552 		        new_buffer_write += flat_buf->blocklens[k];
553 			end_bwr_size = flat_buf->blocklens[k];
554 		    }
555 		}
556 		else {
557 		    if (bwr_size > (bufsize - size_wrote)) {
558 		        new_buffer_write = bufsize - size_wrote;
559 			bwr_size = new_buffer_write;
560 		    }
561 		    else new_buffer_write = bwr_size;
562 		}
563 		mem_list_count++;
564 		k = (k + 1)%flat_buf->count;
565 	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
566 	       (new_buffer_write < bufsize-size_wrote)) */
567 	    j = start_j;
568 	    new_file_write = 0;
569 	    file_list_count = 0;
570 	    while ((file_list_count < MAX_ARRAY_SIZE) &&
571 		   (new_file_write < new_buffer_write)) {
572 	        if(file_list_count) {
573 		    if((new_file_write + flat_file->blocklens[j]) >
574 		       new_buffer_write) {
575 		        end_fwr_size = new_buffer_write - new_file_write;
576 			new_file_write = new_buffer_write;
577 			j--;
578 		    }
579 		    else {
580 		        new_file_write += flat_file->blocklens[j];
581 			end_fwr_size = flat_file->blocklens[j];
582 		    }
583 		}
584 		else {
585 		    if (fwr_size > new_buffer_write) {
586 		        new_file_write = new_buffer_write;
587 			fwr_size = new_file_write;
588 		    }
589 		    else new_file_write = fwr_size;
590 		}
591 		file_list_count++;
592 		if (j < (flat_file->count - 1)) j++;
593 		else j = 0;
594 
595 		k = start_k;
596 		if ((new_file_write < new_buffer_write) &&
597 		    (file_list_count == MAX_ARRAY_SIZE)) {
598 		    new_buffer_write = 0;
599 		    mem_list_count = 0;
600 		    while (new_buffer_write < new_file_write) {
601 		        if(mem_list_count) {
602 			    if((new_buffer_write + flat_buf->blocklens[k]) >
603 			       new_file_write) {
604 			        end_bwr_size = new_file_write -
605 				    new_buffer_write;
606 				new_buffer_write = new_file_write;
607 				k--;
608 			    }
609 			    else {
610 			        new_buffer_write += flat_buf->blocklens[k];
611 				end_bwr_size = flat_buf->blocklens[k];
612 			    }
613 			}
614 			else {
615 			    new_buffer_write = bwr_size;
616 			    if (bwr_size > (bufsize - size_wrote)) {
617 			        new_buffer_write = bufsize - size_wrote;
618 				bwr_size = new_buffer_write;
619 			    }
620 			}
621 			mem_list_count++;
622 			k = (k + 1)%flat_buf->count;
623 		    } /* while (new_buffer_write < new_file_write) */
624 		} /* if ((new_file_write < new_buffer_write) &&
625 		     (file_list_count == MAX_ARRAY_SIZE)) */
626 	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
627 		 (new_buffer_write < bufsize-size_wrote)) */
628 
629 	    /*  fakes filling the writelist arrays of lengths found above  */
630 	    k = start_k;
631 	    j = start_j;
632 	    for (i=0; i<mem_list_count; i++) {
633 		if(i) {
634 		    if (i == (mem_list_count - 1)) {
635 			if (flat_buf->blocklens[k] == end_bwr_size)
636 			    bwr_size = flat_buf->blocklens[(k+1)%
637 							  flat_buf->count];
638 			else {
639 			    bwr_size = flat_buf->blocklens[k] - end_bwr_size;
640 			    k--;
641 			    buf_count--;
642 			}
643 		    }
644 		}
645 		buf_count++;
646 		k = (k + 1)%flat_buf->count;
647 	    } /* for (i=0; i<mem_list_count; i++) */
648 	    for (i=0; i<file_list_count; i++) {
649 		if (i) {
650 		    if (i == (file_list_count - 1)) {
651 			if (flat_file->blocklens[j] == end_fwr_size)
652 			    fwr_size = flat_file->blocklens[(j+1)%
653 							  flat_file->count];
654 			else {
655 			    fwr_size = flat_file->blocklens[j] - end_fwr_size;
656 			    j--;
657 			}
658 		    }
659 		}
660 		if (j < flat_file->count - 1) j++;
661 		else {
662 		    j = 0;
663 		    n_filetypes++;
664 		}
665 	    } /* for (i=0; i<file_list_count; i++) */
666 	    size_wrote += new_buffer_write;
667 	    start_k = k;
668 	    start_j = j;
669 	    if (max_mem_list < mem_list_count)
670 	        max_mem_list = mem_list_count;
671 	    if (max_file_list < file_list_count)
672 	        max_file_list = file_list_count;
673 	} /* while (size_wrote < bufsize) */
674 
675 	/* one last check before we actually carry out the operation:
676 	 * this code has hard-to-fix bugs when a noncontiguous file type has
677 	 * such large pieces that the sum of the lengths of the memory type is
678 	 * not larger than one of those pieces (and vice versa for large memory
679 	 * types and many pices of file types.  In these cases, give up and
680 	 * fall back to naive reads and writes.  The testphdf5 test created a
681 	 * type with two very large memory regions and 600 very small file
682 	 * regions.  The same test also created a type with one very large file
683 	 * region and many (700) very small memory regions.  both cases caused
684 	 * problems for this code */
685 
686 	if ( ( (file_list_count == 1) &&
687 		    (new_file_write < flat_file->blocklens[0] ) ) ||
688 		((mem_list_count == 1) &&
689 		    (new_buffer_write < flat_buf->blocklens[0]) ) ||
690 		((file_list_count == MAX_ARRAY_SIZE) &&
691 		    (new_file_write < flat_buf->blocklens[0]) ) ||
692 		( (mem_list_count == MAX_ARRAY_SIZE) &&
693 		    (new_buffer_write < flat_file->blocklens[0])) )
694 	{
695 	    ADIOI_Delete_flattened(datatype);
696 	    ADIOI_GEN_WriteStrided_naive(fd, buf, count, datatype,
697 		    file_ptr_type, initial_off, status, error_code);
698 	    return;
699 	}
700 
701 
702 	mem_offsets = (PVFS_size*)ADIOI_Malloc(max_mem_list*sizeof(PVFS_size));
703 	mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int));
704 	file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t));
705 	file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t));
706 
707 	size_wrote = 0;
708 	n_filetypes = st_n_filetypes;
709 	fwr_size = st_fwr_size;
710 	bwr_size = flat_buf->blocklens[0];
711 	buf_count = 0;
712 	start_mem_offset = 0;
713 	start_k = k = 0;
714 	start_j = st_index;
715 
716 	/*  this section calculates mem_list_count and file_list_count
717 	    and also finds the possibly odd sized last array elements
718 	    in new_fwr_size and new_bwr_size  */
719 
720 	while (size_wrote < bufsize) {
721 	    k = start_k;
722 	    new_buffer_write = 0;
723 	    mem_list_count = 0;
724 	    while ((mem_list_count < MAX_ARRAY_SIZE) &&
725 		   (new_buffer_write < bufsize-size_wrote)) {
726 	        /* find mem_list_count and file_list_count such that both are
727 		   less than MAX_ARRAY_SIZE, the sum of their lengths are
728 		   equal, and the sum of all the data written and data to be
729 		   written in the next immediate write list is less than
730 		   bufsize */
731 	        if(mem_list_count) {
732 		    if((new_buffer_write + flat_buf->blocklens[k] +
733 			size_wrote) > bufsize) {
734 		        end_bwr_size = new_buffer_write +
735 			    flat_buf->blocklens[k] - (bufsize - size_wrote);
736 			new_buffer_write = bufsize - size_wrote;
737 		    }
738 		    else {
739 		        new_buffer_write += flat_buf->blocklens[k];
740 			end_bwr_size = flat_buf->blocklens[k];
741 		    }
742 		}
743 		else {
744 		    if (bwr_size > (bufsize - size_wrote)) {
745 		        new_buffer_write = bufsize - size_wrote;
746 			bwr_size = new_buffer_write;
747 		    }
748 		    else new_buffer_write = bwr_size;
749 		}
750 		mem_list_count++;
751 		k = (k + 1)%flat_buf->count;
752 	     } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
753 	       (new_buffer_write < bufsize-size_wrote)) */
754 	    j = start_j;
755 	    new_file_write = 0;
756 	    file_list_count = 0;
757 	    while ((file_list_count < MAX_ARRAY_SIZE) &&
758 		   (new_file_write < new_buffer_write)) {
759 	        if(file_list_count) {
760 		    if((new_file_write + flat_file->blocklens[j]) >
761 		       new_buffer_write) {
762 		        end_fwr_size = new_buffer_write - new_file_write;
763 			new_file_write = new_buffer_write;
764 			j--;
765 		    }
766 		    else {
767 		        new_file_write += flat_file->blocklens[j];
768 			end_fwr_size = flat_file->blocklens[j];
769 		    }
770 		}
771 		else {
772 		    if (fwr_size > new_buffer_write) {
773 		        new_file_write = new_buffer_write;
774 			fwr_size = new_file_write;
775 		    }
776 		    else new_file_write = fwr_size;
777 		}
778 		file_list_count++;
779 		if (j < (flat_file->count - 1)) j++;
780 		else j = 0;
781 
782 		k = start_k;
783 		if ((new_file_write < new_buffer_write) &&
784 		    (file_list_count == MAX_ARRAY_SIZE)) {
785 		    new_buffer_write = 0;
786 		    mem_list_count = 0;
787 		    while (new_buffer_write < new_file_write) {
788 		        if(mem_list_count) {
789 			    if((new_buffer_write + flat_buf->blocklens[k]) >
790 			       new_file_write) {
791 			        end_bwr_size = new_file_write -
792 				  new_buffer_write;
793 				new_buffer_write = new_file_write;
794 				k--;
795 			    }
796 			    else {
797 			        new_buffer_write += flat_buf->blocklens[k];
798 				end_bwr_size = flat_buf->blocklens[k];
799 			    }
800 			}
801 			else {
802 			    new_buffer_write = bwr_size;
803 			    if (bwr_size > (bufsize - size_wrote)) {
804 			        new_buffer_write = bufsize - size_wrote;
805 				bwr_size = new_buffer_write;
806 			    }
807 			}
808 			mem_list_count++;
809 			k = (k + 1)%flat_buf->count;
810 		    } /* while (new_buffer_write < new_file_write) */
811 		} /* if ((new_file_write < new_buffer_write) &&
812 		     (file_list_count == MAX_ARRAY_SIZE)) */
813 	    } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
814 		 (new_buffer_write < bufsize-size_wrote)) */
815 
816 	    /*  fills the allocated writelist arrays  */
817 	    k = start_k;
818 	    j = start_j;
819 	    for (i=0; i<mem_list_count; i++) {
820 		/* TODO: fix this warning by casting to an integer that's the
821 		 * same size as a char * and /then/ casting to PVFS_size */
822 	        mem_offsets[i] = ((PVFS_size)buf + buftype_extent*
823 				  (buf_count/flat_buf->count) +
824 				  (int)flat_buf->indices[k]);
825 
826 		if(!i) {
827 		    mem_lengths[0] = bwr_size;
828 		    mem_offsets[0] += flat_buf->blocklens[k] - bwr_size;
829 		}
830 		else {
831 		    if (i == (mem_list_count - 1)) {
832 		        mem_lengths[i] = end_bwr_size;
833 			if (flat_buf->blocklens[k] == end_bwr_size)
834 			    bwr_size = flat_buf->blocklens[(k+1)%
835 							  flat_buf->count];
836 			else {
837 			    bwr_size = flat_buf->blocklens[k] - end_bwr_size;
838 			    k--;
839 			    buf_count--;
840 			}
841 		    }
842 		    else {
843 		        mem_lengths[i] = flat_buf->blocklens[k];
844 		    }
845 		}
846 		buf_count++;
847 		k = (k + 1)%flat_buf->count;
848 	    } /* for (i=0; i<mem_list_count; i++) */
849 	    for (i=0; i<file_list_count; i++) {
850 	        file_offsets[i] = disp + flat_file->indices[j] +
851 		    ((ADIO_Offset)n_filetypes) * filetype_extent;
852 	        if (!i) {
853 		    file_lengths[0] = fwr_size;
854 		    file_offsets[0] += flat_file->blocklens[j] - fwr_size;
855 		}
856 		else {
857 		    if (i == (file_list_count - 1)) {
858 		        file_lengths[i] = end_fwr_size;
859 			if (flat_file->blocklens[j] == end_fwr_size)
860 			    fwr_size = flat_file->blocklens[(j+1)%
861 							  flat_file->count];
862 			else {
863 			    fwr_size = flat_file->blocklens[j] - end_fwr_size;
864 			    j--;
865 			}
866 		    }
867 		    else file_lengths[i] = flat_file->blocklens[j];
868 		}
869 		if (j < flat_file->count - 1) j++;
870 		else {
871 		    j = 0;
872 		    n_filetypes++;
873 		}
874 	    } /* for (i=0; i<file_list_count; i++) */
875 
876 	    err_flag = PVFS_Request_hindexed(mem_list_count, mem_lengths,
877 					     mem_offsets, PVFS_BYTE, &mem_req);
878 	    /* --BEGIN ERROR HANDLING-- */
879 	    if (err_flag != 0 ) {
880 		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
881 						   MPIR_ERR_RECOVERABLE,
882 						   myname, __LINE__,
883 						   ADIOI_PVFS2_error_convert(err_flag),
884 						   "Error in PVFS_Request_hindexed (memory)", 0);
885 		goto error_state;
886 	    }
887 	    /* --END ERROR HANDLING-- */
888 
889 	    err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
890 					     file_offsets, PVFS_BYTE,
891 					     &file_req);
892 	    /* --BEGIN ERROR HANDLING-- */
893 	    if (err_flag != 0) {
894 		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
895 						   MPIR_ERR_RECOVERABLE,
896 						   myname, __LINE__,
897 						   ADIOI_PVFS2_error_convert(err_flag),
898 						   "Error in PVFS_Request_hindexed", 0);
899 		goto error_state;
900 	    }
901 	    /* --END ERROR HANDLING-- */
902 
903 	    /* offset will be expressed in memory and file datatypes */
904 
905 #ifdef ADIOI_MPE_LOGGING
906             MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
907 #endif
908 	    err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0,
909 				      PVFS_BOTTOM, mem_req,
910 				      &(pvfs_fs->credentials), &resp_io);
911 #ifdef ADIOI_MPE_LOGGING
912             MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
913 #endif
914 	    /* --BEGIN ERROR HANDLING-- */
915 	    if (err_flag != 0) {
916 		*error_code = MPIO_Err_create_code(MPI_SUCCESS,
917 						   MPIR_ERR_RECOVERABLE,
918 						   myname, __LINE__,
919 						   ADIOI_PVFS2_error_convert(err_flag),
920 						   "Error in PVFS_sys_write", 0);
921 		goto error_state;
922 	    }
923 	    /* --END ERROR HANDLING-- */
924 
925 	    size_wrote += new_buffer_write;
926 	    total_bytes_written += resp_io.total_completed;
927 	    start_k = k;
928 	    start_j = j;
929 	    PVFS_Request_free(&mem_req);
930 	    PVFS_Request_free(&file_req);
931 	} /* while (size_wrote < bufsize) */
932 	ADIOI_Free(mem_offsets);
933 	ADIOI_Free(mem_lengths);
934     }
935     /* when incrementing fp_ind, need to also take into account the file type:
936      * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----|
937      * if we wrote N elements, offset needs to point at beginning of type, not
938      * at empty region at offset N+1).
939      *
940      * As we discussed on mpich-discuss in may/june 2009, the code below might
941      * look wierd, but by putting fp_ind at the last byte written, the next
942      * time we run through the strided code we'll update the fp_ind to the
943      * right location. */
944     if (file_ptr_type == ADIO_INDIVIDUAL) {
945 	fd->fp_ind = file_offsets[file_list_count-1]+
946 	    file_lengths[file_list_count-1];
947     }
948     ADIOI_Free(file_offsets);
949     ADIOI_Free(file_lengths);
950 
951     *error_code = MPI_SUCCESS;
952 
953 error_state:
954     fd->fp_sys_posn = -1;   /* set it to null. */
955 
956 #ifdef HAVE_STATUS_SET_BYTES
957     MPIR_Status_set_bytes(status, datatype, bufsize);
958 /* This is a temporary way of filling in status. The right way is to
959    keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */
960 #endif
961 
962     if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
963 }
964