1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*-
2  * vim: ts=8 sts=4 sw=4 noexpandtab
3  *
4  *   Copyright (C) 2008 University of Chicago.
5  *   See COPYRIGHT notice in top-level directory.
6  */
7 
8 #include "adio.h"
9 #include "adio_extern.h"
10 #include "ad_zoidfs.h"
11 
12 #include "ad_zoidfs_common.h"
13 
14 /* Copied from ADIOI_PVFS2_OldReadStrided.  It would be good to have fewer
15  * copies of this code... */
/*
 * ADIOI_ZOIDFS_ReadStrided - read a possibly noncontiguous region of a file
 * into a possibly noncontiguous memory buffer using zoidfs list I/O.
 *
 * fd            - open ADIO file; fd->fs_ptr holds the ZOIDFS object handle
 * buf           - start of the user buffer
 * count         - number of 'datatype' elements to read into buf
 * datatype      - memory datatype describing the layout of buf
 * file_ptr_type - ADIO_INDIVIDUAL to start at fd->fp_ind; otherwise 'offset'
 *                 is used as an explicit offset
 * offset        - explicit offset, in units of etype relative to the filetype
 *                 (ignored when file_ptr_type is ADIO_INDIVIDUAL)
 * status        - when HAVE_STATUS_SET_BYTES is defined, filled in with the
 *                 requested byte count (not the count actually read)
 * error_code    - MPI_SUCCESS, or a code built by MPIO_Err_create_code when
 *                 zoidfs_read reports an error
 *
 * Three cases are handled:
 *   1. noncontiguous memory, contiguous file
 *   2. contiguous memory, noncontiguous file
 *   3. noncontiguous memory, noncontiguous file
 * Each zoidfs_read call is given at most MAX_ARRAY_SIZE memory regions and
 * at most MAX_ARRAY_SIZE file regions.  On a zoidfs_read failure all list
 * arrays are released, the individual file pointer is left untouched, and
 * the routine returns with *error_code set.
 */
void ADIOI_ZOIDFS_ReadStrided(ADIO_File fd, void *buf, int count,
                              MPI_Datatype datatype, int file_ptr_type,
                              ADIO_Offset offset, ADIO_Status *status,
                              int *error_code)
{
    /* offset is in units of etype relative to the filetype. */
    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int i, j, k,  brd_size, frd_size=0, st_index=0;
    int sum, n_etypes_in_filetype, size_in_filetype;
    MPI_Count bufsize;
    int n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0;
    MPI_Count filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent;
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset off, disp, initial_off;
    int flag, st_frd_size, st_n_filetypes;

    size_t mem_list_count, file_list_count;
    void ** mem_offsets;
    uint64_t *file_offsets;
    size_t *mem_lengths;
    uint64_t *file_lengths;
    int total_blks_to_read;

    int max_mem_list, max_file_list;

    int b_blks_read;
    int f_data_read;
    int size_read=0, n_read_lists, extra_blks;

    int end_brd_size, end_frd_size;
    int start_k, start_j, new_file_read, new_buffer_read;
    ADIOI_ZOIDFS_object * zoidfs_obj_ptr;
    int err_flag=0;
    MPI_Offset total_bytes_read = 0;
    static char myname[] = "ADIOI_ZOIDFS_ReadStrided";

    /* note: I don't know what zoidfs will do if you pass it a super-long list,
     * so let's keep with the PVFS limit for now */
#define MAX_ARRAY_SIZE 64

    *error_code = MPI_SUCCESS;  /* changed below if error */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    /* the HDF5 tests showed a bug in this list processing code (see many many
     * lines down below).  We added a workaround, but common HDF5 file types
     * are actually contiguous and do not need the expensive workaround */
    if (!filetype_is_contig) {
        flat_file = ADIOI_Flatlist;
        while (flat_file->type != fd->filetype) flat_file = flat_file->next;
        if (flat_file->count == 1 && !buftype_is_contig)
            filetype_is_contig = 1;
    }

    MPI_Type_size_x(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
        /* a zero-sized filetype means there is nothing to read */
#ifdef HAVE_STATUS_SET_BYTES
        MPIR_Status_set_bytes(status, datatype, 0);
#endif
        *error_code = MPI_SUCCESS;
        return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size_x(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;

    bufsize = buftype_size * count;

    zoidfs_obj_ptr = (ADIOI_ZOIDFS_object *)fd->fs_ptr;

    if (!buftype_is_contig && filetype_is_contig) {

        /* noncontiguous in memory, contiguous in file. */

        /* only one file off-len pair, so no array here; these deliberately
         * shadow the function-level pointers of the same name */
        uint64_t file_offsets;
        uint64_t file_lengths;

        ADIOI_Flatten_datatype(datatype);
        flat_buf = ADIOI_Flatlist;
        while (flat_buf->type != datatype) flat_buf = flat_buf->next;

        off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind :
            fd->disp + etype_size * offset;

        file_list_count = 1;
        file_offsets = off;
        file_lengths = 0;
        total_blks_to_read = count*flat_buf->count;
        b_blks_read = 0;

        /* allocate arrays according to max usage */
        if (total_blks_to_read > MAX_ARRAY_SIZE)
            mem_list_count = MAX_ARRAY_SIZE;
        else mem_list_count = total_blks_to_read;
        mem_offsets = (void **)ADIOI_Malloc(mem_list_count*sizeof(void*));
        mem_lengths = (size_t*)ADIOI_Malloc(mem_list_count*sizeof(size_t));

        /* TODO: CHECK RESULTS OF MEMORY ALLOCATION */

        j = 0;
        /* step through each block in memory, filling memory arrays */
        while (b_blks_read < total_blks_to_read) {
            for (i=0; i<flat_buf->count; i++) {
                mem_offsets[b_blks_read % MAX_ARRAY_SIZE] =
                    (char *) buf + j*buftype_extent + flat_buf->indices[i];
                mem_lengths[b_blks_read % MAX_ARRAY_SIZE] =
                    flat_buf->blocklens[i];
                file_lengths += flat_buf->blocklens[i];
                b_blks_read++;
                /* issue a read whenever the arrays are full or the last
                 * memory block has been queued */
                if (!(b_blks_read % MAX_ARRAY_SIZE) ||
                    (b_blks_read == total_blks_to_read)) {

                    /* in the case of the last read list call,
                       adjust mem_list_count */
                    if (b_blks_read == total_blks_to_read) {
                        mem_list_count = total_blks_to_read % MAX_ARRAY_SIZE;
                        /* in case last read list call fills max arrays */
                        if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
                    }
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
                    NO_STALE(err_flag, fd, zoidfs_obj_ptr,
                             zoidfs_read(zoidfs_obj_ptr,
                                         mem_list_count,
                                         mem_offsets, mem_lengths,
                                         1, &file_offsets, &file_lengths,
                                         ZOIDFS_NO_OP_HINT));
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
                    /* --BEGIN ERROR HANDLING-- */
                    if (err_flag != ZFS_OK) {
                        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                                           MPIR_ERR_RECOVERABLE,
                                                           myname, __LINE__,
                                                           ADIOI_ZOIDFS_error_convert(err_flag),
                                                           "Error in zoidfs_read", 0);
                        /* release the list arrays before bailing out */
                        ADIOI_Free(mem_offsets);
                        ADIOI_Free(mem_lengths);
                        goto error_state;
                    }
                    /* --END ERROR HANDLING-- */
                    total_bytes_read += file_lengths;

                    /* after the last read list call, leave here */
                    if (b_blks_read == total_blks_to_read) break;

                    file_offsets += file_lengths;
                    file_lengths = 0;
                }
            } /* for (i=0; i<flat_buf->count; i++) */
            j++;
        } /* while (b_blks_read < total_blks_to_read) */
        ADIOI_Free(mem_offsets);
        ADIOI_Free(mem_lengths);

        if (file_ptr_type == ADIO_INDIVIDUAL)
            fd->fp_ind += total_bytes_read;

        fd->fp_sys_posn = -1;  /* set it to null. */

#ifdef HAVE_STATUS_SET_BYTES
        MPIR_Status_set_bytes(status, datatype, bufsize);
        /* This is a temporary way of filling in status.  The right way is to
           keep track of how much data was actually read and placed in buf
           by ADIOI_BUFFERED_READ. */
#endif
        ADIOI_Delete_flattened(datatype);

        return;
    } /* if (!buftype_is_contig && filetype_is_contig) */

    /* know file is noncontiguous from above */
    /* noncontiguous in file */

    /* filetype already flattened in ADIO_Open */
    flat_file = ADIOI_Flatlist;
    while (flat_file->type != fd->filetype) flat_file = flat_file->next;

    disp = fd->disp;
    initial_off = offset;   /* kept for the naive fallback below */


    /* for each case - ADIO_Individual pointer or explicit, find the file
       offset in bytes (offset), n_filetypes (how many filetypes into
       file to start), frd_size (remaining amount of data in present
       file block), and st_index (start point in terms of blocks in
       starting filetype) */
    if (file_ptr_type == ADIO_INDIVIDUAL) {
        offset = fd->fp_ind; /* in bytes */
        n_filetypes = -1;
        flag = 0;
        while (!flag) {
            n_filetypes++;
            for (i=0; i<flat_file->count; i++) {
                if (disp + flat_file->indices[i] +
                    ((ADIO_Offset) n_filetypes)*filetype_extent +
                    flat_file->blocklens[i]  >= offset) {
                    st_index = i;
                    frd_size = disp + flat_file->indices[i] +
                        ((ADIO_Offset) n_filetypes)*filetype_extent
                        + flat_file->blocklens[i] - offset;
                    flag = 1;
                    break;
                }
            }
        } /* while (!flag) */
    } /* if (file_ptr_type == ADIO_INDIVIDUAL) */
    else {
        n_etypes_in_filetype = filetype_size/etype_size;
        n_filetypes = (int) (offset / n_etypes_in_filetype);
        etype_in_filetype = (int) (offset % n_etypes_in_filetype);
        size_in_filetype = etype_in_filetype * etype_size;

        sum = 0;
        for (i=0; i<flat_file->count; i++) {
            sum += flat_file->blocklens[i];
            if (sum > size_in_filetype) {
                st_index = i;
                frd_size = sum - size_in_filetype;
                abs_off_in_filetype = flat_file->indices[i] +
                    size_in_filetype - (sum - flat_file->blocklens[i]);
                break;
            }
        }

        /* abs. offset in bytes in the file */
        offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent +
            abs_off_in_filetype;
    } /* else [file_ptr_type != ADIO_INDIVIDUAL] */

    st_frd_size = frd_size;
    st_n_filetypes = n_filetypes;

    if (buftype_is_contig && !filetype_is_contig) {

        /* contiguous in memory, noncontiguous in file. should be the most
           common case. */

        /* only one memory off-len pair, so no array here; these deliberately
         * shadow the function-level pointers of the same name */
        size_t mem_lengths;
        size_t mem_offsets;     /* address in buf where the next list lands */
        void *mem_start;        /* mem_offsets as the pointer zoidfs expects */
        size_t consumed;

        i = 0;
        j = st_index;
        n_filetypes = st_n_filetypes;

        mem_list_count = 1;

        /* determine how many blocks in file to read */
        f_data_read = ADIOI_MIN(st_frd_size, bufsize);
        total_blks_to_read = 1;
        if (j < (flat_file->count-1)) j++;
        else {
            j = 0;
            n_filetypes++;
        }
        while (f_data_read < bufsize) {
            f_data_read += flat_file->blocklens[j];
            total_blks_to_read++;
            if (j<(flat_file->count-1)) j++;
            else j = 0;
        }

        j = st_index;
        n_filetypes = st_n_filetypes;
        n_read_lists = total_blks_to_read/MAX_ARRAY_SIZE;
        extra_blks = total_blks_to_read%MAX_ARRAY_SIZE;

        mem_offsets = (size_t)buf;
        mem_lengths = 0;

        /* if at least one full readlist, allocate file arrays
           at max array size and don't free until very end */
        if (n_read_lists) {
            file_offsets = (uint64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
                                                   sizeof(uint64_t));
            file_lengths = (uint64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
                                                   sizeof(uint64_t));
        }
        /* if there's no full readlist allocate file arrays according
           to needed size (extra_blks) */
        else {
            file_offsets = (uint64_t*)ADIOI_Malloc(extra_blks*
                                                   sizeof(uint64_t));
            file_lengths = (uint64_t*)ADIOI_Malloc(extra_blks*
                                                   sizeof(uint64_t));
        }

        /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
        for (i=0; i<n_read_lists; i++) {
            file_list_count = MAX_ARRAY_SIZE;
            if(!i) {
                /* very first block: start mid-block at 'offset' */
                file_offsets[0] = offset;
                file_lengths[0] = st_frd_size;
                mem_lengths = st_frd_size;
            }
            for (k=0; k<MAX_ARRAY_SIZE; k++) {
                if (i || k) {
                    file_offsets[k] = disp +
                        ((ADIO_Offset)n_filetypes)*filetype_extent
                        + flat_file->indices[j];
                    file_lengths[k] = flat_file->blocklens[j];
                    mem_lengths += file_lengths[k];
                }
                if (j<(flat_file->count - 1)) j++;
                else {
                    j = 0;
                    n_filetypes++;
                }
            } /* for (k=0; k<MAX_ARRAY_SIZE; k++) */

            /* The block count above stops at the first file block that
             * reaches bufsize, so the very last block of the request may
             * extend past the end of the user buffer.  When the request is
             * an exact multiple of MAX_ARRAY_SIZE blocks that last block
             * lives in this list, so trim it here (the extra_blks path
             * below trims its own last block). */
            consumed = (mem_offsets - (size_t)buf) + mem_lengths;
            if (consumed > (size_t)bufsize) {
                size_t excess = consumed - (size_t)bufsize;
                file_lengths[MAX_ARRAY_SIZE - 1] -= excess;
                mem_lengths -= excess;
            }

            /* zoidfs_read takes an array of memory start pointers; hand it
             * the current position in buf, not buf itself */
            mem_start = (void *)mem_offsets;
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
            NO_STALE(err_flag, fd, zoidfs_obj_ptr,
                     zoidfs_read(zoidfs_obj_ptr,
                                 1, &mem_start, &mem_lengths,
                                 file_list_count,
                                 file_offsets, file_lengths,
                                 ZOIDFS_NO_OP_HINT));
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
            /* --BEGIN ERROR HANDLING-- */
            if (err_flag != ZFS_OK) {
                *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                                   MPIR_ERR_RECOVERABLE,
                                                   myname, __LINE__,
                                                   ADIOI_ZOIDFS_error_convert(err_flag),
                                                   "Error in zoidfs_read", 0);
                /* release the list arrays before bailing out */
                ADIOI_Free(file_offsets);
                ADIOI_Free(file_lengths);
                goto error_state;
            }
            /* --END ERROR HANDLING-- */
            total_bytes_read += mem_lengths;

            mem_offsets += mem_lengths;
            mem_lengths = 0;
        } /* for (i=0; i<n_read_lists; i++) */

        /* for file arrays smaller than MAX_ARRAY_SIZE (last read_list call) */
        if (extra_blks) {
            file_list_count = extra_blks;
            if(!i) {
                /* no full list was issued: the first block starts at
                 * 'offset' and may itself cover the whole request */
                file_offsets[0] = offset;
                file_lengths[0] = ADIOI_MIN(st_frd_size, bufsize);
            }
            for (k=0; k<extra_blks; k++) {
                if(i || k) {
                    file_offsets[k] = disp +
                        ((ADIO_Offset)n_filetypes)*filetype_extent +
                        flat_file->indices[j];
                    if (k == (extra_blks - 1)) {
                        /* last block: whatever is left of bufsize after the
                         * earlier lists and the earlier blocks of this one */
                        file_lengths[k] = bufsize - mem_lengths
                            - mem_offsets + (size_t)buf;
                    }
                    else file_lengths[k] = flat_file->blocklens[j];
                } /* if(i || k) */
                mem_lengths += file_lengths[k];
                if (j<(flat_file->count - 1)) j++;
                else {
                    j = 0;
                    n_filetypes++;
                }
            } /* for (k=0; k<extra_blks; k++) */

            mem_start = (void *)mem_offsets;
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
            NO_STALE(err_flag, fd, zoidfs_obj_ptr,
                     zoidfs_read(zoidfs_obj_ptr, 1,
                                 &mem_start,
                                 &mem_lengths,
                                 file_list_count,
                                 file_offsets, file_lengths,
                                 ZOIDFS_NO_OP_HINT));
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
            /* --BEGIN ERROR HANDLING-- */
            if (err_flag != ZFS_OK) {
                *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                                   MPIR_ERR_RECOVERABLE,
                                                   myname, __LINE__,
                                                   ADIOI_ZOIDFS_error_convert(err_flag),
                                                   "Error in zoidfs_read", 0);
                /* release the list arrays before bailing out */
                ADIOI_Free(file_offsets);
                ADIOI_Free(file_lengths);
                goto error_state;
            }
            /* --END ERROR HANDLING-- */
            total_bytes_read += mem_lengths;
        }
    }
    else {
        /* noncontiguous in memory as well as in file */

        ADIOI_Flatten_datatype(datatype);
        flat_buf = ADIOI_Flatlist;
        while (flat_buf->type != datatype) flat_buf = flat_buf->next;

        size_read = 0;
        n_filetypes = st_n_filetypes;
        frd_size = st_frd_size;
        brd_size = flat_buf->blocklens[0];
        buf_count = 0;
        start_k = k = 0;
        start_j = st_index;
        max_mem_list = 0;
        max_file_list = 0;

        /* run through and find max_file_list and max_mem_list so that you
           can allocate the file and memory arrays less than MAX_ARRAY_SIZE
           if possible */

        while (size_read < bufsize) {
            k = start_k;
            new_buffer_read = 0;
            mem_list_count = 0;
            while ((mem_list_count < MAX_ARRAY_SIZE) &&
                   (new_buffer_read < bufsize-size_read)) {
                /* find mem_list_count and file_list_count such that both are
                   less than MAX_ARRAY_SIZE, the sum of their lengths are
                   equal, and the sum of all the data read and data to be
                   read in the next immediate read list is less than
                   bufsize */
                if(mem_list_count) {
                    if((new_buffer_read + flat_buf->blocklens[k] +
                        size_read) > bufsize) {
                        end_brd_size = new_buffer_read +
                            flat_buf->blocklens[k] - (bufsize - size_read);
                        new_buffer_read = bufsize - size_read;
                    }
                    else {
                        new_buffer_read += flat_buf->blocklens[k];
                        end_brd_size = flat_buf->blocklens[k];
                    }
                }
                else {
                    if (brd_size > (bufsize - size_read)) {
                        new_buffer_read = bufsize - size_read;
                        brd_size = new_buffer_read;
                    }
                    else new_buffer_read = brd_size;
                }
                mem_list_count++;
                k = (k + 1)%flat_buf->count;
            } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
                 (new_buffer_read < bufsize-size_read)) */
            j = start_j;
            new_file_read = 0;
            file_list_count = 0;
            while ((file_list_count < MAX_ARRAY_SIZE) &&
                   (new_file_read < new_buffer_read)) {
                if(file_list_count) {
                    if((new_file_read + flat_file->blocklens[j]) >
                       new_buffer_read) {
                        end_frd_size = new_buffer_read - new_file_read;
                        new_file_read = new_buffer_read;
                        j--;
                    }
                    else {
                        new_file_read += flat_file->blocklens[j];
                        end_frd_size = flat_file->blocklens[j];
                    }
                }
                else {
                    if (frd_size > new_buffer_read) {
                        new_file_read = new_buffer_read;
                        frd_size = new_file_read;
                    }
                    else new_file_read = frd_size;
                }
                file_list_count++;
                if (j < (flat_file->count - 1)) j++;
                else j = 0;

                k = start_k;
                /* the file list filled up before covering the memory list:
                 * shrink the memory list to match the file list */
                if ((new_file_read < new_buffer_read) &&
                    (file_list_count == MAX_ARRAY_SIZE)) {
                    new_buffer_read = 0;
                    mem_list_count = 0;
                    while (new_buffer_read < new_file_read) {
                        if(mem_list_count) {
                            if((new_buffer_read + flat_buf->blocklens[k]) >
                               new_file_read) {
                                end_brd_size = new_file_read - new_buffer_read;
                                new_buffer_read = new_file_read;
                                k--;
                            }
                            else {
                                new_buffer_read += flat_buf->blocklens[k];
                                end_brd_size = flat_buf->blocklens[k];
                            }
                        }
                        else {
                            new_buffer_read = brd_size;
                            if (brd_size > (bufsize - size_read)) {
                                new_buffer_read = bufsize - size_read;
                                brd_size = new_buffer_read;
                            }
                        }
                        mem_list_count++;
                        k = (k + 1)%flat_buf->count;
                    } /* while (new_buffer_read < new_file_read) */
                } /* if ((new_file_read < new_buffer_read) && (file_list_count
                     == MAX_ARRAY_SIZE)) */
            } /* while ((file_list_count < MAX_ARRAY_SIZE) &&
                 (new_file_read < new_buffer_read)) */

            /*  fakes filling the readlist arrays of lengths found above  */
            k = start_k;
            j = start_j;
            for (i=0; i<mem_list_count; i++) {
                if(i) {
                    if (i == (mem_list_count - 1)) {
                        if (flat_buf->blocklens[k] == end_brd_size)
                            brd_size = flat_buf->blocklens[(k+1)%
                                                           flat_buf->count];
                        else {
                            brd_size = flat_buf->blocklens[k] - end_brd_size;
                            k--;
                            buf_count--;
                        }
                    }
                }
                buf_count++;
                k = (k + 1)%flat_buf->count;
            } /* for (i=0; i<mem_list_count; i++) */
            for (i=0; i<file_list_count; i++) {
                if (i) {
                    if (i == (file_list_count - 1)) {
                        if (flat_file->blocklens[j] == end_frd_size)
                            frd_size = flat_file->blocklens[(j+1)%
                                                            flat_file->count];
                        else {
                            frd_size = flat_file->blocklens[j] - end_frd_size;
                            j--;
                        }
                    }
                }
                if (j < flat_file->count - 1) j++;
                else {
                    j = 0;
                    n_filetypes++;
                }
            } /* for (i=0; i<file_list_count; i++) */
            size_read += new_buffer_read;
            start_k = k;
            start_j = j;
            if (max_mem_list < mem_list_count)
                max_mem_list = mem_list_count;
            if (max_file_list < file_list_count)
                max_file_list = file_list_count;
        } /* while (size_read < bufsize) */

        /* one last check before we actually carry out the operation:
         * this code has hard-to-fix bugs when a noncontiguous file type has
         * such large pieces that the sum of the lengths of the memory type is
         * not larger than one of those pieces (and vice versa for large memory
         * types and many pieces of file types).  In these cases, give up and
         * fall back to naive reads and writes.  The testphdf5 test created a
         * type with two very large memory regions and 600 very small file
         * regions.  The same test also created a type with one very large file
         * region and many (700) very small memory regions.  both cases caused
         * problems for this code */

        if ( ( (file_list_count == 1) &&
               (new_file_read < flat_file->blocklens[0] ) ) ||
             ((mem_list_count == 1) &&
              (new_buffer_read < flat_buf->blocklens[0]) ) ||
             ((file_list_count == MAX_ARRAY_SIZE) &&
              (new_file_read < flat_buf->blocklens[0]) ) ||
             ( (mem_list_count == MAX_ARRAY_SIZE) &&
               (new_buffer_read < flat_file->blocklens[0])) )
        {

            ADIOI_Delete_flattened(datatype);
            ADIOI_GEN_ReadStrided_naive(fd, buf, count, datatype,
                                        file_ptr_type, initial_off, status,
                                        error_code);
            return;
        }

        mem_offsets = (void **)ADIOI_Malloc(max_mem_list*sizeof(void *));
        mem_lengths = (size_t*)ADIOI_Malloc(max_mem_list*sizeof(size_t));
        file_offsets = (uint64_t *)ADIOI_Malloc(max_file_list*sizeof(uint64_t));
        file_lengths = (uint64_t *)ADIOI_Malloc(max_file_list*sizeof(uint64_t));

        /* second pass: same walk as above, but this time fill the arrays
         * and issue the reads */
        size_read = 0;
        n_filetypes = st_n_filetypes;
        frd_size = st_frd_size;
        brd_size = flat_buf->blocklens[0];
        buf_count = 0;
        start_k = k = 0;
        start_j = st_index;

        /*  this section calculates mem_list_count and file_list_count
            and also finds the possibly odd sized last array elements
            in end_frd_size and end_brd_size  */

        while (size_read < bufsize) {
            k = start_k;
            new_buffer_read = 0;
            mem_list_count = 0;
            while ((mem_list_count < MAX_ARRAY_SIZE) &&
                   (new_buffer_read < bufsize-size_read)) {
                /* find mem_list_count and file_list_count such that both are
                   less than MAX_ARRAY_SIZE, the sum of their lengths are
                   equal, and the sum of all the data read and data to be
                   read in the next immediate read list is less than
                   bufsize */
                if(mem_list_count) {
                    if((new_buffer_read + flat_buf->blocklens[k] +
                        size_read) > bufsize) {
                        end_brd_size = new_buffer_read +
                            flat_buf->blocklens[k] - (bufsize - size_read);
                        new_buffer_read = bufsize - size_read;
                    }
                    else {
                        new_buffer_read += flat_buf->blocklens[k];
                        end_brd_size = flat_buf->blocklens[k];
                    }
                }
                else {
                    if (brd_size > (bufsize - size_read)) {
                        new_buffer_read = bufsize - size_read;
                        brd_size = new_buffer_read;
                    }
                    else new_buffer_read = brd_size;
                }
                mem_list_count++;
                k = (k + 1)%flat_buf->count;
            } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
                 (new_buffer_read < bufsize-size_read)) */
            j = start_j;
            new_file_read = 0;
            file_list_count = 0;
            while ((file_list_count < MAX_ARRAY_SIZE) &&
                   (new_file_read < new_buffer_read)) {
                if(file_list_count) {
                    if((new_file_read + flat_file->blocklens[j]) >
                       new_buffer_read) {
                        end_frd_size = new_buffer_read - new_file_read;
                        new_file_read = new_buffer_read;
                        j--;
                    }
                    else {
                        new_file_read += flat_file->blocklens[j];
                        end_frd_size = flat_file->blocklens[j];
                    }
                }
                else {
                    if (frd_size > new_buffer_read) {
                        new_file_read = new_buffer_read;
                        frd_size = new_file_read;
                    }
                    else new_file_read = frd_size;
                }
                file_list_count++;
                if (j < (flat_file->count - 1)) j++;
                else j = 0;

                k = start_k;
                /* the file list filled up before covering the memory list:
                 * shrink the memory list to match the file list */
                if ((new_file_read < new_buffer_read) &&
                    (file_list_count == MAX_ARRAY_SIZE)) {
                    new_buffer_read = 0;
                    mem_list_count = 0;
                    while (new_buffer_read < new_file_read) {
                        if(mem_list_count) {
                            if((new_buffer_read + flat_buf->blocklens[k]) >
                               new_file_read) {
                                end_brd_size = new_file_read - new_buffer_read;
                                new_buffer_read = new_file_read;
                                k--;
                            }
                            else {
                                new_buffer_read += flat_buf->blocklens[k];
                                end_brd_size = flat_buf->blocklens[k];
                            }
                        }
                        else {
                            new_buffer_read = brd_size;
                            if (brd_size > (bufsize - size_read)) {
                                new_buffer_read = bufsize - size_read;
                                brd_size = new_buffer_read;
                            }
                        }
                        mem_list_count++;
                        k = (k + 1)%flat_buf->count;
                    } /* while (new_buffer_read < new_file_read) */
                } /* if ((new_file_read < new_buffer_read) && (file_list_count
                     == MAX_ARRAY_SIZE)) */
            } /* while ((file_list_count < MAX_ARRAY_SIZE) &&
                 (new_file_read < new_buffer_read)) */

            /*  fills the allocated readlist arrays  */
            k = start_k;
            j = start_j;
            for (i=0; i<mem_list_count; i++) {
                mem_offsets[i] = (char *) buf +
                    buftype_extent* (buf_count/flat_buf->count) +
                    flat_buf->indices[k];
                if(!i) {
                    /* first entry may start part-way into its block */
                    mem_lengths[0] = brd_size;
                    mem_offsets[0] = (char *) mem_offsets[0] +
                        (flat_buf->blocklens[k] - brd_size);
                }
                else {
                    if (i == (mem_list_count - 1)) {
                        mem_lengths[i] = end_brd_size;
                        if (flat_buf->blocklens[k] == end_brd_size)
                            brd_size = flat_buf->blocklens[(k+1)%
                                                           flat_buf->count];
                        else {
                            brd_size = flat_buf->blocklens[k] - end_brd_size;
                            k--;
                            buf_count--;
                        }
                    }
                    else {
                        mem_lengths[i] = flat_buf->blocklens[k];
                    }
                }
                buf_count++;
                k = (k + 1)%flat_buf->count;
            } /* for (i=0; i<mem_list_count; i++) */
            for (i=0; i<file_list_count; i++) {
                file_offsets[i] = disp + flat_file->indices[j] +
                    ((ADIO_Offset)n_filetypes) * filetype_extent;
                if (!i) {
                    /* first entry may start part-way into its block */
                    file_lengths[0] = frd_size;
                    file_offsets[0] += flat_file->blocklens[j] - frd_size;
                }
                else {
                    if (i == (file_list_count - 1)) {
                        file_lengths[i] = end_frd_size;
                        if (flat_file->blocklens[j] == end_frd_size)
                            frd_size = flat_file->blocklens[(j+1)%
                                                            flat_file->count];
                        else {
                            frd_size = flat_file->blocklens[j] - end_frd_size;
                            j--;
                        }
                    }
                    else file_lengths[i] = flat_file->blocklens[j];
                }
                if (j < flat_file->count - 1) j++;
                else {
                    j = 0;
                    n_filetypes++;
                }
            } /* for (i=0; i<file_list_count; i++) */

#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
            NO_STALE(err_flag, fd, zoidfs_obj_ptr,
                     zoidfs_read(zoidfs_obj_ptr,
                                 mem_list_count, mem_offsets, mem_lengths,
                                 file_list_count,
                                 file_offsets, file_lengths,
                                 ZOIDFS_NO_OP_HINT));
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
            /* --BEGIN ERROR HANDLING-- */
            if (err_flag != ZFS_OK) {
                *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                                   MPIR_ERR_RECOVERABLE,
                                                   myname, __LINE__,
                                                   ADIOI_ZOIDFS_error_convert(err_flag),
                                                   "Error in zoidfs_read", 0);
                /* stop at the first failure so that a later successful read
                 * cannot mask the error; release everything first */
                ADIOI_Free(mem_offsets);
                ADIOI_Free(mem_lengths);
                ADIOI_Free(file_offsets);
                ADIOI_Free(file_lengths);
                goto error_state;
            }
            /* --END ERROR HANDLING-- */
            size_read += new_buffer_read;
            total_bytes_read += new_buffer_read; /* XXX: is this right? */
            start_k = k;
            start_j = j;
        } /* while (size_read < bufsize) */
        ADIOI_Free(mem_offsets);
        ADIOI_Free(mem_lengths);
    }
    /* Other ADIO routines will convert absolute bytes into counts of datatypes */
    /* when incrementing fp_ind, need to also take into account the file type:
     * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----|
     * if we wrote N elements, offset needs to point at beginning of type, not
     * at empty region at offset N+1)
     *
     * As we discussed on mpich-discuss in may/june 2009, the code below might
     * look weird, but by putting fp_ind at the last byte written, the next
     * time we run through the strided code we'll update the fp_ind to the
     * right location. */
    if (file_ptr_type == ADIO_INDIVIDUAL) {
        fd->fp_ind = file_offsets[file_list_count-1]+
            file_lengths[file_list_count-1];
    }

    ADIOI_Free(file_offsets);
    ADIOI_Free(file_lengths);

    if (err_flag == 0) *error_code = MPI_SUCCESS;

error_state:
    /* reached on success (fall-through) and on any zoidfs_read failure; on
     * the failure paths the list arrays have already been freed */
    fd->fp_sys_posn = -1;   /* set it to null. */

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, bufsize);
    /* This is a temporary way of filling in status. The right way is to
       keep track of how much data was actually read and placed in buf
       by ADIOI_BUFFERED_READ. */
#endif

    if (!buftype_is_contig) ADIOI_Delete_flattened(datatype);
}
830 
831