1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Copyright by The HDF Group. *
3 * All rights reserved. *
4 * *
5 * This file is part of HDF5. The full HDF5 copyright notice, including *
6 * terms governing use, modification, and redistribution, is contained in *
7 * the COPYING file, which can be found at the root of the source code *
8 * distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases. *
9 * If you do not have access to either file, you may request a copy from *
10 * help@hdfgroup.org. *
11 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
12
13 /*
14 * Author: Albert Cheng of NCSA, Oct 24, 2001.
15 */
16
17 #include "hdf5.h"
18
19 #ifdef H5_STDC_HEADERS
20 #include <errno.h>
21 #include <fcntl.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #endif
25
26 #ifdef H5_HAVE_UNISTD_H
27 #include <sys/types.h>
28 #include <unistd.h>
29 #endif
30
31 #ifdef H5_HAVE_SYS_STAT_H
32 #include <sys/stat.h>
33 #endif
34
35 #ifdef H5_HAVE_PARALLEL
36
37 #include <mpi.h>
38
39 #ifndef MPI_FILE_NULL /*MPIO may be defined in mpi.h already */
40 # include <mpio.h>
41 #endif /* !MPI_FILE_NULL */
42
43 #include "pio_perf.h"
44
/* Macro definitions */

/* H5Dcreate/H5Dopen gained extra property-list arguments in HDF5 1.8;
 * these wrappers present one calling convention that compiles against
 * both the 1.6 API and the newer H5Dcreate2/H5Dopen2 API. */
#if H5_VERS_MAJOR == 1 && H5_VERS_MINOR == 6
# define H5DCREATE(fd, name, type, space, dcpl) H5Dcreate(fd, name, type, space, dcpl)
# define H5DOPEN(fd, name) H5Dopen(fd, name)
#else
# define H5DCREATE(fd, name, type, space, dcpl) H5Dcreate2(fd, name, type, space, H5P_DEFAULT, dcpl, H5P_DEFAULT)
# define H5DOPEN(fd, name) H5Dopen2(fd, name, H5P_DEFAULT)
#endif

/* sizes of various items. these sizes won't change during program execution */
/* The following three must have the same type: the benchmark transfers raw
 * bytes, so the element is one unsigned char in POSIX, MPI and HDF5 terms. */
#define ELMT_SIZE (sizeof(unsigned char)) /* we're doing bytes */
#define ELMT_MPI_TYPE MPI_BYTE
#define ELMT_H5_TYPE H5T_NATIVE_UCHAR
60
/* Record an error code and jump to the common cleanup label "done".
 * All of these macros are wrapped in do { ... } while(0) so each expands
 * to exactly one statement and stays safe inside unbraced if/else bodies
 * (CERT PRE10-C). */
#define GOTOERROR(errcode) do { ret_code = errcode; goto done; } while(0)
#define GOTODONE do { goto done; } while(0)

/* Print an assertion-failure message tagged with this process's MPI rank
 * and the source location of the failure. */
#define ERRMSG(mesg) do { \
    HDfprintf(stderr, "Proc %d: ", pio_mpi_rank_g); \
    HDfprintf(stderr, "*** Assertion failed (%s) at line %4d in %s\n", \
        mesg, (int)__LINE__, __FILE__); \
} while(0)

/* Print an informational message tagged with MPI rank and source location. */
#define MSG(mesg) do { \
    HDfprintf(stderr, "Proc %d: ", pio_mpi_rank_g); \
    HDfprintf(stderr, "(%s) at line %4d in %s\n", \
        mesg, (int)__LINE__, __FILE__); \
} while(0)

/* verify: if val is false (0), print mesg and fail out through "done".
 * val is parenthesized so compound expressions (e.g. a == b) negate
 * correctly after macro expansion (CERT PRE01-C). */
#define VRFY(val, mesg) do { \
    if (!(val)) { \
        ERRMSG(mesg); \
        GOTOERROR(FAIL); \
    } \
} while(0)
82
83
/* POSIX I/O macros */
#ifdef H5_HAVE_WIN32_API
/* Can't link against the library, so this test will use the older, non-Unicode
 * _open() call on Windows.
 */
#define HDopen(S,F,...) _open(S, F | _O_BINARY, __VA_ARGS__)
#endif /* H5_HAVE_WIN32_API */
/* Thin wrappers over the platform file API; mode 0600 restricts the
 * temporary benchmark files to the owning user. All seeks are absolute
 * (SEEK_SET), matching how the engine computes file offsets. */
#define POSIXCREATE(fn) HDopen(fn, O_CREAT|O_TRUNC|O_RDWR, 0600)
#define POSIXOPEN(fn, F) HDopen(fn, F, 0600)
#define POSIXCLOSE(F) HDclose(F)
#define POSIXSEEK(F,L) HDlseek(F, L, SEEK_SET)
#define POSIXWRITE(F,B,S) HDwrite(F,B,S)
#define POSIXREAD(F,B,S) HDread(F,B,S)
97
/* Bit flags passed to do_fopen() selecting the open mode; PIO_CREATE and
 * PIO_WRITE may be OR'ed together (values are distinct powers of two). */
enum {
    PIO_CREATE = 1,
    PIO_WRITE = 2,
    PIO_READ = 4
};
103
/* Global variables */
static int clean_file_g = -1; /*whether to cleanup temporary test */
 /*files. -1 is not defined; */
 /*0 is no cleanup; 1 is do cleanup */

/*
 * In a parallel machine, the filesystem suitable for compiling is
 * unlikely a parallel file system that is suitable for parallel I/O.
 * There is no standard pathname for the parallel file system. /tmp
 * is about the best guess.
 */
#ifndef HDF5_PARAPREFIX
# define HDF5_PARAPREFIX ""
#endif /* !HDF5_PARAPREFIX */

#ifndef MIN
/* Smaller of two values. NOTE: both arguments are evaluated twice, so do
 * not pass expressions with side effects. */
# define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif /* !MIN */

/* the different types of file descriptors we can expect */
/* Only one member is active at a time, chosen by the iotype in use
 * (POSIXIO -> posixfd, MPIO -> mpifd, PHDF5 -> h5fd). */
typedef union _file_descr {
    int posixfd; /* POSIX file handle*/
    MPI_File mpifd; /* MPI file */
    hid_t h5fd; /* HDF5 file */
} file_descr;
129
/* local functions */
/* Forward declarations for the static helpers implemented below; see each
 * definition for its full contract. All return SUCCESS/FAIL herr_t codes
 * except pio_create_filename (returns the filename buffer or NULL). */
static char *pio_create_filename(iotype iot, const char *base_name,
 char *fullname, size_t size);
static herr_t do_write(results *res, file_descr *fd, parameters *parms,
 long ndsets, off_t nelmts, size_t buf_size, void *buffer);
static herr_t do_read(results *res, file_descr *fd, parameters *parms,
 long ndsets, off_t nelmts, size_t buf_size, void *buffer /*out*/);
static herr_t do_fopen(parameters *param, char *fname, file_descr *fd /*out*/,
 int flags);
static herr_t do_fclose(iotype iot, file_descr *fd);
static void do_cleanupfile(iotype iot, char *fname);
141
142 /*
143 * Function: do_pio
144 * Purpose: PIO Engine where Parallel IO are executed.
145 * Return: results
146 * Programmer: Albert Cheng, Bill Wendling 2001/12/12
147 * Modifications:
148 * Added 2D testing (Christian Chilan, 10. August 2005)
149 */
150 results
do_pio(parameters param)151 do_pio(parameters param)
152 {
153 /* return codes */
154 herr_t ret_code = 0; /*return code */
155 results res;
156
157 file_descr fd;
158 iotype iot;
159
160 char fname[FILENAME_MAX];
161 long nf;
162 long ndsets;
163 off_t nbytes; /*number of bytes per dataset */
164 off_t snbytes; /*general dataset size */
165 /*for 1D, it is the actual dataset size */
166 /*for 2D, it is the size of a side of the dataset square */
167 char *buffer = NULL; /*data buffer pointer */
168 size_t buf_size; /*general buffer size in bytes */
169 /*for 1D, it is the actual buffer size */
170 /*for 2D, it is the length of the buffer rectangle */
171 size_t blk_size; /*data block size in bytes */
172 size_t bsize; /*actual buffer size */
173
174 /* HDF5 variables */
175 herr_t hrc; /*HDF5 return code */
176
177 /* Sanity check parameters */
178
179 /* IO type */
180 iot = param.io_type;
181
182 switch (iot) {
183 case MPIO:
184 fd.mpifd = MPI_FILE_NULL;
185 res.timers = io_time_new(MPI_CLOCK);
186 break;
187 case POSIXIO:
188 fd.posixfd = -1;
189 res.timers = io_time_new(MPI_CLOCK);
190 break;
191 case PHDF5:
192 fd.h5fd = -1;
193 res.timers = io_time_new(MPI_CLOCK);
194 break;
195 default:
196 /* unknown request */
197 HDfprintf(stderr, "Unknown IO type request (%d)\n", iot);
198 GOTOERROR(FAIL);
199 }
200
201 ndsets = param.num_dsets; /* number of datasets per file */
202 nbytes = param.num_bytes; /* number of bytes per dataset */
203 buf_size = param.buf_size;
204 blk_size = param.blk_size;
205
206 if (!param.dim2d){
207 snbytes = nbytes; /* General dataset size */
208 bsize = buf_size; /* Actual buffer size */
209 }
210 else {
211 snbytes = (off_t)sqrt(nbytes); /* General dataset size */
212 bsize = buf_size * blk_size; /* Actual buffer size */
213 }
214
215 if (param.num_files < 0 ) {
216 HDfprintf(stderr,
217 "number of files must be >= 0 (%ld)\n",
218 param.num_files);
219 GOTOERROR(FAIL);
220 }
221
222 if (ndsets < 0 ) {
223 HDfprintf(stderr,
224 "number of datasets per file must be >= 0 (%ld)\n",
225 ndsets);
226 GOTOERROR(FAIL);
227 }
228
229 if (param.num_procs <= 0 ) {
230 HDfprintf(stderr,
231 "maximum number of process to use must be > 0 (%d)\n",
232 param.num_procs);
233 GOTOERROR(FAIL);
234 }
235
236 /* Validate transfer buffer size & block size*/
237 if(blk_size<=0) {
238 HDfprintf(stderr,
239 "Transfer block size (%zu) must be > 0\n", blk_size);
240 GOTOERROR(FAIL);
241 }
242 if(buf_size<=0) {
243 HDfprintf(stderr,
244 "Transfer buffer size (%zu) must be > 0\n", buf_size);
245 GOTOERROR(FAIL);
246 }
247 if ((buf_size % blk_size) != 0){
248 HDfprintf(stderr,
249 "Transfer buffer size (%zu) must be a multiple of the "
250 "interleaved I/O block size (%zu)\n",
251 buf_size, blk_size);
252 GOTOERROR(FAIL);
253 }
254 if((snbytes%pio_mpi_nprocs_g)!=0) {
255 HDfprintf(stderr,
256 "Dataset size (%" H5_PRINTF_LL_WIDTH "d) must be a multiple of the "
257 "number of processes (%d)\n",
258 (long long)snbytes, pio_mpi_nprocs_g);
259 GOTOERROR(FAIL);
260 }
261
262 if (!param.dim2d){
263 if(((snbytes/pio_mpi_nprocs_g)%buf_size)!=0) {
264 HDfprintf(stderr,
265 "Dataset size/process (%" H5_PRINTF_LL_WIDTH "d) must be a multiple of the "
266 "trasfer buffer size (%zu)\n",
267 (long long)(snbytes/pio_mpi_nprocs_g), buf_size);
268 GOTOERROR(FAIL);
269 }
270 }
271 else {
272 if((snbytes%buf_size)!=0) {
273 HDfprintf(stderr,
274 "Dataset side size (%" H5_PRINTF_LL_WIDTH "d) must be a multiple of the "
275 "trasfer buffer size (%zu)\n",
276 (long long)snbytes, buf_size);
277 GOTOERROR(FAIL);
278 }
279 }
280
281 /* Allocate transfer buffer */
282 if ((buffer = malloc(bsize)) == NULL){
283 HDfprintf(stderr, "malloc for transfer buffer size (%zu) failed\n",
284 bsize);
285 GOTOERROR(FAIL);
286 }
287
288 if (pio_debug_level >= 4) {
289 int myrank;
290
291 MPI_Comm_rank(pio_comm_g, &myrank);
292
293 /* output all of the times for all iterations */
294 if (myrank == 0)
295 HDfprintf(output, "Timer details:\n");
296 }
297
298 for (nf = 1; nf <= param.num_files; nf++) {
299 /*
300 * Write performance measurement
301 */
302 /* Open file for write */
303 char base_name[256];
304
305 HDsprintf(base_name, "#pio_tmp_%lu", nf);
306 pio_create_filename(iot, base_name, fname, sizeof(fname));
307 if (pio_debug_level > 0)
308 HDfprintf(output, "rank %d: data filename=%s\n",
309 pio_mpi_rank_g, fname);
310
311 /* Need barrier to make sure everyone starts at the same time */
312 MPI_Barrier(pio_comm_g);
313
314 set_time(res.timers, HDF5_GROSS_WRITE_FIXED_DIMS, TSTART);
315 hrc = do_fopen(¶m, fname, &fd, PIO_CREATE | PIO_WRITE);
316
317 VRFY((hrc == SUCCESS), "do_fopen failed");
318
319 set_time(res.timers, HDF5_FINE_WRITE_FIXED_DIMS, TSTART);
320 hrc = do_write(&res, &fd, ¶m, ndsets, nbytes, buf_size, buffer);
321 set_time(res.timers, HDF5_FINE_WRITE_FIXED_DIMS, TSTOP);
322
323 VRFY((hrc == SUCCESS), "do_write failed");
324
325 /* Close file for write */
326 hrc = do_fclose(iot, &fd);
327
328 set_time(res.timers, HDF5_GROSS_WRITE_FIXED_DIMS, TSTOP);
329 VRFY((hrc == SUCCESS), "do_fclose failed");
330
331 if (!param.h5_write_only) {
332 /*
333 * Read performance measurement
334 */
335 /* Need barrier to make sure everyone is done writing and has
336 * closed the file. Also to make sure everyone starts reading
337 * at the same time.
338 */
339 MPI_Barrier(pio_comm_g);
340
341 /* Open file for read */
342 set_time(res.timers, HDF5_GROSS_READ_FIXED_DIMS, TSTART);
343 hrc = do_fopen(¶m, fname, &fd, PIO_READ);
344
345 VRFY((hrc == SUCCESS), "do_fopen failed");
346
347 set_time(res.timers, HDF5_FINE_READ_FIXED_DIMS, TSTART);
348 hrc = do_read(&res, &fd, ¶m, ndsets, nbytes, buf_size, buffer);
349 set_time(res.timers, HDF5_FINE_READ_FIXED_DIMS, TSTOP);
350 VRFY((hrc == SUCCESS), "do_read failed");
351
352 /* Close file for read */
353 hrc = do_fclose(iot, &fd);
354
355 set_time(res.timers, HDF5_GROSS_READ_FIXED_DIMS, TSTOP);
356 VRFY((hrc == SUCCESS), "do_fclose failed");
357 }
358
359 /* Need barrier to make sure everyone is done with the file */
360 /* before it may be removed by do_cleanupfile */
361 MPI_Barrier(pio_comm_g);
362 do_cleanupfile(iot, fname);
363 }
364
365 done:
366 /* clean up */
367 /* release HDF5 objects */
368
369 /* close any opened files */
370 /* no remove(fname) because that should have happened normally. */
371 switch (iot) {
372 case POSIXIO:
373 if (fd.posixfd != -1)
374 hrc = do_fclose(iot, &fd);
375 break;
376 case MPIO:
377 if (fd.mpifd != MPI_FILE_NULL)
378 hrc = do_fclose(iot, &fd);
379 break;
380 case PHDF5:
381 if (fd.h5fd != -1)
382 hrc = do_fclose(iot, &fd);
383 break;
384 }
385
386 /* release generic resources */
387 if(buffer)
388 HDfree(buffer);
389 res.ret_code = ret_code;
390 return res;
391 }
392
393 /*
394 * Function: pio_create_filename
395 * Purpose: Create a new filename to write to. Determine the correct
396 * suffix to append to the filename by the type of I/O we're
397 * doing. Also, place in the /tmp/{$USER,$LOGIN} directory if
398 * USER or LOGIN are specified in the environment.
399 * Return: Pointer to filename or NULL
400 * Programmer: Bill Wendling, 21. November 2001
401 * Modifications:
402 */
403 static char *
pio_create_filename(iotype iot,const char * base_name,char * fullname,size_t size)404 pio_create_filename(iotype iot, const char *base_name, char *fullname, size_t size)
405 {
406 const char *prefix, *suffix = "";
407 char *ptr, last = '\0';
408 size_t i, j;
409
410 if (!base_name || !fullname || size < 1)
411 return NULL;
412
413 HDmemset(fullname, 0, size);
414
415 switch (iot) {
416 case POSIXIO:
417 suffix = ".posix";
418 break;
419 case MPIO:
420 suffix = ".mpio";
421 break;
422 case PHDF5:
423 suffix = ".h5";
424 break;
425 }
426
427 /* First use the environment variable and then try the constant */
428 prefix = HDgetenv("HDF5_PARAPREFIX");
429
430 #ifdef HDF5_PARAPREFIX
431 if (!prefix)
432 prefix = HDF5_PARAPREFIX;
433 #endif /* HDF5_PARAPREFIX */
434
435 /* Prepend the prefix value to the base name */
436 if (prefix && *prefix) {
437 /* If the prefix specifies the HDF5_PARAPREFIX directory, then
438 * default to using the "/tmp/$USER" or "/tmp/$LOGIN"
439 * directory instead. */
440 register char *user, *login, *subdir;
441
442 user = HDgetenv("USER");
443 login = HDgetenv("LOGIN");
444 subdir = (user ? user : login);
445
446 if (subdir) {
447 for (i = 0; i < size-1 && prefix[i]; i++)
448 fullname[i] = prefix[i];
449
450 fullname[i++] = '/';
451
452 for (j = 0; i < size && subdir[j]; i++, j++)
453 fullname[i] = subdir[j];
454 }
455 else {
456 /* We didn't append the prefix yet */
457 HDstrncpy(fullname, prefix, size);
458 fullname[size - 1] = '\0';
459 }
460
461 if ((HDstrlen(fullname) + HDstrlen(base_name) + 1) < size) {
462 /* Append the base_name with a slash first. Multiple slashes are
463 * handled below. */
464 h5_stat_t buf;
465
466 if (HDstat(fullname, &buf) < 0)
467 /* The directory doesn't exist just yet */
468 if (HDmkdir(fullname, (mode_t) 0755) < 0 && errno != EEXIST) {
469 /* We couldn't make the "/tmp/${USER,LOGIN}" subdirectory.
470 * Default to PREFIX's original prefix value. */
471 HDstrcpy(fullname, prefix);
472 }
473
474 HDstrcat(fullname, "/");
475 HDstrcat(fullname, base_name);
476 }
477 else {
478 /* Buffer is too small */
479 return NULL;
480 }
481 }
482 else if (HDstrlen(base_name) >= size) {
483 /* Buffer is too small */
484 return NULL;
485 }
486 else {
487 HDstrcpy(fullname, base_name);
488 }
489
490 /* Append a suffix */
491 if (suffix) {
492 if (HDstrlen(fullname) + HDstrlen(suffix) >= size)
493 return NULL;
494
495 HDstrcat(fullname, suffix);
496 }
497
498 /* Remove any double slashes in the filename */
499 for (ptr = fullname, i = j = 0; ptr && i < size; i++, ptr++) {
500 if (*ptr != '/' || last != '/')
501 fullname[j++] = *ptr;
502
503 last = *ptr;
504 }
505
506 return fullname;
507 }
508
509 /*
510 * Function: do_write
511 * Purpose: Write the required amount of data to the file.
512 * Return: SUCCESS or FAIL
513 * Programmer: Albert Cheng, Bill Wendling, 2001/12/13
514 * Modifications:
515 * Added 2D testing (Christian Chilan, 10. August 2005)
516 */
517 static herr_t
do_write(results * res,file_descr * fd,parameters * parms,long ndsets,off_t nbytes,size_t buf_size,void * buffer)518 do_write(results *res, file_descr *fd, parameters *parms, long ndsets,
519 off_t nbytes, size_t buf_size, void *buffer)
520 {
521 int ret_code = SUCCESS;
522 int rc; /*routine return code */
523 long ndset;
524 size_t blk_size; /* The block size to subdivide the xfer buffer into */
525 off_t nbytes_xfer; /* Total number of bytes transferred so far */
526 size_t nbytes_xfer_advance; /* Number of bytes transferred in a single I/O operation */
527 size_t nbytes_toxfer; /* Number of bytes to transfer a particular time */
528 char dname[64];
529 off_t dset_offset=0; /*dataset offset in a file */
530 off_t bytes_begin[2]; /*first elmt this process transfer */
531 off_t bytes_count; /*number of elmts this process transfer */
532 off_t snbytes=0; /*size of a side of the dataset square */
533 unsigned char *buf_p; /* Current buffer pointer */
534
535 /* POSIX variables */
536 off_t file_offset; /* File offset of the next transfer */
537 off_t file_offset_advance; /* File offset advance after each I/O operation */
538 off_t posix_file_offset; /* Base file offset of the next transfer */
539
540 /* MPI variables */
541 MPI_Offset mpi_file_offset; /* Base file offset of the next transfer*/
542 MPI_Offset mpi_offset; /* Offset in MPI file */
543 MPI_Offset mpi_offset_advance; /* Offset advance after each I/O operation */
544 MPI_Datatype mpi_file_type; /* MPI derived type for 1D file */
545 MPI_Datatype mpi_blk_type; /* MPI derived type for 1D buffer */
546 MPI_Datatype mpi_cont_type; /* MPI derived type for 2D contiguous file */
547 MPI_Datatype mpi_partial_buffer_cont; /* MPI derived type for partial 2D contiguous buffer */
548 MPI_Datatype mpi_inter_type; /* MPI derived type for 2D interleaved file */
549 MPI_Datatype mpi_partial_buffer_inter; /* MPI derived type for partial 2D interleaved buffer */
550 MPI_Datatype mpi_full_buffer; /* MPI derived type for 2D full buffer */
551 MPI_Datatype mpi_full_chunk; /* MPI derived type for 2D full chunk */
552 MPI_Datatype mpi_chunk_inter_type; /* MPI derived type for 2D chunk interleaved file */
553 MPI_Datatype mpi_collective_type; /* Generic MPI derived type for 2D collective access */
554 MPI_Status mpi_status;
555 int mrc; /* MPI return code */
556
557 /* HDF5 variables */
558 herr_t hrc; /*HDF5 return code */
559 hsize_t h5dims[2]; /*dataset dim sizes */
560 hid_t h5dset_space_id = -1; /*dataset space ID */
561 hid_t h5mem_space_id = -1; /*memory dataspace ID */
562 hid_t h5ds_id = -1; /*dataset handle */
563 hsize_t h5block[2]; /*dataspace selection */
564 hsize_t h5stride[2];
565 hsize_t h5count[2];
566 hsize_t h5start[2];
567 hssize_t h5offset[2]; /* Selection offset within dataspace */
568 hid_t h5dcpl = -1; /* Dataset creation property list */
569 hid_t h5dxpl = -1; /* Dataset transfer property list */
570
571 /* Get the parameters from the parameter block */
572 blk_size=parms->blk_size;
573
574 /* There are two kinds of transfer patterns, contiguous and interleaved.
575 * Let 0,1,2,...,n be data accessed by process 0,1,2,...,n
576 * where n is rank of the last process.
577 * In contiguous pattern, data are accessed as
578 * 000...111...222...nnn...
579 * In interleaved pattern, data are accessed as
580 * 012...n012...n...
581 * These are all in the scope of one dataset.
582 */
583
584 /* 1D dataspace */
585 if (!parms->dim2d){
586 /* Contiguous Pattern: */
587 if (!parms->interleaved) {
588 bytes_begin[0] = (off_t)(((double)nbytes*pio_mpi_rank_g)/pio_mpi_nprocs_g);
589 } /* end if */
590 /* Interleaved Pattern: */
591 else {
592 bytes_begin[0] = (off_t)(blk_size*pio_mpi_rank_g);
593 } /* end else */
594
595 /* Prepare buffer for verifying data */
596 if (parms->verify)
597 memset(buffer,pio_mpi_rank_g+1,buf_size);
598 }/* end if */
599 /* 2D dataspace */
600 else {
601 /* nbytes is always the number of bytes per dataset (1D or 2D). If the
602 dataspace is 2D, snbytes is the size of a side of the dataset square.
603 */
604 snbytes = (off_t)sqrt(nbytes);
605
606 /* Contiguous Pattern: */
607 if (!parms->interleaved) {
608 bytes_begin[0] = (off_t)((double)snbytes*pio_mpi_rank_g / pio_mpi_nprocs_g);
609 bytes_begin[1] = 0;
610 } /* end if */
611 /* Interleaved Pattern: */
612 else {
613 bytes_begin[0] = 0;
614
615 if(!parms->h5_use_chunks || parms->io_type==PHDF5)
616 bytes_begin[1] = (off_t)(blk_size*pio_mpi_rank_g);
617 else
618 bytes_begin[1] = (off_t)(blk_size*blk_size*pio_mpi_rank_g);
619 } /* end else */
620
621 /* Prepare buffer for verifying data */
622 if (parms->verify)
623 HDmemset(buffer,pio_mpi_rank_g+1,buf_size*blk_size);
624 } /* end else */
625
626
627 /* Calculate the total number of bytes (bytes_count) to be
628 * transferred by this process. It may be different for different
629 * transfer pattern due to rounding to integral values.
630 */
631 /*
632 * Calculate the beginning bytes of this process and the next.
633 * bytes_count is the difference between these two beginnings.
634 * This way, it eliminates any rounding errors.
635 * (This is tricky, don't mess with the formula, rounding errors
636 * can easily get introduced) */
637 bytes_count = (off_t)(((double)nbytes*(pio_mpi_rank_g+1)) / pio_mpi_nprocs_g)
638 - (off_t)(((double)nbytes*pio_mpi_rank_g) / pio_mpi_nprocs_g);
639
640 /* debug */
641 if (pio_debug_level >= 4) {
642 HDprint_rank(output);
643 if (!parms->dim2d) {
644 HDfprintf(output, "Debug(do_write): "
645 "buf_size=%zu, bytes_begin=%" H5_PRINTF_LL_WIDTH "d, bytes_count=%" H5_PRINTF_LL_WIDTH "d\n",
646 buf_size, (long long)bytes_begin[0],
647 (long long)bytes_count);
648 } else {
649 HDfprintf(output, "Debug(do_write): "
650 "linear buf_size=%zu, bytes_begin=(%" H5_PRINTF_LL_WIDTH "d,%" H5_PRINTF_LL_WIDTH "d), bytes_count=%" H5_PRINTF_LL_WIDTH "d\n",
651 buf_size*blk_size, (long long)bytes_begin[0],
652 (long long)bytes_begin[1], (long long)bytes_count);
653 }
654 }
655
656 /* I/O Access specific setup */
657 switch (parms->io_type) {
658 case POSIXIO:
659 /* No extra setup */
660 break;
661
662 case MPIO: /* MPI-I/O setup */
663 /* 1D dataspace */
664 if (!parms->dim2d){
665 /* Build block's derived type */
666 mrc = MPI_Type_contiguous((int)blk_size,
667 MPI_BYTE, &mpi_blk_type);
668 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
669
670 /* Build file's derived type */
671 mrc = MPI_Type_vector((int)(buf_size/blk_size), (int)1,
672 (int)pio_mpi_nprocs_g, mpi_blk_type, &mpi_file_type);
673 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
674
675 /* Commit file type */
676 mrc = MPI_Type_commit( &mpi_file_type );
677 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
678
679 /* Commit buffer type */
680 mrc = MPI_Type_commit( &mpi_blk_type );
681 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
682 } /* end if */
683 /* 2D dataspace */
684 else {
685 /* Build partial buffer derived type for contiguous access */
686
687 mrc = MPI_Type_contiguous((int)buf_size, MPI_BYTE,
688 &mpi_partial_buffer_cont);
689 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
690
691 /* Commit partial buffer derived type */
692 mrc = MPI_Type_commit(&mpi_partial_buffer_cont);
693 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
694
695 /* Build contiguous file's derived type */
696 mrc = MPI_Type_vector((int)blk_size, (int)1, (int)(snbytes/buf_size),
697 mpi_partial_buffer_cont, &mpi_cont_type);
698 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
699
700 /* Commit contiguous file type */
701 mrc = MPI_Type_commit(&mpi_cont_type);
702 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
703
704 /* Build partial buffer derived type for interleaved access */
705 mrc = MPI_Type_contiguous((int)blk_size, MPI_BYTE,
706 &mpi_partial_buffer_inter);
707 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
708
709 /* Commit partial buffer derived type */
710 mrc = MPI_Type_commit(&mpi_partial_buffer_inter);
711 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
712
713 /* Build interleaved file's derived type */
714 mrc = MPI_Type_vector((int)buf_size, (int)1, (int)(snbytes/blk_size),
715 mpi_partial_buffer_inter, &mpi_inter_type);
716 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
717
718 /* Commit interleaved file type */
719 mrc = MPI_Type_commit(&mpi_inter_type);
720 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
721
722 /* Build full buffer derived type */
723 mrc = MPI_Type_contiguous((int)(blk_size*buf_size), MPI_BYTE,
724 &mpi_full_buffer);
725 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
726
727 /* Commit full buffer derived type */
728 mrc = MPI_Type_commit(&mpi_full_buffer);
729 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
730
731 /* Build full chunk derived type */
732 mrc = MPI_Type_contiguous((int)(blk_size*blk_size), MPI_BYTE,
733 &mpi_full_chunk);
734 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
735
736 /* Commit full chunk derived type */
737 mrc = MPI_Type_commit(&mpi_full_chunk);
738 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
739
740 /* Build chunk interleaved file's derived type */
741 mrc = MPI_Type_vector((int)(buf_size/blk_size), (int)1, (int)(snbytes/blk_size),
742 mpi_full_chunk, &mpi_chunk_inter_type);
743 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
744
745 /* Commit chunk interleaved file type */
746 mrc = MPI_Type_commit(&mpi_chunk_inter_type);
747 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
748
749 } /* end else */
750 break;
751
752 case PHDF5: /* HDF5 setup */
753 /* 1D dataspace */
754 if (!parms->dim2d){
755 if(nbytes>0) {
756 /* define a contiguous dataset of nbytes native bytes */
757 h5dims[0] = nbytes;
758 h5dset_space_id = H5Screate_simple(1, h5dims, NULL);
759 VRFY((h5dset_space_id >= 0), "H5Screate_simple");
760
761 /* Set up the file dset space id to select the pattern to access */
762 if (!parms->interleaved){
763 /* Contiguous pattern */
764 h5start[0] = bytes_begin[0];
765 h5stride[0] = h5block[0] = blk_size;
766 h5count[0] = buf_size/blk_size;
767 } /* end if */
768 else {
769 /* Interleaved access pattern */
770 /* Skip offset over blocks of other processes */
771 h5start[0] = bytes_begin[0];
772 h5stride[0] = blk_size*pio_mpi_nprocs_g;
773 h5block[0] = blk_size;
774 h5count[0] = buf_size/blk_size;
775 } /* end else */
776 hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET,
777 h5start, h5stride, h5count, h5block);
778 VRFY((hrc >= 0), "H5Sselect_hyperslab");
779 } /* end if */
780 else {
781 h5dset_space_id = H5Screate(H5S_SCALAR);
782 VRFY((h5dset_space_id >= 0), "H5Screate");
783 } /* end else */
784
785 /* Create the memory dataspace that corresponds to the xfer buffer */
786 if(buf_size>0) {
787 h5dims[0] = buf_size;
788 h5mem_space_id = H5Screate_simple(1, h5dims, NULL);
789 VRFY((h5mem_space_id >= 0), "H5Screate_simple");
790 } /* end if */
791 else {
792 h5mem_space_id = H5Screate(H5S_SCALAR);
793 VRFY((h5mem_space_id >= 0), "H5Screate");
794 } /* end else */
795 } /* end if */
796 /* 2D dataspace */
797 else {
798 if(nbytes>0) {
799 /* define a contiguous dataset of nbytes native bytes */
800 h5dims[0] = snbytes;
801 h5dims[1] = snbytes;
802 h5dset_space_id = H5Screate_simple(2, h5dims, NULL);
803 VRFY((h5dset_space_id >= 0), "H5Screate_simple");
804
805 /* Set up the file dset space id to select the pattern to access */
806 if (!parms->interleaved){
807 /* Contiguous pattern */
808 h5start[0] = bytes_begin[0];
809 h5start[1] = bytes_begin[1];
810 h5stride[0] = 1;
811 h5stride[1] = h5block[0] = h5block[1] = blk_size;
812 h5count[0] = 1;
813 h5count[1] = buf_size/blk_size;
814 } /* end if */
815 else {
816 /* Interleaved access pattern */
817 /* Skip offset over blocks of other processes */
818 h5start[0] = bytes_begin[0];
819 h5start[1] = bytes_begin[1];
820 h5stride[0] = blk_size;
821 h5stride[1] = blk_size*pio_mpi_nprocs_g;
822 h5block[0] = h5block[1] = blk_size;
823 h5count[0] = buf_size/blk_size;
824 h5count[1] = 1;
825 } /* end else */
826 hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET,
827 h5start, h5stride, h5count, h5block);
828 VRFY((hrc >= 0), "H5Sselect_hyperslab");
829 } /* end if */
830 else {
831 h5dset_space_id = H5Screate(H5S_SCALAR);
832 VRFY((h5dset_space_id >= 0), "H5Screate");
833 } /* end else */
834
835 /* Create the memory dataspace that corresponds to the xfer buffer */
836 if(buf_size>0) {
837 if (!parms->interleaved){
838 h5dims[0] = blk_size;
839 h5dims[1] = buf_size;
840 }else{
841 h5dims[0] = buf_size;
842 h5dims[1] = blk_size;
843 }
844 h5mem_space_id = H5Screate_simple(2, h5dims, NULL);
845 VRFY((h5mem_space_id >= 0), "H5Screate_simple");
846 } /* end if */
847 else {
848 h5mem_space_id = H5Screate(H5S_SCALAR);
849 VRFY((h5mem_space_id >= 0), "H5Screate");
850 } /* end else */
851 } /* end else */
852
853 /* Create the dataset transfer property list */
854 h5dxpl = H5Pcreate(H5P_DATASET_XFER);
855 if (h5dxpl < 0) {
856 HDfprintf(stderr, "HDF5 Property List Create failed\n");
857 GOTOERROR(FAIL);
858 }
859
860 /* Change to collective I/O, if asked */
861 if(parms->collective) {
862 hrc = H5Pset_dxpl_mpio(h5dxpl, H5FD_MPIO_COLLECTIVE);
863 if (hrc < 0) {
864 HDfprintf(stderr, "HDF5 Property List Set failed\n");
865 GOTOERROR(FAIL);
866 } /* end if */
867 } /* end if */
868 break;
869 } /* end switch */
870
871 for (ndset = 1; ndset <= ndsets; ++ndset) {
872
873 /* Calculate dataset offset within a file */
874
875 /* create dataset */
876 switch (parms->io_type) {
877 case POSIXIO:
878 case MPIO:
879 /* both posix and mpi io just need dataset offset in file*/
880 dset_offset = (ndset - 1) * nbytes;
881 break;
882
883 case PHDF5:
884 h5dcpl = H5Pcreate(H5P_DATASET_CREATE);
885 if (h5dcpl < 0) {
886 HDfprintf(stderr, "HDF5 Property List Create failed\n");
887 GOTOERROR(FAIL);
888 }
889 /* 1D dataspace */
890 if (!parms->dim2d){
891 /* Make the dataset chunked if asked */
892 if(parms->h5_use_chunks) {
893 /* Set the chunk size to be the same as the buffer size */
894 h5dims[0] = blk_size;
895 hrc = H5Pset_chunk(h5dcpl, 1, h5dims);
896 if (hrc < 0) {
897 HDfprintf(stderr, "HDF5 Property List Set failed\n");
898 GOTOERROR(FAIL);
899 } /* end if */
900 } /* end if */
901 }/* end if */
902 else{
903 /* 2D dataspace */
904 if(parms->h5_use_chunks) {
905 /* Set the chunk size to be the same as the block size */
906 h5dims[0] = blk_size;
907 h5dims[1] = blk_size;
908 hrc = H5Pset_chunk(h5dcpl, 2, h5dims);
909 if (hrc < 0) {
910 HDfprintf(stderr, "HDF5 Property List Set failed\n");
911 GOTOERROR(FAIL);
912 } /* end if */
913 } /* end if */
914 }/* end else */
915
916 HDsprintf(dname, "Dataset_%ld", ndset);
917 h5ds_id = H5DCREATE(fd->h5fd, dname, ELMT_H5_TYPE,
918 h5dset_space_id, h5dcpl);
919
920 if (h5ds_id < 0) {
921 HDfprintf(stderr, "HDF5 Dataset Create failed\n");
922 GOTOERROR(FAIL);
923 }
924
925 hrc = H5Pclose(h5dcpl);
926 /* verifying the close of the dcpl */
927 if (hrc < 0) {
928 HDfprintf(stderr, "HDF5 Property List Close failed\n");
929 GOTOERROR(FAIL);
930 }
931 break;
932 }
933
934 /* The task is to transfer bytes_count bytes, starting at
935 * bytes_begin position, using transfer buffer of buf_size bytes.
936 * If interleaved, select buf_size at a time, in round robin
937 * fashion, according to number of process. Otherwise, select
938 * all bytes_count in contiguous.
939 */
940 nbytes_xfer = 0 ;
941
942 /* 1D dataspace */
943 if (!parms->dim2d){
944 /* Set base file offset for all I/O patterns and POSIX access */
945 posix_file_offset = dset_offset + bytes_begin[0];
946
947 /* Set base file offset for all I/O patterns and MPI access */
948 mpi_file_offset = (MPI_Offset)(dset_offset + bytes_begin[0]);
949 } /* end if */
950 else {
951 /* Set base file offset for all I/O patterns and POSIX access */
952 posix_file_offset=dset_offset + bytes_begin[0]*snbytes+
953 bytes_begin[1];
954
955 /* Set base file offset for all I/O patterns and MPI access */
956 mpi_file_offset=(MPI_Offset)(dset_offset + bytes_begin[0]*snbytes+
957 bytes_begin[1]);
958 } /* end else */
959
960 /* Start "raw data" write timer */
961 set_time(res->timers, HDF5_RAW_WRITE_FIXED_DIMS, TSTART);
962
963 while (nbytes_xfer < bytes_count){
964 /* Write */
965 /* Calculate offset of write within a dataset/file */
966 switch (parms->io_type) {
967 case POSIXIO:
968 /* 1D dataspace */
969 if (!parms->dim2d){
970 /* Contiguous pattern */
971 if (!parms->interleaved) {
972 /* Compute file offset */
973 file_offset = posix_file_offset + (off_t)nbytes_xfer;
974
975 /* only care if seek returns error */
976 rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0;
977 VRFY((rc==0), "POSIXSEEK");
978
979 /* check if all bytes are written */
980 rc = ((ssize_t)buf_size ==
981 POSIXWRITE(fd->posixfd, buffer, buf_size));
982 VRFY((rc != 0), "POSIXWRITE");
983
984 /* Advance global offset in dataset */
985 nbytes_xfer+=buf_size;
986 } /* end if */
987 /* Interleaved access pattern */
988 else {
989 /* Set the base of user's buffer */
990 buf_p=(unsigned char *)buffer;
991
992 /* Set the number of bytes to transfer this time */
993 nbytes_toxfer = buf_size;
994
995 /* Loop over the buffers to write */
996 while(nbytes_toxfer>0) {
997 /* Skip offset over blocks of other processes */
998 file_offset = posix_file_offset +
999 (off_t)(nbytes_xfer*pio_mpi_nprocs_g);
1000
1001 /* only care if seek returns error */
1002 rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0;
1003 VRFY((rc==0), "POSIXSEEK");
1004
1005 /* check if all bytes are written */
1006 rc = ((ssize_t)blk_size ==
1007 POSIXWRITE(fd->posixfd, buf_p, blk_size));
1008 VRFY((rc != 0), "POSIXWRITE");
1009
1010 /* Advance location in buffer */
1011 buf_p+=blk_size;
1012
1013 /* Advance global offset in dataset */
1014 nbytes_xfer+=blk_size;
1015
1016 /* Decrement number of bytes left this time */
1017 nbytes_toxfer-=blk_size;
1018 } /* end while */
1019 } /* end else */
1020 } /* end if */
1021 /* 2D dataspace */
1022 else {
1023 /* Contiguous storage */
1024 if (!parms->h5_use_chunks) {
1025 /* Contiguous access pattern */
1026 if (!parms->interleaved) {
1027 /* Compute file offset */
1028 file_offset=posix_file_offset+(off_t)(((nbytes_xfer/blk_size)
1029 /snbytes)*(blk_size*snbytes)+((nbytes_xfer/blk_size)%snbytes));
1030
1031 /* Number of bytes to be transferred per I/O operation */
1032 nbytes_xfer_advance = buf_size;
1033
1034 /* Global offset advance after each I/O operation */
1035 file_offset_advance = (off_t)snbytes;
1036 } /* end if */
1037 /* Interleaved access pattern */
1038 else {
1039 /* Compute file offset */
1040 file_offset=posix_file_offset+(off_t)((((nbytes_xfer/buf_size)
1041 *pio_mpi_nprocs_g)/snbytes)*(buf_size*snbytes)
1042 +((nbytes_xfer/buf_size)*pio_mpi_nprocs_g)%snbytes);
1043
1044 /* Number of bytes to be transferred per I/O operation */
1045 nbytes_xfer_advance = blk_size;
1046
1047 /* Global offset advance after each I/O operation */
1048 file_offset_advance = (off_t)snbytes;
1049 } /* end else */
1050 } /* end if */
1051 /* Chunked storage */
1052 else {
1053 /*Contiguous access pattern */
1054 if (!parms->interleaved) {
1055 /* Compute file offset */
1056 file_offset=posix_file_offset+(off_t)nbytes_xfer;
1057
1058 /* Number of bytes to be transferred per I/O operation */
1059 nbytes_xfer_advance = blk_size * buf_size;
1060
1061 /* Global offset advance after each I/O operation */
1062 file_offset_advance = 0;
1063 } /* end if */
1064 /*Interleaved access pattern */
1065 else {
1066 /* Compute file offset */
1067 /* Before simplification */
1068 /* file_offset=posix_file_offset+(off_t)((nbytes_xfer/(buf_size/blk_size)
1069 *pio_mpi_nprocs_g)/(snbytes/blk_size*(blk_size*blk_size))*(buf_size/blk_size
1070 *snbytes/blk_size*(blk_size*blk_size))+((nbytes_xfer/(buf_size/blk_size))
1071 *pio_mpi_nprocs_g)%(snbytes/blk_size*(blk_size*blk_size))); */
1072
1073 file_offset=posix_file_offset+(off_t)(((nbytes_xfer/(buf_size/blk_size)
1074 *pio_mpi_nprocs_g)/(snbytes*blk_size))*(buf_size*snbytes)+((nbytes_xfer/(buf_size/blk_size))
1075 *pio_mpi_nprocs_g)%(snbytes*blk_size));
1076
1077 /* Number of bytes to be transferred per I/O operation */
1078 nbytes_xfer_advance = blk_size * blk_size;
1079
1080 /* Global offset advance after each I/O operation */
1081 /* file_offset_advance = (off_t)(snbytes/blk_size*(blk_size*blk_size)); */
1082 file_offset_advance = (off_t)(snbytes*blk_size);
1083 } /* end else */
1084 } /* end else */
1085
1086 /* Common code for file access */
1087
1088 /* Set the base of user's buffer */
1089 buf_p = (unsigned char *)buffer;
1090
1091 /* Set the number of bytes to transfer this time */
1092 nbytes_toxfer = buf_size*blk_size;
1093
1094 /* Loop over portions of the buffer to write */
1095 while(nbytes_toxfer>0){
1096 /* only care if seek returns error */
1097 rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0;
1098 VRFY((rc==0), "POSIXSEEK");
1099
1100 /* check if all bytes are written */
1101 rc = ((ssize_t)nbytes_xfer_advance ==
1102 POSIXWRITE(fd->posixfd, buf_p, nbytes_xfer_advance));
1103 VRFY((rc != 0), "POSIXWRITE");
1104
1105 /* Advance location in buffer */
1106 buf_p+=nbytes_xfer_advance;
1107
1108 /* Advance global offset in dataset */
1109 nbytes_xfer+=nbytes_xfer_advance;
1110
1111 /* Decrement number of bytes left this time */
1112 nbytes_toxfer-=nbytes_xfer_advance;
1113
1114 /* Partially advance file offset */
1115 file_offset+=file_offset_advance;
1116 } /* end while */
1117
1118 } /* end else */
1119
1120 break;
1121
1122 case MPIO:
1123 /* 1D dataspace */
1124 if (!parms->dim2d){
1125 /* Independent file access */
1126 if(!parms->collective) {
1127 /* Contiguous pattern */
1128 if (!parms->interleaved){
1129 /* Compute offset in file */
1130 mpi_offset = mpi_file_offset +
1131 nbytes_xfer;
1132
1133 /* Perform independent write */
1134 mrc = MPI_File_write_at(fd->mpifd, mpi_offset, buffer,
1135 (int)(buf_size/blk_size), mpi_blk_type,
1136 &mpi_status);
1137 VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE");
1138
1139 /* Advance global offset in dataset */
1140 nbytes_xfer+=buf_size;
1141 } /* end if */
1142 /* Interleaved access pattern */
1143 else {
1144 /* Set the base of user's buffer */
1145 buf_p=(unsigned char *)buffer;
1146
1147 /* Set the number of bytes to transfer this time */
1148 nbytes_toxfer = buf_size;
1149
1150 /* Loop over the buffers to write */
1151 while(nbytes_toxfer>0) {
1152 /* Skip offset over blocks of other processes */
1153 mpi_offset = mpi_file_offset +
1154 (nbytes_xfer*pio_mpi_nprocs_g);
1155
1156 /* Perform independent write */
1157 mrc = MPI_File_write_at(fd->mpifd, mpi_offset, buf_p,
1158 (int)1, mpi_blk_type, &mpi_status);
1159 VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE");
1160
1161 /* Advance location in buffer */
1162 buf_p+=blk_size;
1163
1164 /* Advance global offset in dataset */
1165 nbytes_xfer+=blk_size;
1166
1167 /* Decrement number of bytes left this time */
1168 nbytes_toxfer-=blk_size;
1169 } /* end while */
1170 } /* end else */
1171 } /* end if */
1172 /* Collective file access */
1173 else {
1174 /* Contiguous access pattern */
1175 if (!parms->interleaved){
1176 /* Compute offset in file */
1177 mpi_offset = mpi_file_offset +
1178 nbytes_xfer;
1179
1180 /* Perform independent write */
1181 mrc = MPI_File_write_at_all(fd->mpifd, mpi_offset, buffer,
1182 (int)(buf_size/blk_size), mpi_blk_type, &mpi_status);
1183 VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE");
1184
1185 /* Advance global offset in dataset */
1186 nbytes_xfer+=buf_size;
1187 } /* end if */
1188 /* Interleaved access pattern */
1189 else {
1190 /* Compute offset in file */
1191 mpi_offset = mpi_file_offset +
1192 (nbytes_xfer*pio_mpi_nprocs_g);
1193
1194 /* Set the file view */
1195 mrc = MPI_File_set_view(fd->mpifd, mpi_offset, mpi_blk_type,
1196 mpi_file_type, (char*)"native", h5_io_info_g);
1197 VRFY((mrc==MPI_SUCCESS), "MPIO_VIEW");
1198
1199 /* Perform write */
1200 mrc = MPI_File_write_at_all(fd->mpifd, 0, buffer,
1201 (int)(buf_size/blk_size), mpi_blk_type, &mpi_status);
1202 VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE");
1203
1204 /* Advance global offset in dataset */
1205 nbytes_xfer+=buf_size;
1206 } /* end else */
1207 } /* end else */
1208 } /* end if */
1209 /* 2D dataspace */
1210 else {
1211 /* Contiguous storage */
1212 if (!parms->h5_use_chunks) {
1213 /* Contiguous access pattern */
1214 if (!parms->interleaved) {
1215 /* Compute offset in file */
1216 mpi_offset=mpi_file_offset+((nbytes_xfer/blk_size)/snbytes)*
1217 (blk_size*snbytes)+((nbytes_xfer/blk_size)%snbytes);
1218
1219 /* Number of bytes to be transferred per I/O operation */
1220 nbytes_xfer_advance = buf_size;
1221
1222 /* Global offset advance after each I/O operation */
1223 mpi_offset_advance = snbytes;
1224
1225 /* MPI type to be used for collective access */
1226 mpi_collective_type = mpi_cont_type;
1227 } /* end if */
1228 /* Interleaved access pattern */
1229 else {
1230 /* Compute offset in file */
1231 mpi_offset=mpi_file_offset+(((nbytes_xfer/buf_size)*pio_mpi_nprocs_g)/snbytes)*
1232 (buf_size*snbytes)+((nbytes_xfer/buf_size)*pio_mpi_nprocs_g)%snbytes;
1233
1234 /* Number of bytes to be transferred per I/O operation */
1235 nbytes_xfer_advance = blk_size;
1236
1237 /* Global offset advance after each I/O operation */
1238 mpi_offset_advance = snbytes;
1239
1240 /* MPI type to be used for collective access */
1241 mpi_collective_type = mpi_inter_type;
1242 } /* end else */
1243 } /* end if */
1244 /* Chunked storage */
1245 else {
1246 /*Contiguous access pattern */
1247 if (!parms->interleaved) {
1248 /* Compute offset in file */
1249 mpi_offset=mpi_file_offset+nbytes_xfer;
1250
1251 /* Number of bytes to be transferred per I/O operation */
1252 nbytes_xfer_advance = blk_size * buf_size;
1253
1254 /* Global offset advance after each I/O operation */
1255 mpi_offset_advance = 0;
1256
1257 /* MPI type to be used for collective access */
1258 mpi_collective_type = mpi_full_buffer;
1259 } /* end if */
1260 /*Interleaved access pattern */
1261 else {
1262 /* Compute offset in file */
1263 /* Before simplification */
1264 /* mpi_offset=mpi_file_offset+(nbytes_xfer/(buf_size/blk_size)
1265 *pio_mpi_nprocs_g)/(snbytes/blk_size*(blk_size*blk_size))*
1266 (buf_size/blk_size*snbytes/blk_size*(blk_size*blk_size))+
1267 ((nbytes_xfer/(buf_size/blk_size))*pio_mpi_nprocs_g)%(snbytes
1268 /blk_size*(blk_size*blk_size)); */
1269 mpi_offset=mpi_file_offset+((nbytes_xfer/(buf_size/blk_size)
1270 *pio_mpi_nprocs_g)/(snbytes*blk_size))*(buf_size*snbytes)
1271 +((nbytes_xfer/(buf_size/blk_size))*pio_mpi_nprocs_g)%(snbytes*blk_size);
1272
1273 /* Number of bytes to be transferred per I/O operation */
1274 nbytes_xfer_advance = blk_size * blk_size;
1275
1276 /* Global offset advance after each I/O operation */
1277 /* mpi_offset_advance = (MPI_Offset)(snbytes/blk_size*(blk_size*blk_size)); */
1278 mpi_offset_advance = (MPI_Offset)(snbytes*blk_size);
1279
1280 /* MPI type to be used for collective access */
1281 mpi_collective_type = mpi_chunk_inter_type;
1282 } /* end else */
1283 } /* end else */
1284
1285 /* Common code for independent file access */
1286 if (!parms->collective) {
1287 /* Set the base of user's buffer */
1288 buf_p = (unsigned char *)buffer;
1289
1290 /* Set the number of bytes to transfer this time */
1291 nbytes_toxfer = buf_size * blk_size;
1292
1293 /* Loop over portions of the buffer to write */
1294 while(nbytes_toxfer>0){
1295 /* Perform independent write */
1296 mrc = MPI_File_write_at(fd->mpifd, mpi_offset, buf_p,
1297 (int)nbytes_xfer_advance, MPI_BYTE, &mpi_status);
1298 VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE");
1299
1300 /* Advance location in buffer */
1301 buf_p+=nbytes_xfer_advance;
1302
1303 /* Advance global offset in dataset */
1304 nbytes_xfer+=nbytes_xfer_advance;
1305
1306 /* Decrement number of bytes left this time */
1307 nbytes_toxfer-=nbytes_xfer_advance;
1308
1309 /* Partially advance global offset in dataset */
1310 mpi_offset+=mpi_offset_advance;
1311 } /* end while */
1312 } /* end if */
1313
1314 /* Common code for collective file access */
1315 else {
1316 /* Set the file view */
1317 mrc = MPI_File_set_view(fd->mpifd, mpi_offset, MPI_BYTE,
1318 mpi_collective_type, (char *)"native", h5_io_info_g);
1319 VRFY((mrc==MPI_SUCCESS), "MPIO_VIEW");
1320
1321 /* Perform write */
1322 MPI_File_write_at_all(fd->mpifd, 0, buffer,(int)(buf_size*blk_size),
1323 MPI_BYTE, &mpi_status);
1324 VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE");
1325
1326 /* Advance global offset in dataset */
1327 nbytes_xfer+=buf_size*blk_size;
1328 } /* end else */
1329
1330 } /* end else */
1331
1332 break;
1333
1334 case PHDF5:
1335 /* 1D dataspace */
1336 if (!parms->dim2d){
1337 /* Set up the file dset space id to move the selection to process */
1338 if (!parms->interleaved){
1339 /* Contiguous pattern */
1340 h5offset[0] = nbytes_xfer;
1341 } /* end if */
1342 else {
1343 /* Interleaved access pattern */
1344 /* Skip offset over blocks of other processes */
1345 h5offset[0] = (nbytes_xfer*pio_mpi_nprocs_g);
1346 } /* end else */
1347 hrc = H5Soffset_simple(h5dset_space_id, h5offset);
1348 VRFY((hrc >= 0), "H5Soffset_simple");
1349
1350 /* Write the buffer out */
1351 hrc = H5Dwrite(h5ds_id, ELMT_H5_TYPE, h5mem_space_id,
1352 h5dset_space_id, h5dxpl, buffer);
1353 VRFY((hrc >= 0), "H5Dwrite");
1354
1355 /* Increment number of bytes transferred */
1356 nbytes_xfer += buf_size;
1357 } /* end if */
1358 /* 2D dataspace */
1359 else {
1360 /* Set up the file dset space id to move the selection to process */
1361 if (!parms->interleaved){
1362 /* Contiguous pattern */
1363 h5offset[0] = (nbytes_xfer/(snbytes*blk_size))*blk_size;
1364 h5offset[1] = (nbytes_xfer%(snbytes*blk_size))/blk_size;
1365
1366 } /* end if */
1367 else {
1368 /* Interleaved access pattern */
1369 /* Skip offset over blocks of other processes */
1370 h5offset[0] = ((nbytes_xfer*pio_mpi_nprocs_g)/(snbytes*buf_size))*buf_size;
1371 h5offset[1] = ((nbytes_xfer*pio_mpi_nprocs_g)%(snbytes*buf_size))/buf_size;
1372
1373 } /* end else */
1374 hrc = H5Soffset_simple(h5dset_space_id, h5offset);
1375 VRFY((hrc >= 0), "H5Soffset_simple");
1376
1377 /* Write the buffer out */
1378 hrc = H5Dwrite(h5ds_id, ELMT_H5_TYPE, h5mem_space_id,
1379 h5dset_space_id, h5dxpl, buffer);
1380 VRFY((hrc >= 0), "H5Dwrite");
1381
1382 /* Increment number of bytes transferred */
1383 nbytes_xfer += buf_size*blk_size;
1384
1385 } /* end else */
1386
1387 break;
1388 } /* switch (parms->io_type) */
1389 } /* end while */
1390
1391 /* Stop "raw data" write timer */
1392 set_time(res->timers, HDF5_RAW_WRITE_FIXED_DIMS, TSTOP);
1393
1394 /* Calculate write time */
1395
1396 /* Close dataset. Only HDF5 needs to do an explicit close. */
1397 if (parms->io_type == PHDF5) {
1398 hrc = H5Dclose(h5ds_id);
1399
1400 if (hrc < 0) {
1401 HDfprintf(stderr, "HDF5 Dataset Close failed\n");
1402 GOTOERROR(FAIL);
1403 }
1404
1405 h5ds_id = -1;
1406 } /* end if */
1407 } /* end for */
1408
1409 done:
1410 /* release MPI-I/O objects */
1411 if (parms->io_type == MPIO) {
1412 /* 1D dataspace */
1413 if (!parms->dim2d){
1414 /* Free file type */
1415 mrc = MPI_Type_free( &mpi_file_type );
1416 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
1417
1418 /* Free buffer type */
1419 mrc = MPI_Type_free( &mpi_blk_type );
1420 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
1421 } /* end if */
1422 /* 2D dataspace */
1423 else {
1424 /* Free partial buffer type for contiguous access */
1425 mrc = MPI_Type_free( &mpi_partial_buffer_cont );
1426 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
1427
1428 /* Free contiguous file type */
1429 mrc = MPI_Type_free( &mpi_cont_type );
1430 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
1431
1432 /* Free partial buffer type for interleaved access */
1433 mrc = MPI_Type_free( &mpi_partial_buffer_inter );
1434 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
1435
1436 /* Free interleaved file type */
1437 mrc = MPI_Type_free( &mpi_inter_type );
1438 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
1439
1440 /* Free full buffer type */
1441 mrc = MPI_Type_free(&mpi_full_buffer);
1442 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
1443
1444 /* Free full chunk type */
1445 mrc = MPI_Type_free(&mpi_full_chunk);
1446 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
1447
1448 /* Free chunk interleaved file type */
1449 mrc = MPI_Type_free(&mpi_chunk_inter_type);
1450 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
1451 } /* end else */
1452 } /* end if */
1453
1454 /* release HDF5 objects */
1455 if (h5dset_space_id != -1) {
1456 hrc = H5Sclose(h5dset_space_id);
1457 if (hrc < 0){
1458 HDfprintf(stderr, "HDF5 Dataset Space Close failed\n");
1459 ret_code = FAIL;
1460 } else {
1461 h5dset_space_id = -1;
1462 }
1463 }
1464
1465 if (h5mem_space_id != -1) {
1466 hrc = H5Sclose(h5mem_space_id);
1467 if (hrc < 0) {
1468 HDfprintf(stderr, "HDF5 Memory Space Close failed\n");
1469 ret_code = FAIL;
1470 } else {
1471 h5mem_space_id = -1;
1472 }
1473 }
1474
1475 if (h5dxpl != -1) {
1476 hrc = H5Pclose(h5dxpl);
1477 if (hrc < 0) {
1478 HDfprintf(stderr, "HDF5 Dataset Transfer Property List Close failed\n");
1479 ret_code = FAIL;
1480 } else {
1481 h5dxpl = -1;
1482 }
1483 }
1484
1485 return ret_code;
1486 }
1487
1488 /*
1489 * Function: do_read
1490 * Purpose: read the required amount of data from the file.
1491 * Return: SUCCESS or FAIL
1492 * Programmer: Albert Cheng 2001/12/13
1493 * Modifications:
1494 * Added 2D testing (Christian Chilan, 10. August 2005)
1495 */
1496 static herr_t
do_read(results * res,file_descr * fd,parameters * parms,long ndsets,off_t nbytes,size_t buf_size,void * buffer)1497 do_read(results *res, file_descr *fd, parameters *parms, long ndsets,
1498 off_t nbytes, size_t buf_size, void *buffer /*out*/)
1499 {
1500 int ret_code = SUCCESS;
1501 int rc; /*routine return code */
1502 long ndset;
1503 size_t blk_size; /* The block size to subdivide the xfer buffer into */
1504 size_t bsize; /* Size of the actual buffer */
1505 off_t nbytes_xfer; /* Total number of bytes transferred so far */
1506 size_t nbytes_xfer_advance; /* Number of bytes transferred in a single I/O operation */
1507 size_t nbytes_toxfer; /* Number of bytes to transfer a particular time */
1508 char dname[64];
1509 off_t dset_offset=0; /*dataset offset in a file */
1510 off_t bytes_begin[2]; /*first elmt this process transfer */
1511 off_t bytes_count; /*number of elmts this process transfer */
1512 off_t snbytes=0; /*size of a side of the dataset square */
1513 unsigned char *buf_p; /* Current buffer pointer */
1514
1515 /* POSIX variables */
1516 off_t file_offset; /* File offset of the next transfer */
1517 off_t file_offset_advance; /* File offset advance after each I/O operation */
1518 off_t posix_file_offset; /* Base file offset of the next transfer */
1519
1520 /* MPI variables */
1521 MPI_Offset mpi_file_offset;/* Base file offset of the next transfer*/
1522 MPI_Offset mpi_offset; /* Offset in MPI file */
1523 MPI_Offset mpi_offset_advance; /* Offset advance after each I/O operation */
1524 MPI_Datatype mpi_file_type; /* MPI derived type for 1D file */
1525 MPI_Datatype mpi_blk_type; /* MPI derived type for 1D buffer */
1526 MPI_Datatype mpi_cont_type; /* MPI derived type for 2D contiguous file */
1527 MPI_Datatype mpi_partial_buffer_cont; /* MPI derived type for partial 2D contiguous buffer */
1528 MPI_Datatype mpi_inter_type; /* MPI derived type for 2D interleaved file */
1529 MPI_Datatype mpi_partial_buffer_inter; /* MPI derived type for partial 2D interleaved buffer */
1530 MPI_Datatype mpi_full_buffer; /* MPI derived type for 2D full buffer */
1531 MPI_Datatype mpi_full_chunk; /* MPI derived type for 2D full chunk */
1532 MPI_Datatype mpi_chunk_inter_type; /* MPI derived type for 2D chunk interleaved file */
1533 MPI_Datatype mpi_collective_type; /* Generic MPI derived type for 2D collective access */
1534 MPI_Status mpi_status;
1535 int mrc; /* MPI return code */
1536
1537 /* HDF5 variables */
1538 herr_t hrc; /*HDF5 return code */
1539 hsize_t h5dims[2]; /*dataset dim sizes */
1540 hid_t h5dset_space_id = -1; /*dataset space ID */
1541 hid_t h5mem_space_id = -1; /*memory dataspace ID */
1542 hid_t h5ds_id = -1; /*dataset handle */
1543 hsize_t h5block[2]; /*dataspace selection */
1544 hsize_t h5stride[2];
1545 hsize_t h5count[2];
1546 hsize_t h5start[2];
1547 hssize_t h5offset[2]; /* Selection offset within dataspace */
1548 hid_t h5dxpl = -1; /* Dataset transfer property list */
1549
1550 /* Get the parameters from the parameter block */
1551 blk_size=parms->blk_size;
1552
1553 /* There are two kinds of transfer patterns, contiguous and interleaved.
1554 * Let 0,1,2,...,n be data accessed by process 0,1,2,...,n
1555 * where n is rank of the last process.
1556 * In contiguous pattern, data are accessed as
1557 * 000...111...222...nnn...
1558 * In interleaved pattern, data are accessed as
1559 * 012...n012...n...
1560 * These are all in the scope of one dataset.
1561 */
1562
1563 /* 1D dataspace */
1564 if (!parms->dim2d){
1565 bsize = buf_size;
1566 /* Contiguous Pattern: */
1567 if (!parms->interleaved) {
1568 bytes_begin[0] = (off_t)(((double)nbytes*pio_mpi_rank_g)/pio_mpi_nprocs_g);
1569 } /* end if */
1570 /* Interleaved Pattern: */
1571 else {
1572 bytes_begin[0] = (off_t)(blk_size*pio_mpi_rank_g);
1573 } /* end else */
1574 }/* end if */
1575 /* 2D dataspace */
1576 else {
1577 /* nbytes is always the number of bytes per dataset (1D or 2D). If the
1578 dataspace is 2D, snbytes is the size of a side of the 'dataset square'.
1579 */
1580 snbytes = (off_t)sqrt(nbytes);
1581
1582 bsize = buf_size * blk_size;
1583
1584 /* Contiguous Pattern: */
1585 if (!parms->interleaved) {
1586 bytes_begin[0] = (off_t)((double)snbytes*pio_mpi_rank_g / pio_mpi_nprocs_g);
1587 bytes_begin[1] = 0;
1588 } /* end if */
1589 /* Interleaved Pattern: */
1590 else {
1591 bytes_begin[0] = 0;
1592
1593 if (!parms->h5_use_chunks || parms->io_type==PHDF5)
1594 bytes_begin[1] = (off_t)(blk_size*pio_mpi_rank_g);
1595 else
1596 bytes_begin[1] = (off_t)(blk_size*blk_size*pio_mpi_rank_g);
1597 } /* end else */
1598 } /* end else */
1599
1600 /* Calculate the total number of bytes (bytes_count) to be
1601 * transferred by this process. It may be different for different
1602 * transfer pattern due to rounding to integral values.
1603 */
1604 /*
1605 * Calculate the beginning bytes of this process and the next.
1606 * bytes_count is the difference between these two beginnings.
1607 * This way, it eliminates any rounding errors.
1608 * (This is tricky, don't mess with the formula, rounding errors
1609 * can easily get introduced) */
1610 bytes_count = (off_t)(((double)nbytes*(pio_mpi_rank_g+1)) / pio_mpi_nprocs_g)
1611 - (off_t)(((double)nbytes*pio_mpi_rank_g) / pio_mpi_nprocs_g);
1612
1613 /* debug */
1614 if (pio_debug_level >= 4) {
1615 HDprint_rank(output);
1616 if (!parms->dim2d) {
1617 HDfprintf(output, "Debug(do_write): "
1618 "buf_size=%zu, bytes_begin=%" H5_PRINTF_LL_WIDTH "d, bytes_count=%" H5_PRINTF_LL_WIDTH "d\n",
1619 buf_size, (long long)bytes_begin[0],
1620 (long long)bytes_count);
1621 } else {
1622 HDfprintf(output, "Debug(do_write): "
1623 "linear buf_size=%zu, bytes_begin=(%" H5_PRINTF_LL_WIDTH "d,%" H5_PRINTF_LL_WIDTH "d), bytes_count=%" H5_PRINTF_LL_WIDTH "d\n",
1624 buf_size*blk_size, (long long)bytes_begin[0],
1625 (long long)bytes_begin[1], (long long)bytes_count);
1626 }
1627 }
1628
1629 /* I/O Access specific setup */
1630 switch (parms->io_type) {
1631 case POSIXIO:
1632 /* No extra setup */
1633 break;
1634
1635 case MPIO: /* MPI-I/O setup */
1636 /* 1D dataspace */
1637 if (!parms->dim2d){
1638 /* Build block's derived type */
1639 mrc = MPI_Type_contiguous((int)blk_size,
1640 MPI_BYTE, &mpi_blk_type);
1641 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
1642
1643 /* Build file's derived type */
1644 mrc = MPI_Type_vector((int)(buf_size/blk_size), (int)1,
1645 (int)pio_mpi_nprocs_g, mpi_blk_type, &mpi_file_type);
1646 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
1647
1648 /* Commit file type */
1649 mrc = MPI_Type_commit( &mpi_file_type );
1650 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
1651
1652 /* Commit buffer type */
1653 mrc = MPI_Type_commit( &mpi_blk_type );
1654 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
1655 } /* end if */
1656 /* 2D dataspace */
1657 else {
1658 /* Build partial buffer derived type for contiguous access */
1659 mrc = MPI_Type_contiguous((int)buf_size, MPI_BYTE,
1660 &mpi_partial_buffer_cont);
1661 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
1662
1663 /* Commit partial buffer derived type */
1664 mrc = MPI_Type_commit(&mpi_partial_buffer_cont);
1665 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
1666
1667 /* Build contiguous file's derived type */
1668 mrc = MPI_Type_vector((int)blk_size, (int)1, (int)(snbytes/buf_size),
1669 mpi_partial_buffer_cont, &mpi_cont_type);
1670 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
1671
1672 /* Commit contiguous file type */
1673 mrc = MPI_Type_commit(&mpi_cont_type);
1674 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
1675
1676 /* Build partial buffer derived type for interleaved access */
1677 mrc = MPI_Type_contiguous((int)blk_size, MPI_BYTE,
1678 &mpi_partial_buffer_inter);
1679 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
1680
1681 /* Commit partial buffer derived type */
1682 mrc = MPI_Type_commit(&mpi_partial_buffer_inter);
1683 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
1684
1685 /* Build interleaved file's derived type */
1686 mrc = MPI_Type_vector((int)buf_size, (int)1, (int)(snbytes/blk_size),
1687 mpi_partial_buffer_inter, &mpi_inter_type);
1688 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
1689
1690 /* Commit interleaved file type */
1691 mrc = MPI_Type_commit(&mpi_inter_type);
1692 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
1693
1694 /* Build full buffer derived type */
1695 mrc = MPI_Type_contiguous((int)(blk_size*buf_size), MPI_BYTE,
1696 &mpi_full_buffer);
1697 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
1698
1699 /* Commit full buffer derived type */
1700 mrc = MPI_Type_commit(&mpi_full_buffer);
1701 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
1702
1703 /* Build full chunk derived type */
1704 mrc = MPI_Type_contiguous((int)(blk_size*blk_size), MPI_BYTE,
1705 &mpi_full_chunk);
1706 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
1707
1708 /* Commit full chunk derived type */
1709 mrc = MPI_Type_commit(&mpi_full_chunk);
1710 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
1711
1712 /* Build chunk interleaved file's derived type */
1713 mrc = MPI_Type_vector((int)(buf_size/blk_size), (int)1, (int)(snbytes/blk_size),
1714 mpi_full_chunk, &mpi_chunk_inter_type);
1715 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
1716
1717 /* Commit chunk interleaved file type */
1718 mrc = MPI_Type_commit(&mpi_chunk_inter_type);
1719 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
1720 } /* end else */
1721 break;
1722
1723 case PHDF5: /* HDF5 setup */
1724 /* 1D dataspace */
1725 if (!parms->dim2d){
1726 if(nbytes>0) {
1727 /* define a contiguous dataset of nbytes native bytes */
1728 h5dims[0] = nbytes;
1729 h5dset_space_id = H5Screate_simple(1, h5dims, NULL);
1730 VRFY((h5dset_space_id >= 0), "H5Screate_simple");
1731
1732 /* Set up the file dset space id to select the pattern to access */
1733 if (!parms->interleaved){
1734 /* Contiguous pattern */
1735 h5start[0] = bytes_begin[0];
1736 h5stride[0] = h5block[0] = blk_size;
1737 h5count[0] = buf_size/blk_size;
1738 } /* end if */
1739 else {
1740 /* Interleaved access pattern */
1741 /* Skip offset over blocks of other processes */
1742 h5start[0] = bytes_begin[0];
1743 h5stride[0] = blk_size*pio_mpi_nprocs_g;
1744 h5block[0] = blk_size;
1745 h5count[0] = buf_size/blk_size;
1746 } /* end else */
1747 hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET,
1748 h5start, h5stride, h5count, h5block);
1749 VRFY((hrc >= 0), "H5Sselect_hyperslab");
1750 } /* end if */
1751 else {
1752 h5dset_space_id = H5Screate(H5S_SCALAR);
1753 VRFY((h5dset_space_id >= 0), "H5Screate");
1754 } /* end else */
1755
1756 /* Create the memory dataspace that corresponds to the xfer buffer */
1757 if(buf_size>0) {
1758 h5dims[0] = buf_size;
1759 h5mem_space_id = H5Screate_simple(1, h5dims, NULL);
1760 VRFY((h5mem_space_id >= 0), "H5Screate_simple");
1761 } /* end if */
1762 else {
1763 h5mem_space_id = H5Screate(H5S_SCALAR);
1764 VRFY((h5mem_space_id >= 0), "H5Screate");
1765 } /* end else */
1766 } /* end if */
1767 /* 2D dataspace */
1768 else {
1769 if(nbytes>0) {
1770 /* define a contiguous dataset of nbytes native bytes */
1771 h5dims[0] = snbytes;
1772 h5dims[1] = snbytes;
1773 h5dset_space_id = H5Screate_simple(2, h5dims, NULL);
1774 VRFY((h5dset_space_id >= 0), "H5Screate_simple");
1775
1776 /* Set up the file dset space id to select the pattern to access */
1777 if (!parms->interleaved){
1778 /* Contiguous pattern */
1779 h5start[0] = bytes_begin[0];
1780 h5start[1] = bytes_begin[1];
1781 h5stride[0] = 1;
1782 h5stride[1] = h5block[0] = h5block[1] = blk_size;
1783 h5count[0] = 1;
1784 h5count[1] = buf_size/blk_size;
1785 } /* end if */
1786 else {
1787 /* Interleaved access pattern */
1788 /* Skip offset over blocks of other processes */
1789 h5start[0] = bytes_begin[0];
1790 h5start[1] = bytes_begin[1];
1791 h5stride[0] = blk_size;
1792 h5stride[1] = blk_size*pio_mpi_nprocs_g;
1793 h5block[0] = h5block[1] = blk_size;
1794 h5count[0] = buf_size/blk_size;
1795 h5count[1] = 1;
1796 } /* end else */
1797 hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET,
1798 h5start, h5stride, h5count, h5block);
1799 VRFY((hrc >= 0), "H5Sselect_hyperslab");
1800 } /* end if */
1801 else {
1802 h5dset_space_id = H5Screate(H5S_SCALAR);
1803 VRFY((h5dset_space_id >= 0), "H5Screate");
1804 } /* end else */
1805
1806 /* Create the memory dataspace that corresponds to the xfer buffer */
1807 if(buf_size>0) {
1808 if (!parms->interleaved){
1809 h5dims[0] = blk_size;
1810 h5dims[1] = buf_size;
1811 }else{
1812 h5dims[0] = buf_size;
1813 h5dims[1] = blk_size;
1814 }
1815 h5mem_space_id = H5Screate_simple(2, h5dims, NULL);
1816 VRFY((h5mem_space_id >= 0), "H5Screate_simple");
1817 } /* end if */
1818 else {
1819 h5mem_space_id = H5Screate(H5S_SCALAR);
1820 VRFY((h5mem_space_id >= 0), "H5Screate");
1821 } /* end else */
1822 } /* end else */
1823
1824 /* Create the dataset transfer property list */
1825 h5dxpl = H5Pcreate(H5P_DATASET_XFER);
1826 if (h5dxpl < 0) {
1827 HDfprintf(stderr, "HDF5 Property List Create failed\n");
1828 GOTOERROR(FAIL);
1829 }
1830
1831 /* Change to collective I/O, if asked */
1832 if(parms->collective) {
1833 hrc = H5Pset_dxpl_mpio(h5dxpl, H5FD_MPIO_COLLECTIVE);
1834 if (hrc < 0) {
1835 HDfprintf(stderr, "HDF5 Property List Set failed\n");
1836 GOTOERROR(FAIL);
1837 } /* end if */
1838 } /* end if */
1839 break;
1840 } /* end switch */
1841
1842 for (ndset = 1; ndset <= ndsets; ++ndset) {
1843
1844 /* Calculate dataset offset within a file */
1845
1846 /* create dataset */
1847 switch (parms->io_type) {
1848 case POSIXIO:
1849 case MPIO:
1850 /* both posix and mpi io just need dataset offset in file*/
1851 dset_offset = (ndset - 1) * nbytes;
1852 break;
1853
1854 case PHDF5:
1855 HDsprintf(dname, "Dataset_%ld", ndset);
1856 h5ds_id = H5DOPEN(fd->h5fd, dname);
1857 if (h5ds_id < 0) {
1858 HDfprintf(stderr, "HDF5 Dataset open failed\n");
1859 GOTOERROR(FAIL);
1860 }
1861
1862 break;
1863 }
1864
1865 /* The task is to transfer bytes_count bytes, starting at
1866 * bytes_begin position, using transfer buffer of buf_size bytes.
1867 * If interleaved, select buf_size at a time, in round robin
1868 * fashion, according to number of process. Otherwise, select
1869 * all bytes_count in contiguous.
1870 */
1871 nbytes_xfer = 0 ;
1872
1873 /* 1D dataspace */
1874 if (!parms->dim2d){
1875 /* Set base file offset for all I/O patterns and POSIX access */
1876 posix_file_offset = dset_offset + bytes_begin[0];
1877
1878 /* Set base file offset for all I/O patterns and MPI access */
1879 mpi_file_offset = (MPI_Offset)(dset_offset + bytes_begin[0]);
1880 } /* end if */
1881 else {
1882 /* Set base file offset for all I/O patterns and POSIX access */
1883 posix_file_offset=dset_offset + bytes_begin[0]*snbytes+
1884 bytes_begin[1];
1885
1886 /* Set base file offset for all I/O patterns and MPI access */
1887 mpi_file_offset=(MPI_Offset)(dset_offset + bytes_begin[0]*snbytes+
1888 bytes_begin[1]);
1889 } /* end else */
1890
1891 /* Start "raw data" read timer */
1892 set_time(res->timers, HDF5_RAW_READ_FIXED_DIMS, TSTART);
1893
1894 while (nbytes_xfer < bytes_count){
1895 /* Read */
1896 /* Calculate offset of read within a dataset/file */
1897 switch (parms->io_type) {
1898 case POSIXIO:
1899 /* 1D dataspace */
1900 if (!parms->dim2d){
1901 /* Contiguous pattern */
1902 if (!parms->interleaved) {
1903 /* Compute file offset */
1904 file_offset = posix_file_offset + (off_t)nbytes_xfer;
1905
1906 /* only care if seek returns error */
1907 rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0;
1908 VRFY((rc==0), "POSIXSEEK");
1909
1910 /* check if all bytes are read */
1911 rc = ((ssize_t)buf_size ==
1912 POSIXREAD(fd->posixfd, buffer, buf_size));
1913 VRFY((rc != 0), "POSIXREAD");
1914
1915 /* Advance global offset in dataset */
1916 nbytes_xfer+=buf_size;
1917 } /* end if */
1918 /* Interleaved access pattern */
1919 else {
1920 /* Set the base of user's buffer */
1921 buf_p=(unsigned char *)buffer;
1922
1923 /* Set the number of bytes to transfer this time */
1924 nbytes_toxfer = buf_size;
1925
1926 /* Loop over the buffers to read */
1927 while(nbytes_toxfer>0) {
1928 /* Skip offset over blocks of other processes */
1929 file_offset = posix_file_offset +
1930 (off_t)(nbytes_xfer*pio_mpi_nprocs_g);
1931
1932 /* only care if seek returns error */
1933 rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0;
1934 VRFY((rc==0), "POSIXSEEK");
1935
1936 /* check if all bytes are read */
1937 rc = ((ssize_t)blk_size ==
1938 POSIXREAD(fd->posixfd, buf_p, blk_size));
1939 VRFY((rc != 0), "POSIXREAD");
1940
1941 /* Advance location in buffer */
1942 buf_p+=blk_size;
1943
1944 /* Advance global offset in dataset */
1945 nbytes_xfer+=blk_size;
1946
1947 /* Decrement number of bytes left this time */
1948 nbytes_toxfer-=blk_size;
1949 } /* end while */
1950 } /* end else */
1951 } /* end if */
1952 /* 2D dataspace */
1953 else {
1954 /* Contiguous storage */
1955 if (!parms->h5_use_chunks) {
1956 /* Contiguous access pattern */
1957 if (!parms->interleaved) {
1958 /* Compute file offset */
1959 file_offset=posix_file_offset+(off_t)(((nbytes_xfer/blk_size)
1960 /snbytes)*(blk_size*snbytes)+((nbytes_xfer/blk_size)%snbytes));
1961
1962 /* Number of bytes to be transferred per I/O operation */
1963 nbytes_xfer_advance = buf_size;
1964
1965 /* Global offset advance after each I/O operation */
1966 file_offset_advance = (off_t)snbytes;
1967 } /* end if */
1968 /* Interleaved access pattern */
1969 else {
1970 /* Compute file offset */
1971 file_offset=posix_file_offset+(off_t)((((nbytes_xfer/buf_size)
1972 *pio_mpi_nprocs_g)/snbytes)*(buf_size*snbytes)
1973 +((nbytes_xfer/buf_size)*pio_mpi_nprocs_g)%snbytes);
1974
1975 /* Number of bytes to be transferred per I/O operation */
1976 nbytes_xfer_advance = blk_size;
1977
1978 /* Global offset advance after each I/O operation */
1979 file_offset_advance = (off_t)snbytes;
1980 } /* end else */
1981 } /* end if */
1982 /* Chunked storage */
1983 else {
1984 /*Contiguous access pattern */
1985 if (!parms->interleaved) {
1986 /* Compute file offset */
1987 file_offset=posix_file_offset+(off_t)nbytes_xfer;
1988
1989 /* Number of bytes to be transferred per I/O operation */
1990 nbytes_xfer_advance = blk_size * buf_size;
1991
1992 /* Global offset advance after each I/O operation */
1993 file_offset_advance = 0;
1994 } /* end if */
1995 /*Interleaved access pattern */
1996 else {
1997 /* Compute file offset */
1998 /* Before simplification */
1999 /* file_offset=posix_file_offset+(off_t)((nbytes_xfer/(buf_size/blk_size)
2000 *pio_mpi_nprocs_g)/(snbytes/blk_size*(blk_size*blk_size))*(buf_size/blk_size
2001 *snbytes/blk_size*(blk_size*blk_size))+((nbytes_xfer/(buf_size/blk_size))
2002 *pio_mpi_nprocs_g)%(snbytes/blk_size*(blk_size*blk_size))); */
2003
2004 file_offset=posix_file_offset+(off_t)(((nbytes_xfer/(buf_size/blk_size)
2005 *pio_mpi_nprocs_g)/(snbytes*blk_size))*(buf_size*snbytes)+((nbytes_xfer/(buf_size/blk_size))
2006 *pio_mpi_nprocs_g)%(snbytes*blk_size));
2007
2008 /* Number of bytes to be transferred per I/O operation */
2009 nbytes_xfer_advance = blk_size * blk_size;
2010
2011 /* Global offset advance after each I/O operation */
2012 /* file_offset_advance = (off_t)(snbytes/blk_size*(blk_size*blk_size)); */
2013 file_offset_advance = (off_t)(snbytes*blk_size);
2014 } /* end else */
2015 } /* end else */
2016
2017 /* Common code for file access */
2018
2019 /* Set the base of user's buffer */
2020 buf_p = (unsigned char *)buffer;
2021
2022 /* Set the number of bytes to transfer this time */
2023 nbytes_toxfer = buf_size*blk_size;
2024
2025 /* Loop over portions of the buffer to read */
2026 while(nbytes_toxfer>0){
2027 /* only care if seek returns error */
2028 rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0;
2029 VRFY((rc==0), "POSIXSEEK");
2030
2031 /* check if all bytes are read */
2032 rc = ((ssize_t)nbytes_xfer_advance ==
2033 POSIXREAD(fd->posixfd, buf_p, nbytes_xfer_advance));
2034 VRFY((rc != 0), "POSIXREAD");
2035
2036 /* Advance location in buffer */
2037 buf_p+=nbytes_xfer_advance;
2038
2039 /* Advance global offset in dataset */
2040 nbytes_xfer+=nbytes_xfer_advance;
2041
2042 /* Decrement number of bytes left this time */
2043 nbytes_toxfer-=nbytes_xfer_advance;
2044
2045 /* Partially advance file offset */
2046 file_offset+=file_offset_advance;
2047 } /* end while */
2048
2049 } /* end else */
2050 break;
2051
2052 case MPIO:
2053 /* 1D dataspace */
2054 if (!parms->dim2d){
2055 /* Independent file access */
2056 if(!parms->collective) {
2057 /* Contiguous pattern */
2058 if (!parms->interleaved){
2059 /* Compute offset in file */
2060 mpi_offset = mpi_file_offset +
2061 nbytes_xfer;
2062
2063 /* Perform independent read */
2064 mrc = MPI_File_read_at(fd->mpifd, mpi_offset, buffer,
2065 (int)(buf_size/blk_size), mpi_blk_type,
2066 &mpi_status);
2067 VRFY((mrc==MPI_SUCCESS), "MPIO_READ");
2068
2069 /* Advance global offset in dataset */
2070 nbytes_xfer+=buf_size;
2071 } /* end if */
2072 /* Interleaved access pattern */
2073 else {
2074 /* Set the base of user's buffer */
2075 buf_p=(unsigned char *)buffer;
2076
2077 /* Set the number of bytes to transfer this time */
2078 nbytes_toxfer = buf_size;
2079
2080 /* Loop over the buffers to read */
2081 while(nbytes_toxfer>0) {
2082 /* Skip offset over blocks of other processes */
2083 mpi_offset = mpi_file_offset +
2084 (nbytes_xfer*pio_mpi_nprocs_g);
2085
2086 /* Perform independent read */
2087 mrc = MPI_File_read_at(fd->mpifd, mpi_offset, buf_p,
2088 (int)1, mpi_blk_type, &mpi_status);
2089 VRFY((mrc==MPI_SUCCESS), "MPIO_READ");
2090
2091 /* Advance location in buffer */
2092 buf_p+=blk_size;
2093
2094 /* Advance global offset in dataset */
2095 nbytes_xfer+=blk_size;
2096
2097 /* Decrement number of bytes left this time */
2098 nbytes_toxfer-=blk_size;
2099 } /* end while */
2100 } /* end else */
2101 } /* end if */
2102 /* Collective file access */
2103 else {
2104 /* Contiguous access pattern */
2105 if (!parms->interleaved){
2106 /* Compute offset in file */
2107 mpi_offset = mpi_file_offset +
2108 nbytes_xfer;
2109
2110 /* Perform collective read */
2111 mrc = MPI_File_read_at_all(fd->mpifd, mpi_offset, buffer,
2112 (int)(buf_size/blk_size), mpi_blk_type, &mpi_status);
2113 VRFY((mrc==MPI_SUCCESS), "MPIO_READ");
2114
2115 /* Advance global offset in dataset */
2116 nbytes_xfer+=buf_size;
2117 } /* end if */
2118 /* Interleaved access pattern */
2119 else {
2120 /* Compute offset in file */
2121 mpi_offset = mpi_file_offset +
2122 (nbytes_xfer*pio_mpi_nprocs_g);
2123
2124 /* Set the file view */
2125 mrc = MPI_File_set_view(fd->mpifd, mpi_offset, mpi_blk_type,
2126 mpi_file_type, (char*)"native", h5_io_info_g);
2127 VRFY((mrc==MPI_SUCCESS), "MPIO_VIEW");
2128
2129 /* Perform collective read */
2130 mrc = MPI_File_read_at_all(fd->mpifd, 0, buffer,
2131 (int)(buf_size/blk_size), mpi_blk_type, &mpi_status);
2132 VRFY((mrc==MPI_SUCCESS), "MPIO_READ");
2133
2134 /* Advance global offset in dataset */
2135 nbytes_xfer+=buf_size;
2136 } /* end else */
2137 } /* end else */
2138 } /* end if */
2139 /* 2D dataspace */
2140 else {
2141 /* Contiguous storage */
2142 if (!parms->h5_use_chunks) {
2143 /* Contiguous access pattern */
2144 if (!parms->interleaved) {
2145 /* Compute offset in file */
2146 mpi_offset=mpi_file_offset+((nbytes_xfer/blk_size)/snbytes)*
2147 (blk_size*snbytes)+((nbytes_xfer/blk_size)%snbytes);
2148
2149 /* Number of bytes to be transferred per I/O operation */
2150 nbytes_xfer_advance = buf_size;
2151
2152 /* Global offset advance after each I/O operation */
2153 mpi_offset_advance = snbytes;
2154
2155 /* MPI type to be used for collective access */
2156 mpi_collective_type = mpi_cont_type;
2157 } /* end if */
2158 /* Interleaved access pattern */
2159 else {
2160 /* Compute offset in file */
2161 mpi_offset=mpi_file_offset+(((nbytes_xfer/buf_size)*pio_mpi_nprocs_g)/snbytes)*
2162 (buf_size*snbytes)+((nbytes_xfer/buf_size)*pio_mpi_nprocs_g)%snbytes;
2163
2164 /* Number of bytes to be transferred per I/O operation */
2165 nbytes_xfer_advance = blk_size;
2166
2167 /* Global offset advance after each I/O operation */
2168 mpi_offset_advance = snbytes;
2169
2170 /* MPI type to be used for collective access */
2171 mpi_collective_type = mpi_inter_type;
2172 } /* end else */
2173 } /* end if */
2174 /* Chunked storage */
2175 else {
2176 /*Contiguous access pattern */
2177 if (!parms->interleaved) {
2178 /* Compute offset in file */
2179 mpi_offset=mpi_file_offset+nbytes_xfer;
2180
2181 /* Number of bytes to be transferred per I/O operation */
2182 nbytes_xfer_advance = blk_size * buf_size;
2183
2184 /* Global offset advance after each I/O operation */
2185 mpi_offset_advance = 0;
2186
2187 /* MPI type to be used for collective access */
2188 mpi_collective_type = mpi_full_buffer;
2189 } /* end if */
2190 /*Interleaved access pattern */
2191 else {
2192 /* Compute offset in file */
2193 /* Before simplification */
2194 /* mpi_offset=mpi_file_offset+(nbytes_xfer/(buf_size/blk_size)
2195 *pio_mpi_nprocs_g)/(snbytes/blk_size*(blk_size*blk_size))*
2196 (buf_size/blk_size*snbytes/blk_size*(blk_size*blk_size))+
2197 ((nbytes_xfer/(buf_size/blk_size))*pio_mpi_nprocs_g)%(snbytes
2198 /blk_size*(blk_size*blk_size)); */
2199 mpi_offset=mpi_file_offset+((nbytes_xfer/(buf_size/blk_size)
2200 *pio_mpi_nprocs_g)/(snbytes*blk_size))*(buf_size*snbytes)
2201 +((nbytes_xfer/(buf_size/blk_size))*pio_mpi_nprocs_g)%(snbytes*blk_size);
2202
2203 /* Number of bytes to be transferred per I/O operation */
2204 nbytes_xfer_advance = blk_size * blk_size;
2205
2206 /* Global offset advance after each I/O operation */
2207 /* mpi_offset_advance = (MPI_Offset)(snbytes/blk_size*(blk_size*blk_size)); */
2208 mpi_offset_advance = (MPI_Offset)(snbytes*blk_size);
2209
2210 /* MPI type to be used for collective access */
2211 mpi_collective_type = mpi_chunk_inter_type;
2212 } /* end else */
2213 } /* end else */
2214
2215 /* Common code for independent file access */
2216 if (!parms->collective) {
2217 /* Set the base of user's buffer */
2218 buf_p = (unsigned char *)buffer;
2219
2220 /* Set the number of bytes to transfer this time */
2221 nbytes_toxfer = buf_size * blk_size;
2222
2223 /* Loop over portions of the buffer to read */
2224 while(nbytes_toxfer>0){
2225 /* Perform independent read */
2226 mrc = MPI_File_read_at(fd->mpifd, mpi_offset, buf_p,
2227 (int)nbytes_xfer_advance, MPI_BYTE, &mpi_status);
2228 VRFY((mrc==MPI_SUCCESS), "MPIO_READ");
2229
2230 /* Advance location in buffer */
2231 buf_p+=nbytes_xfer_advance;
2232
2233 /* Advance global offset in dataset */
2234 nbytes_xfer+=nbytes_xfer_advance;
2235
2236 /* Decrement number of bytes left this time */
2237 nbytes_toxfer-=nbytes_xfer_advance;
2238
2239 /* Partially advance global offset in dataset */
2240 mpi_offset+=mpi_offset_advance;
2241 } /* end while */
2242 } /* end if */
2243
2244 /* Common code for collective file access */
2245 else {
2246 /* Set the file view */
2247 mrc = MPI_File_set_view(fd->mpifd, mpi_offset, MPI_BYTE,
2248 mpi_collective_type, (char *)"native", h5_io_info_g);
2249 VRFY((mrc==MPI_SUCCESS), "MPIO_VIEW");
2250
2251 /* Perform read */
2252 MPI_File_read_at_all(fd->mpifd, 0, buffer,(int)(buf_size*blk_size),
2253 MPI_BYTE, &mpi_status);
2254 VRFY((mrc==MPI_SUCCESS), "MPIO_READ");
2255
2256 /* Advance global offset in dataset */
2257 nbytes_xfer+=buf_size*blk_size;
2258 } /* end else */
2259
2260 } /* end else */
2261 break;
2262
2263 case PHDF5:
2264 /* 1D dataspace */
2265 if (!parms->dim2d){
2266 /* Set up the file dset space id to move the selection to process */
2267 if (!parms->interleaved){
2268 /* Contiguous pattern */
2269 h5offset[0] = nbytes_xfer;
2270 } /* end if */
2271 else {
2272 /* Interleaved access pattern */
2273 /* Skip offset over blocks of other processes */
2274 h5offset[0] = (nbytes_xfer*pio_mpi_nprocs_g);
2275 } /* end else */
2276 hrc = H5Soffset_simple(h5dset_space_id, h5offset);
2277 VRFY((hrc >= 0), "H5Soffset_simple");
2278
2279 /* Read the buffer in */
2280 hrc = H5Dread(h5ds_id, ELMT_H5_TYPE, h5mem_space_id,
2281 h5dset_space_id, h5dxpl, buffer);
2282 VRFY((hrc >= 0), "H5Dread");
2283
2284 /* Increment number of bytes transferred */
2285 nbytes_xfer += buf_size;
2286 } /* end if */
2287 /* 2D dataspace */
2288 else {
2289 /* Set up the file dset space id to move the selection to process */
2290 if (!parms->interleaved){
2291 /* Contiguous pattern */
2292 h5offset[0] = (nbytes_xfer/(snbytes*blk_size))*blk_size;
2293 h5offset[1] = (nbytes_xfer%(snbytes*blk_size))/blk_size;
2294 } /* end if */
2295 else {
2296 /* Interleaved access pattern */
2297 /* Skip offset over blocks of other processes */
2298 h5offset[0] = ((nbytes_xfer*pio_mpi_nprocs_g)/(snbytes*buf_size))*buf_size;
2299 h5offset[1] = ((nbytes_xfer*pio_mpi_nprocs_g)%(snbytes*buf_size))/buf_size;
2300
2301 } /* end else */
2302 hrc = H5Soffset_simple(h5dset_space_id, h5offset);
2303 VRFY((hrc >= 0), "H5Soffset_simple");
2304
2305 /* Write the buffer out */
2306 hrc = H5Dread(h5ds_id, ELMT_H5_TYPE, h5mem_space_id,
2307 h5dset_space_id, h5dxpl, buffer);
2308 VRFY((hrc >= 0), "H5Dread");
2309
2310 /* Increment number of bytes transferred */
2311 nbytes_xfer += buf_size*blk_size;
2312
2313 } /* end else */
2314 break;
2315 } /* switch (parms->io_type) */
2316
2317 /* Verify raw data, if asked */
2318 if (parms->verify) {
2319 /* Verify data read */
2320 unsigned char *ucharptr = (unsigned char *)buffer;
2321 size_t i;
2322 int nerror=0;
2323
2324 for (i = 0; i < bsize; ++i){
2325 if (*ucharptr++ != pio_mpi_rank_g+1) {
2326 if (++nerror < 20){
2327 /* report at most 20 errors */
2328 HDprint_rank(output);
2329 HDfprintf(output, "read data error, expected (%d), "
2330 "got (%d)\n",
2331 pio_mpi_rank_g+1,
2332 (int)*(ucharptr-1));
2333 } /* end if */
2334 } /* end if */
2335 } /* end for */
2336 if (nerror >= 20) {
2337 HDprint_rank(output);
2338 HDfprintf(output, "...");
2339 HDfprintf(output, "total read data errors=%d\n",
2340 nerror);
2341 } /* end if */
2342 } /* if (parms->verify) */
2343
2344 } /* end while */
2345
2346 /* Stop "raw data" read timer */
2347 set_time(res->timers, HDF5_RAW_READ_FIXED_DIMS, TSTOP);
2348
2349 /* Calculate read time */
2350
2351 /* Close dataset. Only HDF5 needs to do an explicit close. */
2352 if (parms->io_type == PHDF5) {
2353 hrc = H5Dclose(h5ds_id);
2354
2355 if (hrc < 0) {
2356 HDfprintf(stderr, "HDF5 Dataset Close failed\n");
2357 GOTOERROR(FAIL);
2358 }
2359
2360 h5ds_id = -1;
2361 } /* end if */
2362 } /* end for */
2363
2364 done:
2365 /* release MPI-I/O objects */
2366 if (parms->io_type == MPIO) {
2367 /* 1D dataspace */
2368 if (!parms->dim2d){
2369 /* Free file type */
2370 mrc = MPI_Type_free( &mpi_file_type );
2371 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
2372
2373 /* Free buffer type */
2374 mrc = MPI_Type_free( &mpi_blk_type );
2375 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
2376 } /* end if */
2377 /* 2D dataspace */
2378 else {
2379 /* Free partial buffer type for contiguous access */
2380 mrc = MPI_Type_free( &mpi_partial_buffer_cont );
2381 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
2382
2383 /* Free contiguous file type */
2384 mrc = MPI_Type_free( &mpi_cont_type );
2385 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
2386
2387 /* Free partial buffer type for interleaved access */
2388 mrc = MPI_Type_free( &mpi_partial_buffer_inter );
2389 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
2390
2391 /* Free interleaved file type */
2392 mrc = MPI_Type_free( &mpi_inter_type );
2393 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
2394
2395 /* Free full buffer type */
2396 mrc = MPI_Type_free(&mpi_full_buffer);
2397 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
2398
2399 /* Free full chunk type */
2400 mrc = MPI_Type_free(&mpi_full_chunk);
2401 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
2402
2403 /* Free chunk interleaved file type */
2404 mrc = MPI_Type_free(&mpi_chunk_inter_type);
2405 VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
2406 } /* end else */
2407 } /* end if */
2408
2409 /* release HDF5 objects */
2410 if (h5dset_space_id != -1) {
2411 hrc = H5Sclose(h5dset_space_id);
2412 if (hrc < 0){
2413 HDfprintf(stderr, "HDF5 Dataset Space Close failed\n");
2414 ret_code = FAIL;
2415 } else {
2416 h5dset_space_id = -1;
2417 }
2418 }
2419
2420 if (h5mem_space_id != -1) {
2421 hrc = H5Sclose(h5mem_space_id);
2422 if (hrc < 0) {
2423 HDfprintf(stderr, "HDF5 Memory Space Close failed\n");
2424 ret_code = FAIL;
2425 } else {
2426 h5mem_space_id = -1;
2427 }
2428 }
2429
2430 if (h5dxpl != -1) {
2431 hrc = H5Pclose(h5dxpl);
2432 if (hrc < 0) {
2433 HDfprintf(stderr, "HDF5 Dataset Transfer Property List Close failed\n");
2434 ret_code = FAIL;
2435 } else {
2436 h5dxpl = -1;
2437 }
2438 }
2439
2440 return ret_code;
2441 }
2442
2443 /*
2444 * Function: do_fopen
2445 * Purpose: Open the specified file.
2446 * Return: SUCCESS or FAIL
2447 * Programmer: Albert Cheng, Bill Wendling, 2001/12/13
2448 * Modifications:
2449 */
2450 static herr_t
do_fopen(parameters * param,char * fname,file_descr * fd,int flags)2451 do_fopen(parameters *param, char *fname, file_descr *fd /*out*/, int flags)
2452 {
2453 int ret_code = SUCCESS, mrc;
2454 hid_t acc_tpl = -1; /* file access templates */
2455
2456 switch (param->io_type) {
2457 case POSIXIO:
2458 if (flags & (PIO_CREATE | PIO_WRITE))
2459 fd->posixfd = POSIXCREATE(fname);
2460 else
2461 fd->posixfd = POSIXOPEN(fname, O_RDONLY);
2462
2463 if (fd->posixfd < 0 ) {
2464 HDfprintf(stderr, "POSIX File Open failed(%s)\n", fname);
2465 GOTOERROR(FAIL);
2466 }
2467
2468
2469 /* The perils of POSIX I/O in a parallel environment. The problem is:
2470 *
2471 * - Process n opens a file with truncation and then starts
2472 * writing to the file.
2473 * - Process m also opens the file with truncation, but after
2474 * process n has already started to write to the file. Thus,
2475 * all of the stuff process n wrote is now lost.
2476 */
2477 MPI_Barrier(pio_comm_g);
2478
2479 break;
2480
2481 case MPIO:
2482 if (flags & (PIO_CREATE | PIO_WRITE)) {
2483 MPI_File_delete(fname, h5_io_info_g);
2484 mrc = MPI_File_open(pio_comm_g, fname, MPI_MODE_CREATE | MPI_MODE_RDWR,
2485 h5_io_info_g, &fd->mpifd);
2486
2487 if (mrc != MPI_SUCCESS) {
2488 HDfprintf(stderr, "MPI File Open failed(%s)\n", fname);
2489 GOTOERROR(FAIL);
2490 }
2491
2492 /*since MPI_File_open with MPI_MODE_CREATE does not truncate */
2493 /*filesize , set size to 0 explicitedly. */
2494 mrc = MPI_File_set_size(fd->mpifd, (MPI_Offset)0);
2495 if (mrc != MPI_SUCCESS) {
2496 HDfprintf(stderr, "MPI_File_set_size failed\n");
2497 GOTOERROR(FAIL);
2498 }
2499 } else {
2500 mrc = MPI_File_open(pio_comm_g, fname, MPI_MODE_RDONLY, h5_io_info_g, &fd->mpifd);
2501 if (mrc != MPI_SUCCESS) {
2502 HDfprintf(stderr, "MPI File Open failed(%s)\n", fname);
2503 GOTOERROR(FAIL);
2504 }
2505 }
2506
2507 break;
2508
2509 case PHDF5:
2510 if ((acc_tpl = H5Pcreate(H5P_FILE_ACCESS)) < 0) {
2511 HDfprintf(stderr, "HDF5 Property List Create failed\n");
2512 GOTOERROR(FAIL);
2513 }
2514
2515 /* Set the file driver to the MPI-IO driver */
2516 if (H5Pset_fapl_mpio(acc_tpl, pio_comm_g, h5_io_info_g) < 0) {
2517 HDfprintf(stderr, "HDF5 Property List Set failed\n");
2518 GOTOERROR(FAIL);
2519 }
2520
2521 /* Set the alignment of objects in HDF5 file */
2522 if (H5Pset_alignment(acc_tpl, param->h5_thresh, param->h5_align) < 0) {
2523 HDfprintf(stderr, "HDF5 Property List Set failed\n");
2524 GOTOERROR(FAIL);
2525 }
2526
2527 /* create the parallel file */
2528 if (flags & (PIO_CREATE | PIO_WRITE))
2529 fd->h5fd = H5Fcreate(fname, H5F_ACC_TRUNC, H5P_DEFAULT, acc_tpl);
2530 else
2531 fd->h5fd = H5Fopen(fname, H5F_ACC_RDONLY, acc_tpl);
2532 if (fd->h5fd < 0) {
2533 HDfprintf(stderr, "HDF5 File Create failed(%s)\n", fname);
2534 GOTOERROR(FAIL);
2535 }
2536
2537 /* verifying the close of the acc_tpl */
2538 if (H5Pclose(acc_tpl) < 0) {
2539 HDfprintf(stderr, "HDF5 Property List Close failed\n");
2540 GOTOERROR(FAIL);
2541 }
2542
2543 break;
2544 }
2545
2546 done:
2547 return ret_code;
2548 }
2549
2550 /*
2551 * Function: do_fclose
2552 * Purpose: Close the specified file descriptor.
2553 * Return: SUCCESS or FAIL
2554 * Programmer: Albert Cheng, Bill Wendling, 2001/12/13
2555 * Modifications:
2556 */
2557 static herr_t
do_fclose(iotype iot,file_descr * fd)2558 do_fclose(iotype iot, file_descr *fd /*out*/)
2559 {
2560 herr_t ret_code = SUCCESS, hrc;
2561 int mrc = 0, rc = 0;
2562
2563 switch (iot) {
2564 case POSIXIO:
2565 rc = POSIXCLOSE(fd->posixfd);
2566
2567 if (rc != 0){
2568 HDfprintf(stderr, "POSIX File Close failed\n");
2569 GOTOERROR(FAIL);
2570 }
2571
2572 fd->posixfd = -1;
2573 break;
2574
2575 case MPIO:
2576 mrc = MPI_File_close(&fd->mpifd);
2577
2578 if (mrc != MPI_SUCCESS){
2579 HDfprintf(stderr, "MPI File close failed\n");
2580 GOTOERROR(FAIL);
2581 }
2582
2583 fd->mpifd = MPI_FILE_NULL;
2584 break;
2585
2586 case PHDF5:
2587 hrc = H5Fclose(fd->h5fd);
2588
2589 if (hrc < 0) {
2590 HDfprintf(stderr, "HDF5 File Close failed\n");
2591 GOTOERROR(FAIL);
2592 }
2593
2594 fd->h5fd = -1;
2595 break;
2596 }
2597
2598 done:
2599 return ret_code;
2600 }
2601
2602
2603 /*
 * Function:    do_cleanupfile
2605 * Purpose: Cleanup temporary file unless HDF5_NOCLEANUP is set.
2606 * Only Proc 0 of the PIO communicator will do the cleanup.
2607 * Other processes just return.
2608 * Return: void
2609 * Programmer: Albert Cheng 2001/12/12
2610 * Modifications:
2611 */
2612 static void
do_cleanupfile(iotype iot,char * fname)2613 do_cleanupfile(iotype iot, char *fname)
2614 {
2615 if (pio_mpi_rank_g != 0)
2616 return;
2617
2618 if (clean_file_g == -1)
2619 clean_file_g = (getenv("HDF5_NOCLEANUP")==NULL) ? 1 : 0;
2620
2621 if (clean_file_g){
2622 switch (iot){
2623 case POSIXIO:
2624 HDremove(fname);
2625 break;
2626 case MPIO:
2627 case PHDF5:
2628 MPI_File_delete(fname, h5_io_info_g);
2629 break;
2630 }
2631 }
2632 }
2633
2634 #ifdef TIME_MPI
/* instrument the MPI_File_write_xxx and read_xxx calls to measure
2636 * pure time spent in MPI_File code.
2637 */
int MPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf,
    int count, MPI_Datatype datatype, MPI_Status *status)
{
    int ret;

    /* Bracket the underlying PMPI call with the read timer so only the
     * time spent inside MPI-IO is charged to HDF5_MPI_READ. */
    set_time(timer_g, HDF5_MPI_READ, TSTART);
    ret = PMPI_File_read_at(fh, offset, buf, count, datatype, status);
    set_time(timer_g, HDF5_MPI_READ, TSTOP);

    return ret;
}
2647
2648
int MPI_File_read_at_all(MPI_File fh, MPI_Offset offset, void *buf,
    int count, MPI_Datatype datatype, MPI_Status *status)
{
    int ret;

    /* Bracket the underlying PMPI call with the read timer so only the
     * time spent inside MPI-IO is charged to HDF5_MPI_READ. */
    set_time(timer_g, HDF5_MPI_READ, TSTART);
    ret = PMPI_File_read_at_all(fh, offset, buf, count, datatype, status);
    set_time(timer_g, HDF5_MPI_READ, TSTOP);

    return ret;
}
2658
int MPI_File_write_at(MPI_File fh, MPI_Offset offset, void *buf,
    int count, MPI_Datatype datatype, MPI_Status *status)
{
    int ret;

    /* Bracket the underlying PMPI call with the write timer so only the
     * time spent inside MPI-IO is charged to HDF5_MPI_WRITE. */
    set_time(timer_g, HDF5_MPI_WRITE, TSTART);
    ret = PMPI_File_write_at(fh, offset, buf, count, datatype, status);
    set_time(timer_g, HDF5_MPI_WRITE, TSTOP);

    return ret;
}
2668
int MPI_File_write_at_all(MPI_File fh, MPI_Offset offset, void *buf,
    int count, MPI_Datatype datatype, MPI_Status *status)
{
    int ret;

    /* Bracket the underlying PMPI call with the write timer so only the
     * time spent inside MPI-IO is charged to HDF5_MPI_WRITE. */
    set_time(timer_g, HDF5_MPI_WRITE, TSTART);
    ret = PMPI_File_write_at_all(fh, offset, buf, count, datatype, status);
    set_time(timer_g, HDF5_MPI_WRITE, TSTOP);

    return ret;
}
2678
2679 #endif /* TIME_MPI */
2680 #endif /* H5_HAVE_PARALLEL */
2681
2682
2683
2684
2685
2686