1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2  * Copyright by The HDF Group.                                               *
3  * Copyright by the Board of Trustees of the University of Illinois.         *
4  * All rights reserved.                                                      *
5  *                                                                           *
6  * This file is part of HDF5.  The full HDF5 copyright notice, including     *
7  * terms governing use, modification, and redistribution, is contained in    *
8  * the COPYING file, which can be found at the root of the source code       *
9  * distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases.  *
10  * If you do not have access to either file, you may request a copy from     *
11  * help@hdfgroup.org.                                                        *
12  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
13 
14 /*
15  * Collective file open optimization tests
16  *
17  */
18 
19 #include "testpar.h"
20 #include "H5Dprivate.h"
21 
22 /* The collection of files is included below to aid
23  * an external "cleanup" process if required.
24  *
25  * Note that the code below relies on the ordering of this array,
26  * since the tests index into it to select the file that each
27  * communicator constructs or reads back and validates.
28  */
29 #define NFILENAME 3
30 const char *FILENAMES[NFILENAME + 1]={"reloc_t_pread_data_file",
31                                       "reloc_t_pread_group_0_file",
32                                       "reloc_t_pread_group_1_file",
33                                       NULL};
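
/* How the FILENAMES entries are consumed (see generate_test_file() and
 * test_parallel_read() below): FILENAMES[0] backs test 1, which runs over
 * MPI_COMM_WORLD, while FILENAMES[1] and FILENAMES[2] back group 0 and
 * group 1 respectively in the split-communicator test 2.
 */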
34 #define FILENAME_BUF_SIZE 1024
35 
36 #define COUNT 1000
37 
38 #define LIMIT_NPROC 6
39 
40 hbool_t pass = true;
41 static const char *random_hdf5_text =
42 "Now is the time for all first-time-users of HDF5 to read their \
43 manual or go thru the tutorials!\n\
44 While you\'re at it, now is also the time to read up on MPI-IO.";
45 
46 static const char *hitchhiker_quote =
47 "A common mistake that people make when trying to design something\n\
48 completely foolproof is to underestimate the ingenuity of complete\n\
49 fools.\n";
50 
51 static int generate_test_file(MPI_Comm comm, int mpi_rank, int group);
52 static int test_parallel_read(MPI_Comm comm, int mpi_rank, int mpi_size, int group);
53 
54 static char *test_argv0 = NULL;
55 
56 
57 /*-------------------------------------------------------------------------
58  * Function:    generate_test_file
59  *
60  * Purpose:     This function is called to produce an HDF5 data file
61  *              whose superblock is relocated to a power-of-2 boundary.
62  *
63  *              Since data will be read back and validated, we generate
64  *              data in a predictable manner rather than randomly.
65  *              For now, we simply use the global mpi_rank of the writing
66  *              process as a starting component for the data generation.
67  *              Subsequent writes are increments from the initial start
68  *              value.
69  *
70  *              In the overall scheme of running the test, we'll call
71  *              this function twice: first as a collection of all MPI
72  *              processes and then a second time with the processes split
73  *              more or less in half. Each sub group will operate
74  *              collectively on their assigned file.  This split into
75  *              subgroups validates that parallel groups can successfully
76  *              open and read data independently of the other parallel
77  *              operations taking place.
78  *
79  * Return:      Success: 0
80  *
81  *              Failure: 1
82  *
83  * Programmer:  Richard Warren
84  *              10/1/17
85  *
86  * Modifications:
87  *
88  *-------------------------------------------------------------------------
89  */
90 static int
91 generate_test_file( MPI_Comm comm, int mpi_rank, int group_id )
92 {
93     int header = -1;
94     const char *fcn_name = "generate_test_file()";
95     const char *failure_mssg = NULL;
96     const char *group_filename = NULL;
97     char data_filename[FILENAME_BUF_SIZE];
98     int file_index = 0;
99     int group_size;
100     int group_rank;
101     int local_failure = 0;
102     int global_failures = 0;
103     hsize_t count = COUNT;
104     hsize_t i;
105     hsize_t offset;
106     hsize_t dims[1] = {0};
107     hid_t file_id   = -1;
108     hid_t memspace  = -1;
109     hid_t filespace = -1;
110     hid_t fctmpl    = -1;
111     hid_t fapl_id   = -1;
112     hid_t dxpl_id   = -1;
113     hid_t dset_id   = -1;
114     hid_t dset_id_ch = -1;
115     hid_t dcpl_id = H5P_DEFAULT;
116     hsize_t chunk[1];
117     float nextValue;
118     float *data_slice = NULL;
119 
120     pass = true;
121 
122     HDassert(comm != MPI_COMM_NULL);
123 
124     if ( (MPI_Comm_rank(comm, &group_rank)) != MPI_SUCCESS) {
125         pass = FALSE;
126         failure_mssg = "generate_test_file: MPI_Comm_rank failed.\n";
127     }
128 
129     if ( (MPI_Comm_size(comm, &group_size)) != MPI_SUCCESS) {
130         pass = FALSE;
131         failure_mssg = "generate_test_file: MPI_Comm_size failed.\n";
132     }
133 
134     if ( mpi_rank == 0 ) {
135 
136         HDfprintf(stdout, "Constructing test files...");
137     }
138 
139     /* Setup the file names
140      * The test-specific filenames are stored as consecutive
141      * array entries in the global 'FILENAMES' array above.
142      * Here, we simply decide on the starting index for
143      * file construction.  The reading portion of the test
144      * will have a similar setup process...
145      */
146     if ( pass ) {
147         if ( comm == MPI_COMM_WORLD ) { /* Test 1 */
148             file_index = 0;
149         }
150         else if ( group_id == 0 ) {     /* Test 2 group 0 */
151             file_index = 1;
152         }
153         else {                          /* Test 2 group 1 */
154             file_index = 2;
155         }
156 
157         /* The 'group_filename' is just a temp variable and
158          * is used to call into the h5_fixname function. No
159          * need to worry that we reassign it for each file!
160          */
161         group_filename = FILENAMES[file_index];
162         HDassert( group_filename );
163 
164         /* Assign the 'data_filename' */
165         if ( h5_fixname(group_filename, H5P_DEFAULT, data_filename,
166                         sizeof(data_filename)) == NULL ) {
167             pass = FALSE;
168             failure_mssg = "h5_fixname(0) failed.\n";
169         }
170     }
171 
172     /* setup data to write */
173     if ( pass ) {
174         if ( (data_slice = (float *)HDmalloc(COUNT * sizeof(float))) == NULL ) {
175             pass = FALSE;
176             failure_mssg = "malloc of data_slice failed.\n";
177         }
178     }
179 
180     if ( pass ) {
181         nextValue = (float)(mpi_rank * COUNT);
182 
183         for(i=0; i<COUNT; i++) {
184             data_slice[i] = nextValue;
185             nextValue += 1;
186         }
187     }
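
    /* For example, with COUNT == 1000 the process with global mpi_rank 2
     * fills data_slice with 2000.0, 2001.0, ..., 2999.0; the read phase
     * later regenerates the same sequence to validate the file contents.
     */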
188 
189     /* Initialize a file creation template */
190     if (pass) {
191         if ((fctmpl = H5Pcreate(H5P_FILE_CREATE)) < 0) {
192             pass = FALSE;
193             failure_mssg = "H5Pcreate(H5P_FILE_CREATE) failed.\n";
194         }
195         else if (H5Pset_userblock(fctmpl, 512) != SUCCEED) {
196             pass = FALSE;
197             failure_mssg = "H5Pset_userblock(,size) failed.\n";
198         }
199     }
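
    /* The 512 byte user block reserved above is what relocates the HDF5
     * superblock away from file offset 0.  Near the end of this function,
     * the group_rank 0 process writes plain text into that reserved
     * region; the parallel file open must locate and skip past it.
     */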
200     /* setup FAPL */
201     if ( pass ) {
202         if ( (fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0 ) {
203             pass = FALSE;
204             failure_mssg = "H5Pcreate(H5P_FILE_ACCESS) failed.\n";
205         }
206     }
207 
208     if ( pass ) {
209         if ( (H5Pset_fapl_mpio(fapl_id, comm, MPI_INFO_NULL)) < 0 ) {
210             pass = FALSE;
211             failure_mssg = "H5Pset_fapl_mpio() failed\n";
212         }
213     }
214 
215     /* create the data file */
216     if ( pass ) {
217         if ( (file_id = H5Fcreate(data_filename, H5F_ACC_TRUNC,
218                                   fctmpl, fapl_id)) < 0 ) {
219             pass = FALSE;
220             failure_mssg = "H5Fcreate() failed.\n";
221         }
222     }
223 
224     /* create and write the dataset */
225     if ( pass ) {
226         if ( (dxpl_id = H5Pcreate(H5P_DATASET_XFER)) < 0 ) {
227             pass = FALSE;
228             failure_mssg = "H5Pcreate(H5P_DATASET_XFER) failed.\n";
229         }
230     }
231 
232     if ( pass ) {
233         if ( (H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE)) < 0 ) {
234             pass = FALSE;
235             failure_mssg = "H5Pset_dxpl_mpio() failed.\n";
236         }
237     }
238 
239     if ( pass ) {
240         dims[0] = COUNT;
241         if ( (memspace = H5Screate_simple(1, dims, NULL)) < 0 ) {
242             pass = FALSE;
243             failure_mssg = "H5Screate_simple(1, dims, NULL) failed (1).\n";
244         }
245     }
246 
247     if ( pass ) {
248         dims[0] *= (hsize_t)group_size;
249         if ( (filespace = H5Screate_simple(1, dims, NULL)) < 0 ) {
250             pass = FALSE;
251             failure_mssg = "H5Screate_simple(1, dims, NULL) failed (2).\n";
252         }
253     }
254 
255     if ( pass ) {
256         offset = (hsize_t)group_rank * (hsize_t)COUNT;
257         if ( (H5Sselect_hyperslab(filespace, H5S_SELECT_SET, &offset,
258                                   NULL, &count, NULL)) < 0 ) {
259             pass = FALSE;
260             failure_mssg = "H5Sselect_hyperslab() failed.\n";
261         }
262     }
263 
264     if ( pass ) {
265         if ( (dset_id = H5Dcreate2(file_id, "dataset0", H5T_NATIVE_FLOAT,
266                                    filespace, H5P_DEFAULT, H5P_DEFAULT,
267                                    H5P_DEFAULT)) < 0 ) {
268             pass = false;
269             failure_mssg = "H5Dcreate2() failed.\n";
270         }
271     }
272 
273     if ( pass ) {
274         if ( (H5Dwrite(dset_id, H5T_NATIVE_FLOAT, memspace,
275                        filespace, dxpl_id, data_slice)) < 0 ) {
276             pass = false;
277             failure_mssg = "H5Dwrite() failed.\n";
278         }
279     }
280 
281 
282     /* create a chunked dataset */
283     chunk[0] = COUNT/8;
284 
285     if ( pass ) {
286       if ( (dcpl_id = H5Pcreate(H5P_DATASET_CREATE)) < 0 ) {
287         pass = false;
288         failure_mssg = "H5Pcreate() failed.\n";
289       }
290     }
291 
292     if ( pass ) {
293       if ( (H5Pset_chunk (dcpl_id, 1, chunk) ) < 0 ) {
294         pass = false;
295         failure_mssg = "H5Pset_chunk() failed.\n";
296       }
297     }
298 
299     if ( pass ) {
300 
301       if ( (dset_id_ch = H5Dcreate2(file_id, "dataset0_chunked", H5T_NATIVE_FLOAT,
302                                     filespace, H5P_DEFAULT, dcpl_id,
303                                     H5P_DEFAULT)) < 0 ) {
304         pass = false;
305         failure_mssg = "H5Dcreate2() failed.\n";
306       }
307     }
308 
309     if ( pass ) {
310       if ( (H5Dwrite(dset_id_ch, H5T_NATIVE_FLOAT, memspace,
311                      filespace, dxpl_id, data_slice)) < 0 ) {
312         pass = false;
313         failure_mssg = "H5Dwrite() failed.\n";
314       }
315     }
316     if ( pass || (dcpl_id != -1)) {
317       if ( H5Pclose(dcpl_id) < 0 ) {
318         pass = false;
319         failure_mssg = "H5Pclose(dcpl_id) failed.\n";
320       }
321     }
322 
323     if ( pass || (dset_id_ch != -1)) {
324       if ( H5Dclose(dset_id_ch) < 0 ) {
325         pass = false;
326         failure_mssg = "H5Dclose(dset_id_ch) failed.\n";
327       }
328     }
329 
330     /* close file, etc. */
331     if ( pass || (dset_id != -1)) {
332         if ( H5Dclose(dset_id) < 0 ) {
333             pass = false;
334             failure_mssg = "H5Dclose(dset_id) failed.\n";
335         }
336     }
337 
338     if ( pass || (memspace != -1) ) {
339         if ( H5Sclose(memspace) < 0 ) {
340             pass = false;
341             failure_mssg = "H5Sclose(memspace) failed.\n";
342         }
343     }
344 
345     if ( pass || (filespace != -1) ) {
346         if ( H5Sclose(filespace) < 0 ) {
347             pass = false;
348             failure_mssg = "H5Sclose(filespace) failed.\n";
349         }
350     }
351 
352     if ( pass || (file_id != -1) ) {
353         if ( H5Fclose(file_id) < 0 ) {
354             pass = false;
355             failure_mssg = "H5Fclose(file_id) failed.\n";
356         }
357     }
358 
359     if ( pass || (dxpl_id != -1) ) {
360         if ( H5Pclose(dxpl_id) < 0 ) {
361             pass = false;
362             failure_mssg = "H5Pclose(dxpl_id) failed.\n";
363         }
364     }
365 
366     if ( pass || (fapl_id != -1) ) {
367         if ( H5Pclose(fapl_id) < 0 ) {
368             pass = false;
369             failure_mssg = "H5Pclose(fapl_id) failed.\n";
370         }
371     }
372 
373     if (pass || (fctmpl != -1)) {
374         if (H5Pclose(fctmpl) < 0) {
375             pass = false;
376             failure_mssg = "H5Pclose(fctmpl) failed.\n";
377         }
378     }
379 
380     /* Add a userblock to the head of the datafile.
381      * We will use this for a functional test of the
382      * file open optimization.  The superblock
383      * relocation is done by the rank 0 process associated
384      * with the communicator being used.  For test 1, we
385      * utilize MPI_COMM_WORLD, so group_rank 0 is the
386      * same as mpi_rank 0.  For test 2, which utilizes
387      * two groups resulting from an MPI_Comm_split, we
388      * will have parallel groups and hence two
389      * group_rank(0) processes. Each parallel group
390      * will create a unique file with different text
391      * headers and different data.
392      */
393     if (group_rank == 0) {
394         const char *text_to_write;
395         size_t bytes_to_write;
396 
397         if (group_id == 0)
398             text_to_write = random_hdf5_text;
399         else
400             text_to_write = hitchhiker_quote;
401 
402         bytes_to_write = HDstrlen(text_to_write);
403 
404         if (pass) {
405             if ((header = HDopen(data_filename, O_WRONLY)) < 0) {
406                 pass = FALSE;
407                 failure_mssg = "HDopen(data_filename, O_WRONLY) failed.\n";
408             }
409         }
410 
411         if (pass) {
412             HDlseek(header, 0, SEEK_SET);
413             if (HDwrite(header, text_to_write, bytes_to_write) < 0) {
414                 pass = FALSE;
415                 failure_mssg = "Unable to write user text into file.\n";
416             }
417         }
418 
419         if (pass || (header > 0)) {
420             if (HDclose(header) < 0) {
421                 pass = FALSE;
422                 failure_mssg = "HDclose() failed.\n";
423             }
424         }
425     }
426 
427     /* collect results from other processes.
428      * Only overwrite the failure message if no previous error
429      * has been detected
430      */
431     local_failure = ( pass ? 0 : 1 );
432 
433     /* This is a global all reduce (NOT group specific) */
434     if ( MPI_Allreduce(&local_failure, &global_failures, 1,
435                        MPI_INT, MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS ) {
436         if ( pass ) {
437             pass = FALSE;
438             failure_mssg = "MPI_Allreduce() failed.\n";
439         }
440     } else if ( ( pass ) && ( global_failures > 0 ) ) {
441         pass = FALSE;
442         failure_mssg = "One or more processes report failure.\n";
443     }
444 
445     /* report results */
446     if ( mpi_rank == 0 ) {
447         if ( pass ) {
448             HDfprintf(stdout, "Done.\n");
449         } else {
450             HDfprintf(stdout, "FAILED.\n");
451             HDfprintf(stdout, "%s: failure_mssg = \"%s\"\n",
452                       fcn_name, failure_mssg);
453         }
454     }
455 
456     /* free data_slice if it has been allocated */
457     if ( data_slice != NULL ) {
458         HDfree(data_slice);
459         data_slice = NULL;
460     }
461 
462     return(! pass);
463 
464 } /* generate_test_file() */
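
/* A minimal sketch, not called by the tests above, of how one might
 * confirm after the fact that a generated file carries the expected
 * user block.  The helper name and the use of the default (serial)
 * file access property list are illustrative assumptions only.
 */
static herr_t
check_userblock_size(const char *filename, hsize_t expected)
{
    hid_t   fid  = -1;
    hid_t   fcpl = -1;
    hsize_t ublk = 0;
    herr_t  ret  = FAIL;

    /* Open the file serially and pull the user block size out of its
     * file creation property list.
     */
    if ( ((fid = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT)) >= 0) &&
         ((fcpl = H5Fget_create_plist(fid)) >= 0) &&
         (H5Pget_userblock(fcpl, &ublk) >= 0) &&
         (ublk == expected) ) {
        ret = SUCCEED;
    }

    if ( fcpl >= 0 )
        H5Pclose(fcpl);
    if ( fid >= 0 )
        H5Fclose(fid);

    return ret;
} /* check_userblock_size() */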
465 
466 
467 /*-------------------------------------------------------------------------
468  * Function:    test_parallel_read
469  *
470  * Purpose:     This actually tests the superblock optimization
471  *              and covers the three primary cases we're interested in.
472  *              1). That HDF5 files can be opened in parallel by
473  *                  the rank 0 process and that the superblock
474  *                  offset is correctly broadcast to the other
475  *                  parallel file readers.
476  *              2). That a parallel application can correctly
477  *                  handle reading multiple files by using
478  *                  subgroups of MPI_COMM_WORLD and that each
479  *                  subgroup operates as described in (1) to
480  *                  collectively read the data.
481  *              3). Testing proc0-read-and-MPI_Bcast using
482  *                  sub-communicators, and reading into
483  *                  a memory space that is different from the
484  *                  file space, and chunked datasets.
485  *
486  *              The global MPI rank is used for reading and
487  *              writing data for process specific data in the
488  *              dataset.  We do this rather simplistically, i.e.
489  *               rank 0:  writes/reads 0-999
490  *               rank 1:  writes/reads 1000-1999
491  *               rank 2:  writes/reads 2000-2999
492  *               ...
493  *
494  * Return:      Success: 0
495  *
496  *              Failure: 1
497  *
498  * Programmer:  Richard Warren
499  *              10/1/17
500  *
501  * Modifications:
502  *
503  *-------------------------------------------------------------------------
504  */
505 static int
506 test_parallel_read(MPI_Comm comm, int mpi_rank, int mpi_size, int group_id)
507 {
508     const char *failure_mssg;
509     const char *fcn_name = "test_parallel_read()";
510     const char *group_filename = NULL;
511     char reloc_data_filename[FILENAME_BUF_SIZE];
512     int local_failure = 0;
513     int global_failures = 0;
514     int group_size;
515     int group_rank;
516     hid_t fapl_id   = -1;
517     hid_t file_id   = -1;
518     hid_t dset_id   = -1;
519     hid_t dset_id_ch = -1;
520     hid_t dxpl_id   = H5P_DEFAULT;
521     hid_t memspace  = -1;
522     hid_t filespace = -1;
523     hid_t filetype  = -1;
524     size_t filetype_size;
525     hssize_t dset_size;
526     hsize_t i;
527     hsize_t offset;
528     hsize_t count = COUNT;
529     hsize_t dims[1] = {0};
530     float nextValue;
531     float *data_slice = NULL;
532 
533     pass = TRUE;
534 
535     HDassert(comm != MPI_COMM_NULL);
536 
537     if ( (MPI_Comm_rank(comm, &group_rank)) != MPI_SUCCESS) {
538         pass = FALSE;
539         failure_mssg = "test_parallel_read: MPI_Comm_rank failed.\n";
540     }
541 
542     if ( (MPI_Comm_size(comm, &group_size)) != MPI_SUCCESS) {
543         pass = FALSE;
544         failure_mssg = "test_parallel_read: MPI_Comm_size failed.\n";
545     }
546 
547     if ( mpi_rank == 0 ) {
548         if ( comm == MPI_COMM_WORLD ) {
549             TESTING("parallel file open test 1");
550         }
551         else {
552             TESTING("parallel file open test 2");
553         }
554     }
555 
556     /* allocate space for the data_slice array */
557     if ( pass ) {
558         if ( (data_slice = (float *)HDmalloc(COUNT * sizeof(float))) == NULL ) {
559             pass = FALSE;
560             failure_mssg = "malloc of data_slice failed.\n";
561         }
562     }
563 
564 
565     /* Select the file name to read
566      * Please see the comments in the 'generate_test_file' function
567      * for more details...
568      */
569     if ( pass ) {
570 
571         if ( comm == MPI_COMM_WORLD )       /* test 1 */
572             group_filename = FILENAMES[0];
573         else if ( group_id == 0 )           /* test 2 group 0 */
574             group_filename = FILENAMES[1];
575         else                                /* test 2 group 1 */
576             group_filename = FILENAMES[2];
577 
578         HDassert(group_filename);
579         if ( h5_fixname(group_filename, H5P_DEFAULT, reloc_data_filename,
580                         sizeof(reloc_data_filename)) == NULL ) {
581 
582             pass = FALSE;
583             failure_mssg = "h5_fixname(1) failed.\n";
584         }
585     }
586 
587     /* setup FAPL */
588     if ( pass ) {
589         if ( (fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0 ) {
590             pass = FALSE;
591             failure_mssg = "H5Pcreate(H5P_FILE_ACCESS) failed.\n";
592         }
593     }
594 
595     if ( pass ) {
596         if ( (H5Pset_fapl_mpio(fapl_id, comm, MPI_INFO_NULL)) < 0 ) {
597             pass = FALSE;
598             failure_mssg = "H5Pset_fapl_mpio() failed\n";
599         }
600     }
601 
602     /* open the file -- should have user block, exercising the optimization */
603     if ( pass ) {
604         if ( (file_id = H5Fopen(reloc_data_filename,
605                                 H5F_ACC_RDONLY, fapl_id)) < 0 ) {
606             pass = FALSE;
607             failure_mssg = "H5Fopen() failed\n";
608         }
609     }
610 
611     /* open the data set */
612     if ( pass ) {
613         if ( (dset_id = H5Dopen2(file_id, "dataset0", H5P_DEFAULT)) < 0 ) {
614             pass = FALSE;
615             failure_mssg = "H5Dopen2() failed\n";
616         }
617     }
618 
619     /* open the chunked data set */
620     if ( pass ) {
621       if ( (dset_id_ch = H5Dopen2(file_id, "dataset0_chunked", H5P_DEFAULT)) < 0 ) {
622         pass = FALSE;
623         failure_mssg = "H5Dopen2() failed\n";
624       }
625     }
626 
627     /* setup memspace */
628     if ( pass ) {
629         dims[0] = count;
630         if ( (memspace = H5Screate_simple(1, dims, NULL)) < 0 ) {
631             pass = FALSE;
632             failure_mssg = "H5Screate_simple(1, dims, NULL) failed\n";
633         }
634     }
635 
636     /* setup filespace */
637     if ( pass ) {
638         if ( (filespace = H5Dget_space(dset_id)) < 0 ) {
639             pass = FALSE;
640             failure_mssg = "H5Dget_space(dataset) failed\n";
641         }
642     }
643 
644     if ( pass ) {
645         offset = (hsize_t)group_rank * count;
646         if ( (H5Sselect_hyperslab(filespace, H5S_SELECT_SET,
647                                   &offset, NULL, &count, NULL)) < 0 ) {
648             pass = FALSE;
649             failure_mssg = "H5Sselect_hyperslab() failed\n";
650         }
651     }
652 
653     /* read this process's section of the data */
654     if ( pass ) {
655         if ( (H5Dread(dset_id, H5T_NATIVE_FLOAT, memspace,
656                       filespace, H5P_DEFAULT, data_slice)) < 0 ) {
657             pass = FALSE;
658             failure_mssg = "H5Dread() failed\n";
659         }
660     }
661 
662     /* verify the data */
663     if ( pass ) {
664         nextValue = (float)((hsize_t)mpi_rank * count);
665         i = 0;
666         while ( ( pass ) && ( i < count ) ) {
667             /* what we really want is data_slice[i] != nextValue --
668              * the following is a circumlocution to shut up
669              * the compiler.
670              */
671             if ( ( data_slice[i] > nextValue ) ||
672                  ( data_slice[i] < nextValue ) ) {
673                 pass = FALSE;
674                 failure_mssg = "Unexpected dset contents.\n";
675             }
676             nextValue += 1;
677             i++;
678         }
679     }
680 
681     if ( pass || (memspace != -1) ) {
682         if ( H5Sclose(memspace) < 0 ) {
683             pass = false;
684             failure_mssg = "H5Sclose(memspace) failed.\n";
685         }
686     }
687 
688     if ( pass || (filespace != -1) ) {
689         if ( H5Sclose(filespace) < 0 ) {
690             pass = false;
691             failure_mssg = "H5Sclose(filespace) failed.\n";
692         }
693     }
694 
695     /* free data_slice if it has been allocated */
696     if ( data_slice != NULL ) {
697         HDfree(data_slice);
698         data_slice = NULL;
699     }
700 
701     /*
702      * Test reading proc0-read-and-bcast with sub-communicators
703      */
704 
705     /* Don't test with more than LIMIT_NPROC processes to avoid memory issues */
706 
707     if( group_size <= LIMIT_NPROC ) {
708 #ifdef H5_HAVE_INSTRUMENTED_LIBRARY
709       hbool_t prop_value;
710 #endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
711 
712       if ( (filespace = H5Dget_space(dset_id )) < 0 ) {
713         pass = FALSE;
714         failure_mssg = "H5Dget_space failed.\n";
715       }
716 
717       if ( (dset_size = H5Sget_simple_extent_npoints(filespace)) < 0 ) {
718         pass = FALSE;
719         failure_mssg = "H5Sget_simple_extent_npoints failed.\n";
720       }
721 
722       if ( (filetype = H5Dget_type(dset_id)) < 0 ) {
723         pass = FALSE;
724         failure_mssg = "H5Dget_type failed.\n";
725       }
726 
727       if ( (filetype_size = H5Tget_size(filetype)) == 0 ) {
728         pass = FALSE;
729         failure_mssg = "H5Tget_size failed.\n";
730       }
731 
732       if ( H5Tclose(filetype) < 0 ) {
733         pass = FALSE;
734         failure_mssg = "H5Tclose failed.\n";
735       }
736 
737       if ( (data_slice = (float *)HDmalloc((size_t)dset_size*filetype_size)) == NULL ) {
738         pass = FALSE;
739         failure_mssg = "malloc of data_slice failed.\n";
740       }
741 
742       if ( pass ) {
743         if ( (dxpl_id = H5Pcreate(H5P_DATASET_XFER)) < 0 ) {
744           pass = FALSE;
745           failure_mssg = "H5Pcreate(H5P_DATASET_XFER) failed.\n";
746         }
747       }
748 
749       if ( pass ) {
750         if ( (H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE)) < 0 ) {
751           pass = FALSE;
752           failure_mssg = "H5Pset_dxpl_mpio() failed.\n";
753         }
754       }
755 
756 #ifdef H5_HAVE_INSTRUMENTED_LIBRARY
757       if ( pass ) {
758         prop_value = H5D_XFER_COLL_RANK0_BCAST_DEF;
759         if(H5Pinsert2(dxpl_id, H5D_XFER_COLL_RANK0_BCAST_NAME, H5D_XFER_COLL_RANK0_BCAST_SIZE, &prop_value,
760                    NULL, NULL, NULL, NULL, NULL, NULL) < 0) {
761             pass = FALSE;
762             failure_mssg = "H5Pinsert2() failed\n";
763         }
764       }
765 #endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
766 
767       /* read H5S_ALL section */
768       if ( pass ) {
769         if ( (H5Dread(dset_id, H5T_NATIVE_FLOAT, H5S_ALL,
770                       H5S_ALL, dxpl_id, data_slice)) < 0 ) {
771           pass = FALSE;
772           failure_mssg = "H5Dread() failed\n";
773         }
774       }
775 
776 #ifdef H5_HAVE_INSTRUMENTED_LIBRARY
777       if ( pass ) {
778         prop_value = FALSE;
779         if(H5Pget(dxpl_id, H5D_XFER_COLL_RANK0_BCAST_NAME, &prop_value) < 0) {
780             pass = FALSE;
781             failure_mssg = "H5Pget() failed\n";
782         }
783         if (pass) {
784           if(prop_value != TRUE) {
785             pass = FALSE;
786             failure_mssg = "rank 0 Bcast optimization was mistakenly not performed\n";
787           }
788         }
789       }
790 #endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
791 
792       /* verify the data */
793       if ( pass ) {
794 
795         if ( comm == MPI_COMM_WORLD )       /* test 1 */
796           nextValue = 0;
797         else if ( group_id == 0 )           /* test 2 group 0 */
798           nextValue = 0;
799         else                                /* test 2 group 1 */
800           nextValue = (float)((hsize_t)( mpi_size / 2 )*count);
801 
802         i = 0;
803         while ( ( pass ) && ( i < (hsize_t)dset_size ) ) {
804           /* what we really want is data_slice[i] != nextValue --
805            * the following is a circumlocution to shut up
806            * the compiler.
807            */
808           if ( ( data_slice[i] > nextValue ) ||
809                ( data_slice[i] < nextValue ) ) {
810             pass = FALSE;
811             failure_mssg = "Unexpected dset contents.\n";
812           }
813           nextValue += 1;
814           i++;
815         }
816       }
817 
818       /* read H5S_ALL section for the chunked dataset */
819 
820 #ifdef H5_HAVE_INSTRUMENTED_LIBRARY
821       if ( pass ) {
822         prop_value = H5D_XFER_COLL_RANK0_BCAST_DEF;
823         if(H5Pset(dxpl_id, H5D_XFER_COLL_RANK0_BCAST_NAME, &prop_value) < 0) {
824             pass = FALSE;
825             failure_mssg = "H5Pset() failed\n";
826         }
827       }
828 #endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
829 
830       for ( i = 0; i < (hsize_t)dset_size; i++) {
831         data_slice[i] = 0;
832       }
833       if ( pass ) {
834         if ( (H5Dread(dset_id_ch, H5T_NATIVE_FLOAT, H5S_ALL,
835                       H5S_ALL, dxpl_id, data_slice)) < 0 ) {
836           pass = FALSE;
837           failure_mssg = "H5Dread() failed\n";
838         }
839       }
840 
841 #ifdef H5_HAVE_INSTRUMENTED_LIBRARY
842       if ( pass ) {
843         prop_value = FALSE;
844         if(H5Pget(dxpl_id, H5D_XFER_COLL_RANK0_BCAST_NAME, &prop_value) < 0) {
845             pass = FALSE;
846             failure_mssg = "H5Pget() failed\n";
847         }
848         if (pass) {
849           if(prop_value == TRUE) {
850             pass = FALSE;
851             failure_mssg = "rank 0 Bcast optimization was mistakenly performed for chunked dataset\n";
852           }
853         }
854       }
855 #endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
856 
857       /* verify the data */
858       if ( pass ) {
859 
860         if ( comm == MPI_COMM_WORLD )       /* test 1 */
861           nextValue = 0;
862         else if ( group_id == 0 )           /* test 2 group 0 */
863           nextValue = 0;
864         else                                /* test 2 group 1 */
865           nextValue = (float)((hsize_t)( mpi_size / 2 )*count);
866 
867         i = 0;
868         while ( ( pass ) && ( i < (hsize_t)dset_size ) ) {
869           /* what we really want is data_slice[i] != nextValue --
870            * the following is a circumlocution to shut up
871            * the compiler.
872            */
873           if ( ( data_slice[i] > nextValue ) ||
874                ( data_slice[i] < nextValue ) ) {
875             pass = FALSE;
876             failure_mssg = "Unexpected chunked dset contents.\n";
877           }
878           nextValue += 1;
879           i++;
880         }
881       }
882 
883       if ( pass || (filespace != -1) ) {
884         if ( H5Sclose(filespace) < 0 ) {
885           pass = false;
886           failure_mssg = "H5Sclose(filespace) failed.\n";
887         }
888       }
889 
890       /* free data_slice if it has been allocated */
891       if ( data_slice != NULL ) {
892         HDfree(data_slice);
893         data_slice = NULL;
894       }
895 
896       /*
897        * Read an H5S_ALL filespace into a hyperslab defined memory space
898        */
899 
900       if ( (data_slice = (float *)HDmalloc((size_t)(dset_size*2)*filetype_size)) == NULL ) {
901         pass = FALSE;
902         failure_mssg = "malloc of data_slice failed.\n";
903       }
904 
905       /* setup memspace */
906       if ( pass ) {
907         dims[0] = (hsize_t)dset_size*2;
908         if ( (memspace = H5Screate_simple(1, dims, NULL)) < 0 ) {
909           pass = FALSE;
910           failure_mssg = "H5Screate_simple(1, dims, NULL) failed\n";
911         }
912       }
913       if ( pass ) {
914         offset = (hsize_t)dset_size;
915         if ( (H5Sselect_hyperslab(memspace, H5S_SELECT_SET,
916                                   &offset, NULL, &offset, NULL)) < 0 ) {
917           pass = FALSE;
918           failure_mssg = "H5Sselect_hyperslab() failed\n";
919         }
920       }
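
      /* With that selection, the H5S_ALL read below deposits the entire
       * dataset into the upper half of data_slice, i.e. into elements
       * [dset_size, 2*dset_size); the verification loop that follows
       * therefore starts its index at dset_size rather than 0.
       */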
921 
922 #ifdef H5_HAVE_INSTRUMENTED_LIBRARY
923       if ( pass ) {
924         prop_value = H5D_XFER_COLL_RANK0_BCAST_DEF;
925         if(H5Pset(dxpl_id, H5D_XFER_COLL_RANK0_BCAST_NAME, &prop_value) < 0) {
926             pass = FALSE;
927             failure_mssg = "H5Pset() failed\n";
928         }
929       }
930 #endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
931 
932       /* read the dataset (whole filespace) through the hyperslab-selected memspace */
933       if ( pass ) {
934         if ( (H5Dread(dset_id, H5T_NATIVE_FLOAT, memspace,
935                       H5S_ALL, dxpl_id, data_slice)) < 0 ) {
936           pass = FALSE;
937           failure_mssg = "H5Dread() failed\n";
938         }
939       }
940 
941 #ifdef H5_HAVE_INSTRUMENTED_LIBRARY
942       if ( pass ) {
943         prop_value = FALSE;
944         if(H5Pget(dxpl_id, H5D_XFER_COLL_RANK0_BCAST_NAME, &prop_value) < 0) {
945             pass = FALSE;
946             failure_mssg = "H5Pget() failed\n";
947         }
948         if (pass) {
949           if(prop_value != TRUE) {
950             pass = FALSE;
951             failure_mssg = "rank 0 Bcast optimization was mistakenly not performed\n";
952           }
953         }
954       }
955 #endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
956 
957       /* verify the data */
958       if ( pass ) {
959 
960         if ( comm == MPI_COMM_WORLD )       /* test 1 */
961           nextValue = 0;
962         else if ( group_id == 0 )           /* test 2 group 0 */
963           nextValue = 0;
964         else                                /* test 2 group 1 */
965           nextValue = (float)((hsize_t)(mpi_size / 2)*count);
966 
967         i = (hsize_t)dset_size;
968         while ( ( pass ) && ( i < (hsize_t)dset_size * 2 ) ) {
969           /* what we really want is data_slice[i] != nextValue --
970            * the following is a circumlocution to shut up
971            * the compiler.
972            */
973           if ( ( data_slice[i] > nextValue ) ||
974                ( data_slice[i] < nextValue ) ) {
975             pass = FALSE;
976             failure_mssg = "Unexpected dset contents.\n";
977           }
978           nextValue += 1;
979           i++;
980         }
981       }
982 
983       if ( pass || (memspace != -1) ) {
984         if ( H5Sclose(memspace) < 0 ) {
985           pass = false;
986           failure_mssg = "H5Sclose(memspace) failed.\n";
987         }
988       }
989 
990       /* free data_slice if it has been allocated */
991       if ( data_slice != NULL ) {
992         HDfree(data_slice);
993         data_slice = NULL;
994       }
995 
996       if ( pass || (dxpl_id != -1) ) {
997         if ( H5Pclose(dxpl_id) < 0 ) {
998           pass = false;
999           failure_mssg = "H5Pclose(dxpl_id) failed.\n";
1000         }
1001       }
1002     }
1003 
1004     /* close file, etc. */
1005     if ( pass || (dset_id != -1) ) {
1006         if ( H5Dclose(dset_id) < 0 ) {
1007             pass = false;
1008             failure_mssg = "H5Dclose(dset_id) failed.\n";
1009         }
1010     }
1011 
1012     if ( pass || (dset_id_ch != -1) ) {
1013         if ( H5Dclose(dset_id_ch) < 0 ) {
1014             pass = false;
1015             failure_mssg = "H5Dclose(dset_id_ch) failed.\n";
1016         }
1017     }
1018 
1019     if ( pass || (file_id != -1) ) {
1020         if ( H5Fclose(file_id) < 0 ) {
1021             pass = false;
1022             failure_mssg = "H5Fclose(file_id) failed.\n";
1023         }
1024     }
1025 
1026     if ( pass || (fapl_id != -1) ) {
1027         if ( H5Pclose(fapl_id) < 0 ) {
1028             pass = false;
1029             failure_mssg = "H5Pclose(fapl_id) failed.\n";
1030         }
1031     }
1032 
1033     /* collect results from other processes.
1034      * Only overwrite the failure message if no previous error
1035      * has been detected
1036      */
1037     local_failure = ( pass ? 0 : 1 );
1038 
1039     if ( MPI_Allreduce( &local_failure, &global_failures, 1,
1040                         MPI_INT, MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS ) {
1041         if ( pass ) {
1042             pass = FALSE;
1043             failure_mssg = "MPI_Allreduce() failed.\n";
1044         }
1045     } else if ( ( pass ) && ( global_failures > 0 ) ) {
1046         pass = FALSE;
1047         failure_mssg = "One or more processes report failure.\n";
1048     }
1049 
1050     /* report results and finish cleanup */
1051     if ( group_rank == 0 ) {
1052         if ( pass ) {
1053             PASSED();
1054         } else {
1055             H5_FAILED();
1056             HDfprintf(stdout, "%s: failure_mssg = \"%s\"\n",
1057                       fcn_name, failure_mssg);
1058         }
1059         HDremove(reloc_data_filename);
1060     }
1061 
1062     return( ! pass );
1063 
1064 } /* test_parallel_read() */
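
/* A conceptual sketch, not called by the tests above and not the
 * library's internal implementation, of the access pattern that the
 * rank 0 read-and-broadcast optimization stands in for: a single
 * process performs the read and the buffer is distributed with
 * MPI_Bcast, instead of every rank issuing an identical read.  The
 * helper name and the float/H5S_ALL assumptions are illustrative only.
 */
static int
rank0_bcast_read_sketch(MPI_Comm comm, hid_t dset_id, float *buf, int nelmts)
{
    int group_rank;

    if ( MPI_Comm_rank(comm, &group_rank) != MPI_SUCCESS )
        return -1;

    /* Only the group's rank 0 process touches the file. */
    if ( group_rank == 0 ) {
        if ( H5Dread(dset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL,
                     H5P_DEFAULT, buf) < 0 )
            return -1;
    }

    /* All ranks (including rank 0) participate in the broadcast.  Note
     * that if rank 0 bails out above, the other ranks would block here;
     * a real implementation has to account for that failure path.
     */
    if ( MPI_Bcast(buf, nelmts, MPI_FLOAT, 0, comm) != MPI_SUCCESS )
        return -1;

    return 0;
} /* rank0_bcast_read_sketch() */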
1065 
1066 
1067 /*-------------------------------------------------------------------------
1068  * Function:    main
1069  *
1070  * Purpose:     To implement a parallel test which validates whether the
1071  *              new superblock lookup functionality is working correctly.
1072  *
1073  *              The test consists of creating two separate HDF5 files
1074  *              in which random text is written into a user block at
1075  *              the start of each file.  This forces the
1076  *              HDF5 file superblock to a non-zero offset.
1077  *              Having created the two independent files, we create two
1078  *              non-overlapping MPI groups, each of which is then tasked
1079  *              with the opening and validation of the data contained
1080  *              therein.
1081  *
1082  * Return:      Success: 0
1083  *              Failure: 1
1084  *
1085  * Programmer:  Richard Warren
1086  *              10/1/17
1087  *-------------------------------------------------------------------------
1088  */
1089 
1090 int
1091 main( int argc, char **argv)
1092 {
1093     int nerrs = 0;
1094     int which_group = 0;
1095     int mpi_rank;
1096     int mpi_size;
1097     int split_size;
1098     MPI_Comm group_comm = MPI_COMM_NULL;
1099 
1100     /* I don't believe that argv[0] can ever be NULL.
1101      * It should thus be safe to dup and save as a check
1102      * for CMake testing. Note that in our CMake builds,
1103      * all executables are located in the same directory.
1104      * We assume (but we'll check) that the h5jam utility
1105      * is in the same directory as this executable.  If that
1106      * isn't true, then we can use a relative path that
1107      * should be valid for the autotools environment.
1108      */
1109     test_argv0 = HDstrdup(argv[0]);
1110 
1111     if ( (MPI_Init(&argc, &argv)) != MPI_SUCCESS) {
1112        HDfprintf(stderr, "FATAL: Unable to initialize MPI\n");
1113        HDexit(EXIT_FAILURE);
1114     }
1115 
1116     if ( (MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank)) != MPI_SUCCESS) {
1117         HDfprintf(stderr, "FATAL: MPI_Comm_rank returned an error\n");
1118         HDexit(EXIT_FAILURE);
1119     }
1120 
1121     if ( (MPI_Comm_size(MPI_COMM_WORLD, &mpi_size)) != MPI_SUCCESS) {
1122         HDfprintf(stderr, "FATAL: MPI_Comm_size returned an error\n");
1123         HDexit(EXIT_FAILURE);
1124     }
1125 
1126     H5open();
1127 
1128     if ( mpi_rank == 0 ) {
1129         HDfprintf(stdout, "========================================\n");
1130         HDfprintf(stdout, "Collective file open optimization tests\n");
1131         HDfprintf(stdout, "        mpi_size     = %d\n", mpi_size);
1132         HDfprintf(stdout, "========================================\n");
1133     }
1134 
1135     if ( mpi_size < 3 ) {
1136 
1137         if ( mpi_rank == 0 ) {
1138 
1139             HDprintf("    Need at least 3 processes.  Exiting.\n");
1140         }
1141         goto finish;
1142     }
1143 
1144     /* ------  Create two (2) MPI groups  ------
1145      *
1146      * We split MPI_COMM_WORLD into 2 more or less equal-sized
1147      * groups.  The resulting communicators will be used to generate
1148      * two HDF5 files which in turn will be opened in parallel and the
1149      * contents verified in the second read test below.
1150      */
1151     split_size = mpi_size / 2;
1152     which_group = (mpi_rank < split_size ? 0 : 1);
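
    /* For example, with mpi_size == 5: split_size == 2, so ranks 0 and 1
     * form group 0 while ranks 2, 3, and 4 form group 1.
     */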
1153 
1154     if ( (MPI_Comm_split(MPI_COMM_WORLD,
1155                          which_group,
1156                          0,
1157                          &group_comm)) != MPI_SUCCESS) {
1158 
1159         HDfprintf(stderr, "FATAL: MPI_Comm_split returned an error\n");
1160         HDexit(EXIT_FAILURE);
1161     }
1162 
1163     /* ------  Generate all files ------ */
1164 
1165     /* We generate the file used for test 1 */
1166     nerrs += generate_test_file( MPI_COMM_WORLD, mpi_rank, which_group );
1167 
1168     if ( nerrs > 0 ) {
1169         if ( mpi_rank == 0 ) {
1170             HDprintf("    Test(1) file construction failed -- skipping tests.\n");
1171         }
1172         goto finish;
1173     }
1174 
1175     /* We generate the file used for test 2 */
1176     nerrs += generate_test_file( group_comm, mpi_rank, which_group );
1177 
1178     if ( nerrs > 0 ) {
1179         if ( mpi_rank == 0 ) {
1180             HDprintf("    Test(2) file construction failed -- skipping tests.\n");
1181         }
1182         goto finish;
1183     }
1184 
1185     /* Now read the generated test file (still using MPI_COMM_WORLD) */
1186     nerrs += test_parallel_read( MPI_COMM_WORLD, mpi_rank, mpi_size, which_group);
1187 
1188     if ( nerrs > 0 ) {
1189         if ( mpi_rank == 0 ) {
1190             HDprintf("    Parallel read test(1) failed -- skipping tests.\n");
1191         }
1192         goto finish;
1193     }
1194 
1195     /* Update the user on our progress so far. */
1196     if ( mpi_rank == 0 ) {
1197         HDprintf("    Test 1 of 2 succeeded\n");
1198         HDprintf("    -- Starting multi-group parallel read test.\n");
1199     }
1200 
1201     /* run the 2nd set of tests */
1202     nerrs += test_parallel_read(group_comm, mpi_rank, mpi_size, which_group);
1203 
1204     if ( nerrs > 0 ) {
1205         if ( mpi_rank == 0 ) {
1206             HDprintf("    Multi-group read test(2) failed\n");
1207         }
1208         goto finish;
1209     }
1210 
1211     if ( mpi_rank == 0 ) {
1212         HDprintf("    Test 2 of 2 succeeded\n");
1213     }
1214 
1215 finish:
1216 
1217     if ((group_comm != MPI_COMM_NULL) &&
1218         (MPI_Comm_free(&group_comm)) != MPI_SUCCESS) {
1219         HDfprintf(stderr, "MPI_Comm_free failed!\n");
1220     }
1221 
1222     /* make sure all processes are finished before final report, cleanup
1223      * and exit.
1224      */
1225     MPI_Barrier(MPI_COMM_WORLD);
1226 
1227     if ( mpi_rank == 0 ) {           /* only process 0 reports */
1228         const char *header = "Collective file open optimization tests";
1229 
1230         HDfprintf(stdout, "===================================\n");
1231         if ( nerrs > 0 ) {
1232             HDfprintf(stdout, "***%s detected %d failures***\n", header, nerrs);
1233         }
1234         else {
1235             HDfprintf(stdout, "%s finished with no failures\n", header);
1236         }
1237         HDfprintf(stdout, "===================================\n");
1238     }
1239 
1240     /* close HDF5 library */
1241     if (H5close() != SUCCEED) {
1242         HDfprintf(stdout, "H5close() failed. (Ignoring)\n");
1243     }
1244 
1245     /* MPI_Finalize must be called AFTER H5close which may use MPI calls */
1246     MPI_Finalize();
1247 
1248     /* cannot just return (nerrs) because the exit code is limited to 1 byte */
1249     return((nerrs > 0) ? EXIT_FAILURE : EXIT_SUCCESS );
1250 
1251 } /* main() */
1252