1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2  * Copyright by The HDF Group.                                               *
3  * Copyright by the Board of Trustees of the University of Illinois.         *
4  * All rights reserved.                                                      *
5  *                                                                           *
6  * This file is part of HDF5.  The full HDF5 copyright notice, including     *
7  * terms governing use, modification, and redistribution, is contained in    *
8  * the COPYING file, which can be found at the root of the source code       *
9  * distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases.  *
10  * If you do not have access to either file, you may request a copy from     *
11  * help@hdfgroup.org.                                                        *
12  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
13 
14 /*
15  * Author: Albert Cheng of NCSA, May 1, 2001.
16  * This is derived from code given to me by Robert Ross.
17  *
18  * NOTE: This code assumes that all command line arguments make it out to all
19  * the processes that make up the parallel job, which isn't always the case.
20  * So if it doesn't work on some platform, that might be why.
21  */
22 
23 #include "hdf5.h"
24 #include "H5private.h"
25 #include "h5test.h"
26 
27 #ifdef H5_HAVE_PARALLEL
28 
29 #ifdef H5_STDC_HEADERS
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #endif
36 
37 #ifdef H5_HAVE_UNISTD_H
38 #include <sys/types.h>
39 #include <unistd.h>
40 #endif
41 
42 #ifdef H5_HAVE_SYS_STAT_H
43 #include <sys/stat.h>
44 #endif
45 
46 #if defined(H5_TIME_WITH_SYS_TIME)
47 #   include <sys/time.h>
48 #   include <time.h>
49 #elif defined(H5_HAVE_SYS_TIME_H)
50 #   include <sys/time.h>
51 #else
52 #   include <time.h>
53 #endif
54 
55 #include <mpi.h>
56 #ifndef MPI_FILE_NULL           /*MPIO may be defined in mpi.h already       */
57 #   include <mpio.h>
58 #endif
59 
60 /* Macro definitions */
61 /* Verify:
62  * if val is false (0), print mesg and if fatal is true (non-zero), die.
63  */
64 #define H5FATAL 1
65 #define VRFY(val, mesg, fatal) do {                                            \
66     if (!val) {                                                                \
67         printf("Proc %d: ", mynod);                 \
68         printf("*** Assertion failed (%s) at line %4d in %s\n",                \
69         mesg, (int)__LINE__, __FILE__);                  \
70         if (fatal){                     \
71             fflush(stdout);                   \
72             goto die_jar_jar_die;                 \
73         }                       \
74     }                                                                          \
75 } while(0)
76 #define RANK 1
77 #define MAX_PATH 1024
78 
79 hsize_t dims[RANK];     /* dataset dim sizes */
80 hsize_t block[RANK], stride[RANK], count[RANK];
81 hsize_t start[RANK];
82 hid_t fid;                  /* HDF5 file ID */
83 hid_t acc_tpl;    /* File access templates */
84 hid_t sid;       /* Dataspace ID */
85 hid_t file_dataspace;  /* File dataspace ID */
86 hid_t mem_dataspace;  /* memory dataspace ID */
87 hid_t dataset;    /* Dataset ID */
88 hsize_t opt_alignment  = 1;
89 hsize_t opt_threshold  = 1;
90 int  opt_split_vfd  = 0;
91 char  *meta_ext, *raw_ext;  /* holds the meta and raw file extension if */
92         /* opt_split_vfd is set */
93 
94 
95 /* DEFAULT VALUES FOR OPTIONS */
96 int64_t opt_block     = 1048576*16;
97 int     opt_iter      = 1;
98 int     opt_stripe    = -1;
99 int     opt_correct   = 0;
100 int     amode         = O_RDWR | O_CREAT;
101 char    opt_file[256] = "perftest.out";
102 char    opt_pvfstab[256] = "notset";
103 int     opt_pvfstab_set = 0;
104 
105 const char *FILENAME[] = {
106    opt_file,
107    NULL
108 };
109 
110 /* function prototypes */
111 static int parse_args(int argc, char **argv);
112 
113 extern int errno;
114 
115 /* globals needed for getopt */
116 extern char *optarg;
117 
main(int argc,char ** argv)118 int main(int argc, char **argv)
119 {
120     char *buf, *tmp, *buf2, *tmp2, *check;
121     int i, j, mynod=0, nprocs=1, err, my_correct = 1, correct, myerrno;
122     double stim, etim;
123     double write_tim = 0;
124     double read_tim = 0;
125     double read_bw, write_bw;
126     double max_read_tim, max_write_tim;
127     double min_read_tim, min_write_tim;
128     double ave_read_tim, ave_write_tim;
129     int64_t iter_jump = 0;
130     int64_t seek_position = 0;
131     MPI_File fh;
132     MPI_Status status;
133     int nchars;
134     char filename[MAX_PATH];
135     herr_t ret;           /* Generic return value */
136 
137     /* startup MPI and determine the rank of this process */
138     MPI_Init(&argc,&argv);
139     MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
140     MPI_Comm_rank(MPI_COMM_WORLD, &mynod);
141 
142     /* parse the command line arguments */
143     parse_args(argc, argv);
144 
145     if (mynod == 0) printf("# Using hdf5-io calls.\n");
146 
147     /* kindof a weird hack- if the location of the pvfstab file was
148      * specified on the command line, then spit out this location into
149      * the appropriate environment variable: */
150 
151 #if H5_HAVE_SETENV
152 /* no setenv or unsetenv */
153     if (opt_pvfstab_set) {
154             if((setenv("PVFSTAB_FILE", opt_pvfstab, 1)) < 0){
155                     perror("setenv");
156                     goto die_jar_jar_die;
157             }
158     }
159 #endif
160 
161     /* this is how much of the file data is covered on each iteration of
162      * the test.  used to help determine the seek offset on each
163      * iteration */
164     iter_jump = nprocs * opt_block;
165 
166     /* setup a buffer of data to write */
167     if (!(tmp = (char *) malloc(opt_block + 256))) {
168             perror("malloc");
169             goto die_jar_jar_die;
170     }
171     buf = tmp + 128 - (((long)tmp) % 128);  /* align buffer */
172 
173     if (opt_correct) {
174             /* do the same buffer setup for verifiable data */
175             if (!(tmp2 = (char *) malloc(opt_block + 256))) {
176                     perror("malloc2");
177                     goto die_jar_jar_die;
178              }
179             buf2 = tmp + 128 - (((long)tmp) % 128);
180     }
181 
182     /* setup file access template with parallel IO access. */
183     if (opt_split_vfd){
184     hid_t mpio_pl;
185 
186     mpio_pl = H5Pcreate (H5P_FILE_ACCESS);
187     VRFY((acc_tpl >= 0), "", H5FATAL);
188     ret = H5Pset_fapl_mpio(mpio_pl, MPI_COMM_WORLD, MPI_INFO_NULL);
189     VRFY((ret >= 0), "", H5FATAL);
190 
191     /* set optional allocation alignment */
192     if (opt_alignment*opt_threshold != 1){
193         ret = H5Pset_alignment(acc_tpl, opt_threshold, opt_alignment );
194         VRFY((ret >= 0), "H5Pset_alignment succeeded", !H5FATAL);
195     }
196 
197     /* setup file access template */
198     acc_tpl = H5Pcreate (H5P_FILE_ACCESS);
199     VRFY((acc_tpl >= 0), "", H5FATAL);
200     ret = H5Pset_fapl_split(acc_tpl, meta_ext, mpio_pl, raw_ext, mpio_pl);
201     VRFY((ret >= 0), "H5Pset_fapl_split succeeded", H5FATAL);
202     ret = H5Pclose(mpio_pl);
203     VRFY((ret >= 0), "H5Pclose mpio_pl succeeded", H5FATAL);
204     }else{
205     /* setup file access template */
206     acc_tpl = H5Pcreate (H5P_FILE_ACCESS);
207     VRFY((acc_tpl >= 0), "", H5FATAL);
208     ret = H5Pset_fapl_mpio(acc_tpl, MPI_COMM_WORLD, MPI_INFO_NULL);
209     VRFY((ret >= 0), "", H5FATAL);
210 
211     /* set optional allocation alignment */
212     if (opt_alignment*opt_threshold != 1){
213         ret = H5Pset_alignment(acc_tpl, opt_threshold, opt_alignment );
214         VRFY((ret >= 0), "H5Pset_alignment succeeded", !H5FATAL);
215     }
216     }
217 
218     h5_fixname_no_suffix(FILENAME[0], acc_tpl, filename, sizeof filename);
219 
220     /* create the parallel file */
221     fid = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, acc_tpl);
222     VRFY((fid >= 0), "H5Fcreate succeeded", H5FATAL);
223 
224     /* define a contiquous dataset of opt_iter*nprocs*opt_block chars */
225     dims[0] = opt_iter * nprocs * opt_block;
226     sid = H5Screate_simple(RANK, dims, NULL);
227     VRFY((sid >= 0), "H5Screate_simple succeeded", H5FATAL);
228     dataset = H5Dcreate2(fid, "Dataset1", H5T_NATIVE_CHAR, sid,
229             H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
230     VRFY((dataset >= 0), "H5Dcreate2 succeeded", H5FATAL);
231 
232     /* create the memory dataspace and the file dataspace */
233     dims[0] = opt_block;
234     mem_dataspace = H5Screate_simple(RANK, dims, NULL);
235     VRFY((mem_dataspace >= 0), "", H5FATAL);
236     file_dataspace = H5Dget_space(dataset);
237     VRFY((file_dataspace >= 0), "H5Dget_space succeeded", H5FATAL);
238 
239     /* now each process writes a block of opt_block chars in round robbin
240      * fashion until the whole dataset is covered.
241      */
242     for(j=0; j < opt_iter; j++) {
243         /* setup a file dataspace selection */
244         start[0] = (hsize_t)((j * iter_jump) + (mynod * opt_block));
245         stride[0] = block[0] = opt_block;
246         count[0]= 1;
247         ret=H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block);
248         VRFY((ret >= 0), "H5Sset_hyperslab succeeded", H5FATAL);
249 
250             if (opt_correct) /* fill in buffer for iteration */ {
251                     for (i=mynod+j, check=buf; i<opt_block; i++,check++) *check=(char)i;
252             }
253 
254             /* discover the starting time of the operation */
255        MPI_Barrier(MPI_COMM_WORLD);
256        stim = MPI_Wtime();
257 
258         /* write data */
259         ret = H5Dwrite(dataset, H5T_NATIVE_CHAR, mem_dataspace, file_dataspace,
260                 H5P_DEFAULT, buf);
261         VRFY((ret >= 0), "H5Dwrite dataset1 succeeded", !H5FATAL);
262 
263             /* discover the ending time of the operation */
264        etim = MPI_Wtime();
265 
266        write_tim += (etim - stim);
267 
268             /* we are done with this "write" iteration */
269     }
270 
271     /* close dataset and file */
272     ret=H5Dclose(dataset);
273     VRFY((ret >= 0), "H5Dclose succeeded", H5FATAL);
274     ret=H5Fclose(fid);
275     VRFY((ret >= 0), "H5Fclose succeeded", H5FATAL);
276 
277 
278 
279     /* wait for everyone to synchronize at this point */
280     MPI_Barrier(MPI_COMM_WORLD);
281 
282     /* reopen the file for reading */
283     fid=H5Fopen(filename,H5F_ACC_RDONLY,acc_tpl);
284     VRFY((fid >= 0), "", H5FATAL);
285 
286     /* open the dataset */
287     dataset = H5Dopen2(fid, "Dataset1", H5P_DEFAULT);
288     VRFY((dataset >= 0), "H5Dopen succeeded", H5FATAL);
289 
290     /* we can re-use the same mem_dataspace and file_dataspace
291      * the H5Dwrite used since the dimension size is the same.
292      */
293 
294     /* we are going to repeat the read the same pattern the write used */
295     for (j=0; j < opt_iter; j++) {
296         /* setup a file dataspace selection */
297         start[0] = (hsize_t)((j * iter_jump) + (mynod * opt_block));
298         stride[0] = block[0] = opt_block;
299         count[0]= 1;
300         ret=H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block);
301         VRFY((ret >= 0), "H5Sset_hyperslab succeeded", H5FATAL);
302             /* seek to the appropriate spot give the current iteration and
303              * rank within the MPI processes */
304 
305             /* discover the start time */
306        MPI_Barrier(MPI_COMM_WORLD);
307        stim = MPI_Wtime();
308 
309         /* read in the file data */
310         if (!opt_correct){
311             ret = H5Dread(dataset, H5T_NATIVE_CHAR, mem_dataspace, file_dataspace, H5P_DEFAULT, buf);
312         }
313         else{
314             ret = H5Dread(dataset, H5T_NATIVE_CHAR, mem_dataspace, file_dataspace, H5P_DEFAULT, buf2);
315         }
316         myerrno = errno;
317 
318         /* discover the end time */
319        etim = MPI_Wtime();
320        read_tim += (etim - stim);
321         VRFY((ret >= 0), "H5Dwrite dataset1 succeeded", !H5FATAL);
322 
323 
324        if (ret < 0) HDfprintf(stderr, "node %d, read error, loc = %Ld: %s\n",
325                     mynod, mynod*opt_block, strerror(myerrno));
326 
327         /* if the user wanted to check correctness, compare the write
328          * buffer to the read buffer */
329         if (opt_correct && memcmp(buf, buf2, opt_block)) {
330                 HDfprintf(stderr, "node %d, correctness test failed\n", mynod);
331                 my_correct = 0;
332                 MPI_Allreduce(&my_correct, &correct, 1, MPI_INT, MPI_MIN,
333                         MPI_COMM_WORLD);
334         }
335 
336         /* we are done with this read iteration */
337     }
338 
339     /* close dataset and file */
340     ret=H5Dclose(dataset);
341     VRFY((ret >= 0), "H5Dclose succeeded", H5FATAL);
342     ret=H5Fclose(fid);
343     VRFY((ret >= 0), "H5Fclose succeeded", H5FATAL);
344     ret=H5Pclose(acc_tpl);
345     VRFY((ret >= 0), "H5Pclose succeeded", H5FATAL);
346 
347     /* compute the read and write times */
348     MPI_Allreduce(&read_tim, &max_read_tim, 1, MPI_DOUBLE, MPI_MAX,
349             MPI_COMM_WORLD);
350     MPI_Allreduce(&read_tim, &min_read_tim, 1, MPI_DOUBLE, MPI_MIN,
351             MPI_COMM_WORLD);
352     MPI_Allreduce(&read_tim, &ave_read_tim, 1, MPI_DOUBLE, MPI_SUM,
353             MPI_COMM_WORLD);
354 
355     /* calculate the average from the sum */
356     ave_read_tim = ave_read_tim / nprocs;
357 
358     MPI_Allreduce(&write_tim, &max_write_tim, 1, MPI_DOUBLE, MPI_MAX,
359             MPI_COMM_WORLD);
360     MPI_Allreduce(&write_tim, &min_write_tim, 1, MPI_DOUBLE, MPI_MIN,
361             MPI_COMM_WORLD);
362     MPI_Allreduce(&write_tim, &ave_write_tim, 1, MPI_DOUBLE, MPI_SUM,
363             MPI_COMM_WORLD);
364 
365     /* calculate the average from the sum */
366     ave_write_tim = ave_write_tim / nprocs;
367 
368     /* print out the results on one node */
369     if (mynod == 0) {
370        read_bw = ((int64_t)(opt_block*nprocs*opt_iter))/(max_read_tim*1000000.0);
371        write_bw = ((int64_t)(opt_block*nprocs*opt_iter))/(max_write_tim*1000000.0);
372 
373                     printf("nr_procs = %d, nr_iter = %d, blk_sz = %ld\n", nprocs,
374             opt_iter, (long)opt_block);
375 
376                     printf("# total_size = %ld\n", (long)(opt_block*nprocs*opt_iter));
377 
378                     printf("# Write:  min_time = %f, max_time = %f, mean_time = %f\n",
379                             min_write_tim, max_write_tim, ave_write_tim);
380                     printf("# Read:  min_time = %f, max_time = %f, mean_time = %f\n",
381                             min_read_tim, max_read_tim, ave_read_tim);
382 
383        printf("Write bandwidth = %f Mbytes/sec\n", write_bw);
384        printf("Read bandwidth = %f Mbytes/sec\n", read_bw);
385 
386             if (opt_correct) {
387                     printf("Correctness test %s.\n", correct ? "passed" : "failed");
388             }
389     }
390 
391 
392 die_jar_jar_die:
393 
394 #if H5_HAVE_SETENV
395 /* no setenv or unsetenv */
396     /* clear the environment variable if it was set earlier */
397     if  (opt_pvfstab_set){
398             unsetenv("PVFSTAB_FILE");
399     }
400 #endif
401 
402     free(tmp);
403     if (opt_correct) free(tmp2);
404 
405     MPI_Finalize();
406 
407     return(0);
408 }
409 
410 static int
parse_args(int argc,char ** argv)411 parse_args(int argc, char **argv)
412 {
413     int c;
414 
415     while ((c = getopt(argc, argv, "s:b:i:f:p:a:2:c")) != EOF) {
416         switch (c) {
417             case 's': /* stripe */
418                 opt_stripe = atoi(optarg);
419                 break;
420             case 'b': /* block size */
421                 opt_block = atoi(optarg);
422                 break;
423             case 'i': /* iterations */
424                 opt_iter = atoi(optarg);
425                 break;
426             case 'f': /* filename */
427                 strncpy(opt_file, optarg, 255);
428                 FILENAME[0] = opt_file;
429                 break;
430             case 'p': /* pvfstab file */
431                 strncpy(opt_pvfstab, optarg, 255);
432                 opt_pvfstab_set = 1;
433                 break;
434             case 'a': /* aligned allocation.
435                        * syntax: -a<alignment>/<threshold>
436                        * e.g., -a4096/512  allocate at 4096 bytes
437                        * boundary if request size >= 512.
438                        */
439                 {
440                     char *p;
441 
442                     opt_alignment = HDatoi(optarg);
443                     if(NULL != (p = (char*)HDstrchr(optarg, '/')))
444                         opt_threshold = HDatoi(p + 1);
445                 }
446                 HDfprintf(stdout,
447                     "alignment/threshold=%Hu/%Hu\n",
448                      opt_alignment, opt_threshold);
449                 break;
450             case '2': /* use 2-files, i.e., split file driver */
451                 opt_split_vfd=1;
452                 /* get meta and raw file extension. */
453                 /* syntax is <raw_ext>,<meta_ext> */
454                 meta_ext = raw_ext = optarg;
455                 while (*raw_ext != '\0'){
456                     if (*raw_ext == ','){
457                         *raw_ext = '\0';
458                         raw_ext++;
459                         break;
460                     }
461                     raw_ext++;
462                 }
463                 printf("split-file-vfd used: %s,%s\n",
464                     meta_ext, raw_ext);
465                 break;
466             case 'c': /* correctness */
467                 opt_correct = 1;
468                 break;
469             case '?': /* unknown */
470             default:
471                 break;
472         }
473     }
474 
475     return(0);
476 }
477 
478 /*
479  * Local variables:
480  *  c-indent-level: 3
481  *  c-basic-offset: 3
482  *  tab-width: 3
483  * End:
484  */
485 
486 #else /* H5_HAVE_PARALLEL */
487 /* dummy program since H5_HAVE_PARALLEL is not configured in */
488 int
main(int H5_ATTR_UNUSED argc,char H5_ATTR_UNUSED ** argv)489 main(int H5_ATTR_UNUSED argc, char H5_ATTR_UNUSED **argv)
490 {
491     printf("No parallel performance because parallel is not configured in\n");
492     return(0);
493 }
494 #endif /* H5_HAVE_PARALLEL */
495 
496