1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2  * Copyright by The HDF Group.                                               *
3  * Copyright by the Board of Trustees of the University of Illinois.         *
4  * All rights reserved.                                                      *
5  *                                                                           *
6  * This file is part of HDF5.  The full HDF5 copyright notice, including     *
7  * terms governing use, modification, and redistribution, is contained in    *
8  * the COPYING file, which can be found at the root of the source code       *
9  * distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases.  *
10  * If you do not have access to either file, you may request a copy from     *
11  * help@hdfgroup.org.                                                        *
12  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
13 
14 /*
15  * Author: Albert Cheng of NCSA, May 1, 2001.
16  * This is derived from code given to me by Robert Ross.
17  *
18  * NOTE: This code assumes that all command line arguments make it out to all
19  * the processes that make up the parallel job, which isn't always the case.
20  * So if it doesn't work on some platform, that might be why.
21  */
22 
23 #include "hdf5.h"
24 #include "H5private.h"
25 
26 #ifdef H5_HAVE_PARALLEL
27 
28 #ifdef H5_STDC_HEADERS
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #endif
35 
36 #ifdef H5_HAVE_UNISTD_H
37 #include <sys/types.h>
38 #include <unistd.h>
39 #endif
40 
41 #ifdef H5_HAVE_SYS_STAT_H
42 #include <sys/stat.h>
43 #endif
44 
45 #if defined(H5_TIME_WITH_SYS_TIME)
46 #   include <sys/time.h>
47 #   include <time.h>
48 #elif defined(H5_HAVE_SYS_TIME_H)
49 #   include <sys/time.h>
50 #else
51 #   include <time.h>
52 #endif
53 
54 #include <mpi.h>
55 #ifndef MPI_FILE_NULL           /*MPIO may be defined in mpi.h already       */
56 #   include <mpio.h>
57 #endif
58 
59 /* Macro definitions */
60 /* Verify:
61  * if val is false (0), print mesg and if fatal is true (non-zero), die.
62  */
63 #define H5FATAL 1
64 #define VRFY(val, mesg, fatal) do {                                            \
65     if (!val) {                                                                \
66   	printf("Proc %d: ", mynod);                 \
67         printf("*** Assertion failed (%s) at line %4d in %s\n",                \
68       mesg, (int)__LINE__, __FILE__);                  \
69   	if (fatal){                     \
70       fflush(stdout);                   \
71       goto die_jar_jar_die;                 \
72   	}                       \
73     }                                                                          \
74 } while(0)
75 #define RANK 1
76 #define MAX_PATH 1024
77 
78 hsize_t dims[RANK];     /* dataset dim sizes */
79 hsize_t block[RANK], stride[RANK], count[RANK];
80 hssize_t start[RANK];
81 hid_t fid;                  /* HDF5 file ID */
82 hid_t acc_tpl;    /* File access templates */
83 hid_t sid;       /* Dataspace ID */
84 hid_t file_dataspace;  /* File dataspace ID */
85 hid_t mem_dataspace;  /* memory dataspace ID */
86 hid_t dataset;    /* Dataset ID */
87 hsize_t opt_alignment  = 1;
88 hsize_t opt_threshold  = 1;
89 int  opt_split_vfd  = 0;
90 char  *meta_ext, *raw_ext;  /* holds the meta and raw file extension if */
91         /* opt_split_vfd is set */
92 
93 
94 /* DEFAULT VALUES FOR OPTIONS */
95 int64_t opt_block     = 1048576*16;
96 int     opt_iter      = 1;
97 int     opt_stripe    = -1;
98 int     opt_correct   = 0;
99 int     amode         = O_RDWR | O_CREAT;
100 char    opt_file[256] = "perftest.out";
101 char    opt_pvfstab[256] = "notset";
102 int     opt_pvfstab_set = 0;
103 
104 const char *FILENAME[] = {
105    opt_file,
106    NULL
107 };
108 
109 /* function prototypes */
110 static int parse_args(int argc, char **argv);
111 
112 extern int errno;
113 
114 /* globals needed for getopt */
115 extern char *optarg;
116 
main(int argc,char ** argv)117 int main(int argc, char **argv)
118 {
119     char *buf, *tmp, *buf2, *tmp2, *check;
120     int i, j, mynod=0, nprocs=1, err, my_correct = 1, correct, myerrno;
121     double stim, etim;
122     double write_tim = 0;
123     double read_tim = 0;
124     double read_bw, write_bw;
125     double max_read_tim, max_write_tim;
126     double min_read_tim, min_write_tim;
127     double ave_read_tim, ave_write_tim;
128     int64_t iter_jump = 0;
129     int64_t seek_position = 0;
130     MPI_File fh;
131     MPI_Status status;
132     int nchars;
133     char filename[MAX_PATH];
134     herr_t ret;           /* Generic return value */
135 
136     /* startup MPI and determine the rank of this process */
137     MPI_Init(&argc,&argv);
138     MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
139     MPI_Comm_rank(MPI_COMM_WORLD, &mynod);
140 
141     /* parse the command line arguments */
142     parse_args(argc, argv);
143 
144     if (mynod == 0) printf("# Using hdf5-io calls.\n");
145 
146     /* kindof a weird hack- if the location of the pvfstab file was
147      * specified on the command line, then spit out this location into
148      * the appropriate environment variable: */
149 
150 #if H5_HAVE_SETENV
151 /* no setenv or unsetenv */
152     if (opt_pvfstab_set) {
153             if((setenv("PVFSTAB_FILE", opt_pvfstab, 1)) < 0){
154                     perror("setenv");
155                     goto die_jar_jar_die;
156             }
157     }
158 #endif
159 
160     /* this is how much of the file data is covered on each iteration of
161      * the test.  used to help determine the seek offset on each
162      * iteration */
163     iter_jump = nprocs * opt_block;
164 
165     /* setup a buffer of data to write */
166     if (!(tmp = (char *) malloc(opt_block + 256))) {
167             perror("malloc");
168             goto die_jar_jar_die;
169     }
170     buf = tmp + 128 - (((long)tmp) % 128);  /* align buffer */
171 
172     if (opt_correct) {
173             /* do the same buffer setup for verifiable data */
174             if (!(tmp2 = (char *) malloc(opt_block + 256))) {
175                     perror("malloc2");
176                     goto die_jar_jar_die;
177              }
178             buf2 = tmp + 128 - (((long)tmp) % 128);
179     }
180 
181     /* setup file access template with parallel IO access. */
182     if (opt_split_vfd){
183   	hid_t mpio_pl;
184 
185   	mpio_pl = H5Pcreate (H5P_FILE_ACCESS);
186   	VRFY((acc_tpl >= 0), "", H5FATAL);
187   	ret = H5Pset_fapl_mpio(mpio_pl, MPI_COMM_WORLD, MPI_INFO_NULL);
188   	VRFY((ret >= 0), "", H5FATAL);
189 
190   	/* set optional allocation alignment */
191   	if (opt_alignment*opt_threshold != 1){
192       	ret = H5Pset_alignment(acc_tpl, opt_threshold, opt_alignment );
193       	VRFY((ret >= 0), "H5Pset_alignment succeeded", !H5FATAL);
194   	}
195 
196   	/* setup file access template */
197   	acc_tpl = H5Pcreate (H5P_FILE_ACCESS);
198   	VRFY((acc_tpl >= 0), "", H5FATAL);
199   	ret = H5Pset_fapl_split(acc_tpl, meta_ext, mpio_pl, raw_ext, mpio_pl);
200   	VRFY((ret >= 0), "H5Pset_fapl_split succeeded", H5FATAL);
201   	ret = H5Pclose(mpio_pl);
202   	VRFY((ret >= 0), "H5Pclose mpio_pl succeeded", H5FATAL);
203     }else{
204   	/* setup file access template */
205   	acc_tpl = H5Pcreate (H5P_FILE_ACCESS);
206   	VRFY((acc_tpl >= 0), "", H5FATAL);
207   	ret = H5Pset_fapl_mpio(acc_tpl, MPI_COMM_WORLD, MPI_INFO_NULL);
208   	VRFY((ret >= 0), "", H5FATAL);
209 
210   	/* set optional allocation alignment */
211   	if (opt_alignment*opt_threshold != 1){
212   	    ret = H5Pset_alignment(acc_tpl, opt_threshold, opt_alignment );
213       	VRFY((ret >= 0), "H5Pset_alignment succeeded", !H5FATAL);
214   	}
215     }
216 
217     h5_fixname_no_suffix(FILENAME[0], acc_tpl, filename, sizeof filename);
218 
219     /* create the parallel file */
220     fid = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, acc_tpl);
221     VRFY((fid >= 0), "H5Fcreate succeeded", H5FATAL);
222 
223     /* define a contiquous dataset of opt_iter*nprocs*opt_block chars */
224     dims[0] = opt_iter * nprocs * opt_block;
225     sid = H5Screate_simple(RANK, dims, NULL);
226     VRFY((sid >= 0), "H5Screate_simple succeeded", H5FATAL);
227     dataset = H5Dcreate2(fid, "Dataset1", H5T_NATIVE_CHAR, sid,
228       		H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
229     VRFY((dataset >= 0), "H5Dcreate2 succeeded", H5FATAL);
230 
231     /* create the memory dataspace and the file dataspace */
232     dims[0] = opt_block;
233     mem_dataspace = H5Screate_simple(RANK, dims, NULL);
234     VRFY((mem_dataspace >= 0), "", H5FATAL);
235     file_dataspace = H5Dget_space(dataset);
236     VRFY((file_dataspace >= 0), "H5Dget_space succeeded", H5FATAL);
237 
238     /* now each process writes a block of opt_block chars in round robbin
239      * fashion until the whole dataset is covered.
240      */
241     for(j=0; j < opt_iter; j++) {
242         /* setup a file dataspace selection */
243         start[0] = (j*iter_jump)+(mynod*opt_block);
244         stride[0] = block[0] = opt_block;
245         count[0]= 1;
246         ret=H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block);
247         VRFY((ret >= 0), "H5Sset_hyperslab succeeded", H5FATAL);
248 
249             if (opt_correct) /* fill in buffer for iteration */ {
250                     for (i=mynod+j, check=buf; i<opt_block; i++,check++) *check=(char)i;
251             }
252 
253             /* discover the starting time of the operation */
254        MPI_Barrier(MPI_COMM_WORLD);
255        stim = MPI_Wtime();
256 
257         /* write data */
258         ret = H5Dwrite(dataset, H5T_NATIVE_CHAR, mem_dataspace, file_dataspace,
259                 H5P_DEFAULT, buf);
260         VRFY((ret >= 0), "H5Dwrite dataset1 succeeded", !H5FATAL);
261 
262             /* discover the ending time of the operation */
263        etim = MPI_Wtime();
264 
265        write_tim += (etim - stim);
266 
267             /* we are done with this "write" iteration */
268     }
269 
270     /* close dataset and file */
271     ret=H5Dclose(dataset);
272     VRFY((ret >= 0), "H5Dclose succeeded", H5FATAL);
273     ret=H5Fclose(fid);
274     VRFY((ret >= 0), "H5Fclose succeeded", H5FATAL);
275 
276 
277 
278     /* wait for everyone to synchronize at this point */
279     MPI_Barrier(MPI_COMM_WORLD);
280 
281     /* reopen the file for reading */
282     fid=H5Fopen(filename,H5F_ACC_RDONLY,acc_tpl);
283     VRFY((fid >= 0), "", H5FATAL);
284 
285     /* open the dataset */
286     dataset = H5Dopen2(fid, "Dataset1", H5P_DEFAULT);
287     VRFY((dataset >= 0), "H5Dopen succeeded", H5FATAL);
288 
289     /* we can re-use the same mem_dataspace and file_dataspace
290      * the H5Dwrite used since the dimension size is the same.
291      */
292 
293     /* we are going to repeat the read the same pattern the write used */
294     for (j=0; j < opt_iter; j++) {
295         /* setup a file dataspace selection */
296         start[0] = (j*iter_jump)+(mynod*opt_block);
297         stride[0] = block[0] = opt_block;
298         count[0]= 1;
299         ret=H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block);
300         VRFY((ret >= 0), "H5Sset_hyperslab succeeded", H5FATAL);
301             /* seek to the appropriate spot give the current iteration and
302              * rank within the MPI processes */
303 
304             /* discover the start time */
305        MPI_Barrier(MPI_COMM_WORLD);
306        stim = MPI_Wtime();
307 
308         /* read in the file data */
309         if (!opt_correct){
310             ret = H5Dread(dataset, H5T_NATIVE_CHAR, mem_dataspace, file_dataspace, H5P_DEFAULT, buf);
311         }
312         else{
313             ret = H5Dread(dataset, H5T_NATIVE_CHAR, mem_dataspace, file_dataspace, H5P_DEFAULT, buf2);
314         }
315         myerrno = errno;
316 
317         /* discover the end time */
318        etim = MPI_Wtime();
319        read_tim += (etim - stim);
320         VRFY((ret >= 0), "H5Dwrite dataset1 succeeded", !H5FATAL);
321 
322 
323        if (ret < 0) fprintf(stderr, "node %d, read error, loc = %Ld: %s\n",
324                     mynod, mynod*opt_block, strerror(myerrno));
325 
326         /* if the user wanted to check correctness, compare the write
327          * buffer to the read buffer */
328         if (opt_correct && memcmp(buf, buf2, opt_block)) {
329                 fprintf(stderr, "node %d, correctness test failed\n", mynod);
330                 my_correct = 0;
331                 MPI_Allreduce(&my_correct, &correct, 1, MPI_INT, MPI_MIN,
332                         MPI_COMM_WORLD);
333         }
334 
335         /* we are done with this read iteration */
336     }
337 
338     /* close dataset and file */
339     ret=H5Dclose(dataset);
340     VRFY((ret >= 0), "H5Dclose succeeded", H5FATAL);
341     ret=H5Fclose(fid);
342     VRFY((ret >= 0), "H5Fclose succeeded", H5FATAL);
343     ret=H5Pclose(acc_tpl);
344     VRFY((ret >= 0), "H5Pclose succeeded", H5FATAL);
345 
346     /* compute the read and write times */
347     MPI_Allreduce(&read_tim, &max_read_tim, 1, MPI_DOUBLE, MPI_MAX,
348             MPI_COMM_WORLD);
349     MPI_Allreduce(&read_tim, &min_read_tim, 1, MPI_DOUBLE, MPI_MIN,
350             MPI_COMM_WORLD);
351     MPI_Allreduce(&read_tim, &ave_read_tim, 1, MPI_DOUBLE, MPI_SUM,
352             MPI_COMM_WORLD);
353 
354     /* calculate the average from the sum */
355     ave_read_tim = ave_read_tim / nprocs;
356 
357     MPI_Allreduce(&write_tim, &max_write_tim, 1, MPI_DOUBLE, MPI_MAX,
358             MPI_COMM_WORLD);
359     MPI_Allreduce(&write_tim, &min_write_tim, 1, MPI_DOUBLE, MPI_MIN,
360             MPI_COMM_WORLD);
361     MPI_Allreduce(&write_tim, &ave_write_tim, 1, MPI_DOUBLE, MPI_SUM,
362             MPI_COMM_WORLD);
363 
364     /* calculate the average from the sum */
365     ave_write_tim = ave_write_tim / nprocs;
366 
367     /* print out the results on one node */
368     if (mynod == 0) {
369        read_bw = ((int64_t)(opt_block*nprocs*opt_iter))/(max_read_tim*1000000.0);
370        write_bw = ((int64_t)(opt_block*nprocs*opt_iter))/(max_write_tim*1000000.0);
371 
372                     printf("nr_procs = %d, nr_iter = %d, blk_sz = %ld\n", nprocs,
373             opt_iter, (long)opt_block);
374 
375                     printf("# total_size = %ld\n", (long)(opt_block*nprocs*opt_iter));
376 
377                     printf("# Write:  min_time = %f, max_time = %f, mean_time = %f\n",
378                             min_write_tim, max_write_tim, ave_write_tim);
379                     printf("# Read:  min_time = %f, max_time = %f, mean_time = %f\n",
380                             min_read_tim, max_read_tim, ave_read_tim);
381 
382        printf("Write bandwidth = %f Mbytes/sec\n", write_bw);
383        printf("Read bandwidth = %f Mbytes/sec\n", read_bw);
384 
385             if (opt_correct) {
386                     printf("Correctness test %s.\n", correct ? "passed" : "failed");
387             }
388     }
389 
390 
391 die_jar_jar_die:
392 
393 #if H5_HAVE_SETENV
394 /* no setenv or unsetenv */
395     /* clear the environment variable if it was set earlier */
396     if  (opt_pvfstab_set){
397             unsetenv("PVFSTAB_FILE");
398     }
399 #endif
400 
401     free(tmp);
402     if (opt_correct) free(tmp2);
403 
404     MPI_Finalize();
405 
406     return(0);
407 }
408 
409 static int
parse_args(int argc,char ** argv)410 parse_args(int argc, char **argv)
411 {
412     int c;
413 
414     while ((c = getopt(argc, argv, "s:b:i:f:p:a:2:c")) != EOF) {
415         switch (c) {
416             case 's': /* stripe */
417                 opt_stripe = atoi(optarg);
418                 break;
419             case 'b': /* block size */
420                 opt_block = atoi(optarg);
421                 break;
422             case 'i': /* iterations */
423                 opt_iter = atoi(optarg);
424                 break;
425             case 'f': /* filename */
426                 strncpy(opt_file, optarg, 255);
427                 FILENAME[0] = opt_file;
428                 break;
429             case 'p': /* pvfstab file */
430                 strncpy(opt_pvfstab, optarg, 255);
431                 opt_pvfstab_set = 1;
432                 break;
433             case 'a': /* aligned allocation.
434                        * syntax: -a<alignment>/<threshold>
435                        * e.g., -a4096/512  allocate at 4096 bytes
436                        * boundary if request size >= 512.
437                        */
438                 {char *p;
439                 opt_alignment = atoi(optarg);
440                 if (p=(char*)strchr(optarg, '/'))
441                     opt_threshold = atoi(p+1);
442                 }
443                 HDfprintf(stdout,
444                     "alignment/threshold=%Hu/%Hu\n",
445                      opt_alignment, opt_threshold);
446                 break;
447             case '2': /* use 2-files, i.e., split file driver */
448                 opt_split_vfd=1;
449                 /* get meta and raw file extension. */
450                 /* syntax is <raw_ext>,<meta_ext> */
451                 meta_ext = raw_ext = optarg;
452                 while (*raw_ext != '\0'){
453                     if (*raw_ext == ','){
454                         *raw_ext = '\0';
455                         raw_ext++;
456                         break;
457                     }
458                     raw_ext++;
459                 }
460                 printf("split-file-vfd used: %s,%s\n",
461                     meta_ext, raw_ext);
462                 break;
463             case 'c': /* correctness */
464                 opt_correct = 1;
465                 break;
466             case '?': /* unknown */
467             default:
468                 break;
469         }
470     }
471 
472     return(0);
473 }
474 
475 /*
476  * Local variables:
477  *  c-indent-level: 3
478  *  c-basic-offset: 3
479  *  tab-width: 3
480  * End:
481  */
482 
483 #else /* H5_HAVE_PARALLEL */
484 /* dummy program since H5_HAVE_PARALLEL is not configured in */
485 int
main(int H5_ATTR_UNUSED argc,char H5_ATTR_UNUSED ** argv)486 main(int H5_ATTR_UNUSED argc, char H5_ATTR_UNUSED **argv)
487 {
488     printf("No parallel performance because parallel is not configured in\n");
489     return(0);
490 }
491 #endif /* H5_HAVE_PARALLEL */
492 
493