1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Copyright by The HDF Group. *
3 * Copyright by the Board of Trustees of the University of Illinois. *
4 * All rights reserved. *
5 * *
6 * This file is part of HDF5. The full HDF5 copyright notice, including *
7 * terms governing use, modification, and redistribution, is contained in *
8 * the COPYING file, which can be found at the root of the source code *
9 * distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases. *
10 * If you do not have access to either file, you may request a copy from *
11 * help@hdfgroup.org. *
12 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
13
14 /*
15 * Author: Albert Cheng of NCSA, May 1, 2001.
16 * This is derived from code given to me by Robert Ross.
17 *
18 * NOTE: This code assumes that all command line arguments make it out to all
19 * the processes that make up the parallel job, which isn't always the case.
20 * So if it doesn't work on some platform, that might be why.
21 */
22
23 #include "hdf5.h"
24 #include "H5private.h"
25
26 #ifdef H5_HAVE_PARALLEL
27
28 #ifdef H5_STDC_HEADERS
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #endif
35
36 #ifdef H5_HAVE_UNISTD_H
37 #include <sys/types.h>
38 #include <unistd.h>
39 #endif
40
41 #ifdef H5_HAVE_SYS_STAT_H
42 #include <sys/stat.h>
43 #endif
44
45 #if defined(H5_TIME_WITH_SYS_TIME)
46 # include <sys/time.h>
47 # include <time.h>
48 #elif defined(H5_HAVE_SYS_TIME_H)
49 # include <sys/time.h>
50 #else
51 # include <time.h>
52 #endif
53
54 #include <mpi.h>
55 #ifndef MPI_FILE_NULL /*MPIO may be defined in mpi.h already */
56 # include <mpio.h>
57 #endif
58
59 /* Macro definitions */
60 /* Verify:
61 * if val is false (0), print mesg and if fatal is true (non-zero), die.
62 */
63 #define H5FATAL 1
64 #define VRFY(val, mesg, fatal) do { \
65 if (!val) { \
66 printf("Proc %d: ", mynod); \
67 printf("*** Assertion failed (%s) at line %4d in %s\n", \
68 mesg, (int)__LINE__, __FILE__); \
69 if (fatal){ \
70 fflush(stdout); \
71 goto die_jar_jar_die; \
72 } \
73 } \
74 } while(0)
75 #define RANK 1
76 #define MAX_PATH 1024
77
78 hsize_t dims[RANK]; /* dataset dim sizes */
79 hsize_t block[RANK], stride[RANK], count[RANK];
80 hssize_t start[RANK];
81 hid_t fid; /* HDF5 file ID */
82 hid_t acc_tpl; /* File access templates */
83 hid_t sid; /* Dataspace ID */
84 hid_t file_dataspace; /* File dataspace ID */
85 hid_t mem_dataspace; /* memory dataspace ID */
86 hid_t dataset; /* Dataset ID */
87 hsize_t opt_alignment = 1;
88 hsize_t opt_threshold = 1;
89 int opt_split_vfd = 0;
90 char *meta_ext, *raw_ext; /* holds the meta and raw file extension if */
91 /* opt_split_vfd is set */
92
93
94 /* DEFAULT VALUES FOR OPTIONS */
95 int64_t opt_block = 1048576*16;
96 int opt_iter = 1;
97 int opt_stripe = -1;
98 int opt_correct = 0;
99 int amode = O_RDWR | O_CREAT;
100 char opt_file[256] = "perftest.out";
101 char opt_pvfstab[256] = "notset";
102 int opt_pvfstab_set = 0;
103
104 const char *FILENAME[] = {
105 opt_file,
106 NULL
107 };
108
109 /* function prototypes */
110 static int parse_args(int argc, char **argv);
111
112 extern int errno;
113
114 /* globals needed for getopt */
115 extern char *optarg;
116
main(int argc,char ** argv)117 int main(int argc, char **argv)
118 {
119 char *buf, *tmp, *buf2, *tmp2, *check;
120 int i, j, mynod=0, nprocs=1, err, my_correct = 1, correct, myerrno;
121 double stim, etim;
122 double write_tim = 0;
123 double read_tim = 0;
124 double read_bw, write_bw;
125 double max_read_tim, max_write_tim;
126 double min_read_tim, min_write_tim;
127 double ave_read_tim, ave_write_tim;
128 int64_t iter_jump = 0;
129 int64_t seek_position = 0;
130 MPI_File fh;
131 MPI_Status status;
132 int nchars;
133 char filename[MAX_PATH];
134 herr_t ret; /* Generic return value */
135
136 /* startup MPI and determine the rank of this process */
137 MPI_Init(&argc,&argv);
138 MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
139 MPI_Comm_rank(MPI_COMM_WORLD, &mynod);
140
141 /* parse the command line arguments */
142 parse_args(argc, argv);
143
144 if (mynod == 0) printf("# Using hdf5-io calls.\n");
145
146 /* kindof a weird hack- if the location of the pvfstab file was
147 * specified on the command line, then spit out this location into
148 * the appropriate environment variable: */
149
150 #if H5_HAVE_SETENV
151 /* no setenv or unsetenv */
152 if (opt_pvfstab_set) {
153 if((setenv("PVFSTAB_FILE", opt_pvfstab, 1)) < 0){
154 perror("setenv");
155 goto die_jar_jar_die;
156 }
157 }
158 #endif
159
160 /* this is how much of the file data is covered on each iteration of
161 * the test. used to help determine the seek offset on each
162 * iteration */
163 iter_jump = nprocs * opt_block;
164
165 /* setup a buffer of data to write */
166 if (!(tmp = (char *) malloc(opt_block + 256))) {
167 perror("malloc");
168 goto die_jar_jar_die;
169 }
170 buf = tmp + 128 - (((long)tmp) % 128); /* align buffer */
171
172 if (opt_correct) {
173 /* do the same buffer setup for verifiable data */
174 if (!(tmp2 = (char *) malloc(opt_block + 256))) {
175 perror("malloc2");
176 goto die_jar_jar_die;
177 }
178 buf2 = tmp + 128 - (((long)tmp) % 128);
179 }
180
181 /* setup file access template with parallel IO access. */
182 if (opt_split_vfd){
183 hid_t mpio_pl;
184
185 mpio_pl = H5Pcreate (H5P_FILE_ACCESS);
186 VRFY((acc_tpl >= 0), "", H5FATAL);
187 ret = H5Pset_fapl_mpio(mpio_pl, MPI_COMM_WORLD, MPI_INFO_NULL);
188 VRFY((ret >= 0), "", H5FATAL);
189
190 /* set optional allocation alignment */
191 if (opt_alignment*opt_threshold != 1){
192 ret = H5Pset_alignment(acc_tpl, opt_threshold, opt_alignment );
193 VRFY((ret >= 0), "H5Pset_alignment succeeded", !H5FATAL);
194 }
195
196 /* setup file access template */
197 acc_tpl = H5Pcreate (H5P_FILE_ACCESS);
198 VRFY((acc_tpl >= 0), "", H5FATAL);
199 ret = H5Pset_fapl_split(acc_tpl, meta_ext, mpio_pl, raw_ext, mpio_pl);
200 VRFY((ret >= 0), "H5Pset_fapl_split succeeded", H5FATAL);
201 ret = H5Pclose(mpio_pl);
202 VRFY((ret >= 0), "H5Pclose mpio_pl succeeded", H5FATAL);
203 }else{
204 /* setup file access template */
205 acc_tpl = H5Pcreate (H5P_FILE_ACCESS);
206 VRFY((acc_tpl >= 0), "", H5FATAL);
207 ret = H5Pset_fapl_mpio(acc_tpl, MPI_COMM_WORLD, MPI_INFO_NULL);
208 VRFY((ret >= 0), "", H5FATAL);
209
210 /* set optional allocation alignment */
211 if (opt_alignment*opt_threshold != 1){
212 ret = H5Pset_alignment(acc_tpl, opt_threshold, opt_alignment );
213 VRFY((ret >= 0), "H5Pset_alignment succeeded", !H5FATAL);
214 }
215 }
216
217 h5_fixname_no_suffix(FILENAME[0], acc_tpl, filename, sizeof filename);
218
219 /* create the parallel file */
220 fid = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, acc_tpl);
221 VRFY((fid >= 0), "H5Fcreate succeeded", H5FATAL);
222
223 /* define a contiquous dataset of opt_iter*nprocs*opt_block chars */
224 dims[0] = opt_iter * nprocs * opt_block;
225 sid = H5Screate_simple(RANK, dims, NULL);
226 VRFY((sid >= 0), "H5Screate_simple succeeded", H5FATAL);
227 dataset = H5Dcreate2(fid, "Dataset1", H5T_NATIVE_CHAR, sid,
228 H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
229 VRFY((dataset >= 0), "H5Dcreate2 succeeded", H5FATAL);
230
231 /* create the memory dataspace and the file dataspace */
232 dims[0] = opt_block;
233 mem_dataspace = H5Screate_simple(RANK, dims, NULL);
234 VRFY((mem_dataspace >= 0), "", H5FATAL);
235 file_dataspace = H5Dget_space(dataset);
236 VRFY((file_dataspace >= 0), "H5Dget_space succeeded", H5FATAL);
237
238 /* now each process writes a block of opt_block chars in round robbin
239 * fashion until the whole dataset is covered.
240 */
241 for(j=0; j < opt_iter; j++) {
242 /* setup a file dataspace selection */
243 start[0] = (j*iter_jump)+(mynod*opt_block);
244 stride[0] = block[0] = opt_block;
245 count[0]= 1;
246 ret=H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block);
247 VRFY((ret >= 0), "H5Sset_hyperslab succeeded", H5FATAL);
248
249 if (opt_correct) /* fill in buffer for iteration */ {
250 for (i=mynod+j, check=buf; i<opt_block; i++,check++) *check=(char)i;
251 }
252
253 /* discover the starting time of the operation */
254 MPI_Barrier(MPI_COMM_WORLD);
255 stim = MPI_Wtime();
256
257 /* write data */
258 ret = H5Dwrite(dataset, H5T_NATIVE_CHAR, mem_dataspace, file_dataspace,
259 H5P_DEFAULT, buf);
260 VRFY((ret >= 0), "H5Dwrite dataset1 succeeded", !H5FATAL);
261
262 /* discover the ending time of the operation */
263 etim = MPI_Wtime();
264
265 write_tim += (etim - stim);
266
267 /* we are done with this "write" iteration */
268 }
269
270 /* close dataset and file */
271 ret=H5Dclose(dataset);
272 VRFY((ret >= 0), "H5Dclose succeeded", H5FATAL);
273 ret=H5Fclose(fid);
274 VRFY((ret >= 0), "H5Fclose succeeded", H5FATAL);
275
276
277
278 /* wait for everyone to synchronize at this point */
279 MPI_Barrier(MPI_COMM_WORLD);
280
281 /* reopen the file for reading */
282 fid=H5Fopen(filename,H5F_ACC_RDONLY,acc_tpl);
283 VRFY((fid >= 0), "", H5FATAL);
284
285 /* open the dataset */
286 dataset = H5Dopen2(fid, "Dataset1", H5P_DEFAULT);
287 VRFY((dataset >= 0), "H5Dopen succeeded", H5FATAL);
288
289 /* we can re-use the same mem_dataspace and file_dataspace
290 * the H5Dwrite used since the dimension size is the same.
291 */
292
293 /* we are going to repeat the read the same pattern the write used */
294 for (j=0; j < opt_iter; j++) {
295 /* setup a file dataspace selection */
296 start[0] = (j*iter_jump)+(mynod*opt_block);
297 stride[0] = block[0] = opt_block;
298 count[0]= 1;
299 ret=H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block);
300 VRFY((ret >= 0), "H5Sset_hyperslab succeeded", H5FATAL);
301 /* seek to the appropriate spot give the current iteration and
302 * rank within the MPI processes */
303
304 /* discover the start time */
305 MPI_Barrier(MPI_COMM_WORLD);
306 stim = MPI_Wtime();
307
308 /* read in the file data */
309 if (!opt_correct){
310 ret = H5Dread(dataset, H5T_NATIVE_CHAR, mem_dataspace, file_dataspace, H5P_DEFAULT, buf);
311 }
312 else{
313 ret = H5Dread(dataset, H5T_NATIVE_CHAR, mem_dataspace, file_dataspace, H5P_DEFAULT, buf2);
314 }
315 myerrno = errno;
316
317 /* discover the end time */
318 etim = MPI_Wtime();
319 read_tim += (etim - stim);
320 VRFY((ret >= 0), "H5Dwrite dataset1 succeeded", !H5FATAL);
321
322
323 if (ret < 0) fprintf(stderr, "node %d, read error, loc = %Ld: %s\n",
324 mynod, mynod*opt_block, strerror(myerrno));
325
326 /* if the user wanted to check correctness, compare the write
327 * buffer to the read buffer */
328 if (opt_correct && memcmp(buf, buf2, opt_block)) {
329 fprintf(stderr, "node %d, correctness test failed\n", mynod);
330 my_correct = 0;
331 MPI_Allreduce(&my_correct, &correct, 1, MPI_INT, MPI_MIN,
332 MPI_COMM_WORLD);
333 }
334
335 /* we are done with this read iteration */
336 }
337
338 /* close dataset and file */
339 ret=H5Dclose(dataset);
340 VRFY((ret >= 0), "H5Dclose succeeded", H5FATAL);
341 ret=H5Fclose(fid);
342 VRFY((ret >= 0), "H5Fclose succeeded", H5FATAL);
343 ret=H5Pclose(acc_tpl);
344 VRFY((ret >= 0), "H5Pclose succeeded", H5FATAL);
345
346 /* compute the read and write times */
347 MPI_Allreduce(&read_tim, &max_read_tim, 1, MPI_DOUBLE, MPI_MAX,
348 MPI_COMM_WORLD);
349 MPI_Allreduce(&read_tim, &min_read_tim, 1, MPI_DOUBLE, MPI_MIN,
350 MPI_COMM_WORLD);
351 MPI_Allreduce(&read_tim, &ave_read_tim, 1, MPI_DOUBLE, MPI_SUM,
352 MPI_COMM_WORLD);
353
354 /* calculate the average from the sum */
355 ave_read_tim = ave_read_tim / nprocs;
356
357 MPI_Allreduce(&write_tim, &max_write_tim, 1, MPI_DOUBLE, MPI_MAX,
358 MPI_COMM_WORLD);
359 MPI_Allreduce(&write_tim, &min_write_tim, 1, MPI_DOUBLE, MPI_MIN,
360 MPI_COMM_WORLD);
361 MPI_Allreduce(&write_tim, &ave_write_tim, 1, MPI_DOUBLE, MPI_SUM,
362 MPI_COMM_WORLD);
363
364 /* calculate the average from the sum */
365 ave_write_tim = ave_write_tim / nprocs;
366
367 /* print out the results on one node */
368 if (mynod == 0) {
369 read_bw = ((int64_t)(opt_block*nprocs*opt_iter))/(max_read_tim*1000000.0);
370 write_bw = ((int64_t)(opt_block*nprocs*opt_iter))/(max_write_tim*1000000.0);
371
372 printf("nr_procs = %d, nr_iter = %d, blk_sz = %ld\n", nprocs,
373 opt_iter, (long)opt_block);
374
375 printf("# total_size = %ld\n", (long)(opt_block*nprocs*opt_iter));
376
377 printf("# Write: min_time = %f, max_time = %f, mean_time = %f\n",
378 min_write_tim, max_write_tim, ave_write_tim);
379 printf("# Read: min_time = %f, max_time = %f, mean_time = %f\n",
380 min_read_tim, max_read_tim, ave_read_tim);
381
382 printf("Write bandwidth = %f Mbytes/sec\n", write_bw);
383 printf("Read bandwidth = %f Mbytes/sec\n", read_bw);
384
385 if (opt_correct) {
386 printf("Correctness test %s.\n", correct ? "passed" : "failed");
387 }
388 }
389
390
391 die_jar_jar_die:
392
393 #if H5_HAVE_SETENV
394 /* no setenv or unsetenv */
395 /* clear the environment variable if it was set earlier */
396 if (opt_pvfstab_set){
397 unsetenv("PVFSTAB_FILE");
398 }
399 #endif
400
401 free(tmp);
402 if (opt_correct) free(tmp2);
403
404 MPI_Finalize();
405
406 return(0);
407 }
408
409 static int
parse_args(int argc,char ** argv)410 parse_args(int argc, char **argv)
411 {
412 int c;
413
414 while ((c = getopt(argc, argv, "s:b:i:f:p:a:2:c")) != EOF) {
415 switch (c) {
416 case 's': /* stripe */
417 opt_stripe = atoi(optarg);
418 break;
419 case 'b': /* block size */
420 opt_block = atoi(optarg);
421 break;
422 case 'i': /* iterations */
423 opt_iter = atoi(optarg);
424 break;
425 case 'f': /* filename */
426 strncpy(opt_file, optarg, 255);
427 FILENAME[0] = opt_file;
428 break;
429 case 'p': /* pvfstab file */
430 strncpy(opt_pvfstab, optarg, 255);
431 opt_pvfstab_set = 1;
432 break;
433 case 'a': /* aligned allocation.
434 * syntax: -a<alignment>/<threshold>
435 * e.g., -a4096/512 allocate at 4096 bytes
436 * boundary if request size >= 512.
437 */
438 {char *p;
439 opt_alignment = atoi(optarg);
440 if (p=(char*)strchr(optarg, '/'))
441 opt_threshold = atoi(p+1);
442 }
443 HDfprintf(stdout,
444 "alignment/threshold=%Hu/%Hu\n",
445 opt_alignment, opt_threshold);
446 break;
447 case '2': /* use 2-files, i.e., split file driver */
448 opt_split_vfd=1;
449 /* get meta and raw file extension. */
450 /* syntax is <raw_ext>,<meta_ext> */
451 meta_ext = raw_ext = optarg;
452 while (*raw_ext != '\0'){
453 if (*raw_ext == ','){
454 *raw_ext = '\0';
455 raw_ext++;
456 break;
457 }
458 raw_ext++;
459 }
460 printf("split-file-vfd used: %s,%s\n",
461 meta_ext, raw_ext);
462 break;
463 case 'c': /* correctness */
464 opt_correct = 1;
465 break;
466 case '?': /* unknown */
467 default:
468 break;
469 }
470 }
471
472 return(0);
473 }
474
475 /*
476 * Local variables:
477 * c-indent-level: 3
478 * c-basic-offset: 3
479 * tab-width: 3
480 * End:
481 */
482
483 #else /* H5_HAVE_PARALLEL */
484 /* dummy program since H5_HAVE_PARALLEL is not configured in */
485 int
main(int H5_ATTR_UNUSED argc,char H5_ATTR_UNUSED ** argv)486 main(int H5_ATTR_UNUSED argc, char H5_ATTR_UNUSED **argv)
487 {
488 printf("No parallel performance because parallel is not configured in\n");
489 return(0);
490 }
491 #endif /* H5_HAVE_PARALLEL */
492
493