1 /*********************************************************************
2  *
3  *  Copyright (C) 2012, Northwestern University and Argonne National Laboratory
4  *  See COPYRIGHT notice in top-level directory.
5  *
6  *********************************************************************/
7 /* $Id: collective_write.c 2728 2016-12-18 17:49:20Z wkliao $ */
8 
9 /*
10  *    This example mimics the coll_perf.c from ROMIO.
 *    It creates a netcdf file in CDF-5 format and writes a number of
12  *    3D integer non-record variables. The measured write bandwidth is reported
13  *    at the end. Usage:
14  *    To compile:
15  *        mpicc -O2 collective_write.c -o collective_write -lpnetcdf
16  *    To run:
17  *        mpiexec -n num_processes ./collective_write [filename] [len]
18  *    where len decides the size of each local array, which is len x len x len.
19  *    So, each non-record variable is of size len*len*len * nprocs * sizeof(int)
20  *    All variables are partitioned among all processes in a 3D
21  *    block-block-block fashion. Below is an example standard output from
22  *    command:
23  *        mpiexec -n 32 ./collective_write /pvfs2/wkliao/testfile.nc 100
24  *
25  *    MPI hint: cb_nodes        = 2
26  *    MPI hint: cb_buffer_size  = 16777216
27  *    MPI hint: striping_factor = 32
28  *    MPI hint: striping_unit   = 1048576
29  *    Local array size 100 x 100 x 100 integers, size = 3.81 MB
30  *    Global array size 400 x 400 x 200 integers, write size = 0.30 GB
31  *     procs    Global array size  exec(sec)  write(MB/s)
32  *     -------  ------------------  ---------  -----------
33  *        32     400 x  400 x  200     6.67       45.72
34  */
35 
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h> /* strcpy(), strncpy() */
39 #include <unistd.h> /* getopt() */
40 #include <string.h> /* strcpy() */
41 #include <mpi.h>
42 #include <pnetcdf.h>
43 
/* Fallback for MPI headers that do not provide the MPI_OFFSET datatype */
#if !defined(MPI_OFFSET)
#define MPI_OFFSET MPI_LONG_LONG_INT
#endif

#define NDIMS    3     /* number of dimensions of every variable */
#define NUM_VARS 10    /* number of variables written to the file */
50 
/* Report and count a failed PnetCDF call.
 * Expands to a brace-enclosed block, so it is used without a trailing
 * semicolon. It relies on two variables of the calling scope: `err`, the
 * status to be checked, and `nerrs`, the error counter that is incremented
 * when `err` is not NC_NOERR. */
#define HANDLE_ERROR {                                          \
    if (err != NC_NOERR) {                                      \
        nerrs++;                                                \
        printf("Error at line %d (%s)\n", __LINE__,             \
               ncmpi_strerror(err));                            \
    }                                                           \
}
58 
/*
 * Print the command-line synopsis to stderr.
 * argv0 is the program name inserted into the "Usage:" line.
 */
static void
usage(char *argv0)
{
    /* the format is passed as a string literal so the compiler can check it
     * against the argument list */
    fprintf(stderr,
            "Usage: %s [-h] | [-q] [file_name] [len]\n"
            "       [-h] Print help\n"
            "       [-q] Quiet mode (reports when fail)\n"
            "       [filename] output netCDF file name\n"
            "       [len] size of each dimension of the local array\n",
            argv0);
}
70 
71 /*----< print_info() >------------------------------------------------------*/
72 static
print_info(MPI_Info * info_used)73 void print_info(MPI_Info *info_used)
74 {
75     int     flag;
76     char    info_cb_nodes[64], info_cb_buffer_size[64];
77     char    info_striping_factor[64], info_striping_unit[64];
78 
79     strcpy(info_cb_nodes,        "undefined");
80     strcpy(info_cb_buffer_size,  "undefined");
81     strcpy(info_striping_factor, "undefined");
82     strcpy(info_striping_unit,   "undefined");
83 
84     MPI_Info_get(*info_used, "cb_nodes", 64, info_cb_nodes, &flag);
85     MPI_Info_get(*info_used, "cb_buffer_size", 64, info_cb_buffer_size, &flag);
86     MPI_Info_get(*info_used, "striping_factor", 64, info_striping_factor, &flag);
87     MPI_Info_get(*info_used, "striping_unit", 64, info_striping_unit, &flag);
88 
89     printf("MPI hint: cb_nodes        = %s\n", info_cb_nodes);
90     printf("MPI hint: cb_buffer_size  = %s\n", info_cb_buffer_size);
91     printf("MPI hint: striping_factor = %s\n", info_striping_factor);
92     printf("MPI hint: striping_unit   = %s\n", info_striping_unit);
93 }
94 
95 /*----< main() >------------------------------------------------------------*/
main(int argc,char ** argv)96 int main(int argc, char **argv)
97 {
98     extern int optind;
99     char filename[256], str[512];
100     int i, j, rank, nprocs, len, ncid, bufsize, verbose=1, err, nerrs=0;
101     int *buf[NUM_VARS], psizes[NDIMS], dimids[NDIMS], varids[NUM_VARS];
102     double write_timing, max_write_timing, write_bw;
103     MPI_Offset gsizes[NDIMS], starts[NDIMS], counts[NDIMS];
104     MPI_Offset write_size, sum_write_size;
105     MPI_Info info_used;
106 
107     MPI_Init(&argc,&argv);
108     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
109     MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
110 
111     /* get command-line arguments */
112     while ((i = getopt(argc, argv, "hq")) != EOF)
113         switch(i) {
114             case 'q': verbose = 0;
115                       break;
116             case 'h':
117             default:  if (rank==0) usage(argv[0]);
118                       MPI_Finalize();
119                       return 0;
120         }
121     argc -= optind;
122     argv += optind;
123     if (argc >= 1) snprintf(filename, 256, "%s", argv[0]);
124     else           strcpy(filename, "testfile.nc");
125 
126     len = 10;
127     if (argc >= 2) len = (int)strtol(argv[1],NULL,10); /* optional argument */
128     len = (len <= 0) ? 10 : len;
129 
130     for (i=0; i<NDIMS; i++)
131         psizes[i] = 0;
132 
133     MPI_Dims_create(nprocs, NDIMS, psizes);
134     starts[0] = rank % psizes[0];
135     starts[1] = (rank / psizes[1]) % psizes[1];
136     starts[2] = (rank / (psizes[0] * psizes[1])) % psizes[2];
137 
138     bufsize = 1;
139     for (i=0; i<NDIMS; i++) {
140         gsizes[i] = (MPI_Offset)len * psizes[i];
141         starts[i] *= len;
142         counts[i]  = len;
143         bufsize   *= len;
144     }
145 
146     /* allocate buffer and initialize with non-zero numbers */
147     for (i=0; i<NUM_VARS; i++) {
148         buf[i] = (int *) malloc(bufsize * sizeof(int));
149         for (j=0; j<bufsize; j++) buf[i][j] = rank * i + 123 + j;
150     }
151 
152     MPI_Barrier(MPI_COMM_WORLD);
153     write_timing = MPI_Wtime();
154 
155     /* create the file */
156     err = ncmpi_create(MPI_COMM_WORLD, filename, NC_CLOBBER|NC_64BIT_DATA,
157                        MPI_INFO_NULL, &ncid);
158     if (err != NC_NOERR) {
159         printf("Error: ncmpi_create() file %s (%s)\n",filename,ncmpi_strerror(err));
160         MPI_Abort(MPI_COMM_WORLD, -1);
161         exit(1);
162     }
163 
164     /* define dimensions */
165     for (i=0; i<NDIMS; i++) {
166         sprintf(str, "%c", 'x'+i);
167         err = ncmpi_def_dim(ncid, str, gsizes[i], &dimids[i]);
168         HANDLE_ERROR
169     }
170 
171     /* define variables */
172     for (i=0; i<NUM_VARS; i++) {
173         sprintf(str, "var%d", i);
174         err = ncmpi_def_var(ncid, str, NC_INT, NDIMS, dimids, &varids[i]);
175         HANDLE_ERROR
176     }
177 
178     /* exit the define mode */
179     err = ncmpi_enddef(ncid);
180     HANDLE_ERROR
181 
182     /* get all the hints used */
183     err = ncmpi_inq_file_info(ncid, &info_used);
184     HANDLE_ERROR
185 
186     /* write one variable at a time */
187     for (i=0; i<NUM_VARS; i++) {
188         err = ncmpi_put_vara_int_all(ncid, varids[i], starts, counts, buf[i]);
189         HANDLE_ERROR
190     }
191 
192     /* close the file */
193     err = ncmpi_close(ncid);
194     HANDLE_ERROR
195 
196     write_timing = MPI_Wtime() - write_timing;
197 
198     write_size = bufsize * NUM_VARS * sizeof(int);
199     for (i=0; i<NUM_VARS; i++) free(buf[i]);
200 
201     MPI_Reduce(&write_size, &sum_write_size, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
202     MPI_Reduce(&write_timing, &max_write_timing, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
203 
204     if (rank == 0 && verbose) {
205         float subarray_size = (float)bufsize*sizeof(int)/1048576.0;
206         print_info(&info_used);
207         printf("Local array size %d x %d x %d integers, size = %.2f MB\n",len,len,len,subarray_size);
208         sum_write_size /= 1048576.0;
209         printf("Global array size %lld x %lld x %lld integers, write size = %.2f GB\n",
210                gsizes[0], gsizes[1], gsizes[2], sum_write_size/1024.0);
211 
212         write_bw = sum_write_size/max_write_timing;
213         printf(" procs    Global array size  exec(sec)  write(MB/s)\n");
214         printf("-------  ------------------  ---------  -----------\n");
215         printf(" %4d    %4lld x %4lld x %4lld %8.2f  %10.2f\n", nprocs,
216                gsizes[0], gsizes[1], gsizes[2], max_write_timing, write_bw);
217     }
218     MPI_Info_free(&info_used);
219 
220     /* print info about PnetCDF internal malloc usage */
221     MPI_Offset malloc_size, sum_size;
222     err = ncmpi_inq_malloc_max_size(&malloc_size);
223     if (err == NC_NOERR) {
224         MPI_Reduce(&malloc_size, &sum_size, 1, MPI_OFFSET, MPI_SUM, 0, MPI_COMM_WORLD);
225         if (rank == 0 && verbose)
226             printf("maximum heap memory allocted by PnetCDF internally is %lld bytes\n",
227                    sum_size);
228 
229         /* check if there is any PnetCDF internal malloc residue */
230         err = ncmpi_inq_malloc_size(&malloc_size);
231         MPI_Reduce(&malloc_size, &sum_size, 1, MPI_OFFSET, MPI_SUM, 0, MPI_COMM_WORLD);
232         if (rank == 0 && sum_size > 0)
233             printf("heap memory allocated by PnetCDF internally has %lld bytes yet to be freed\n",
234                    sum_size);
235     }
236 
237     MPI_Finalize();
238     return nerrs;
239 }
240 
241