1 /*
2 Copyright 2009, UCAR/Unidata
3 See COPYRIGHT file for copying and redistribution conditions.
4 
5 This program tests netcdf-4 parallel I/O. These tests are based on the
6 needs of the NASA GMAO model, and are based on some test code from
7 Dennis Nadeau.
8 
9 $Id: tst_nc4perf.c,v 1.4 2009/08/19 15:58:57 ed Exp $
10 */
11 
12 #include "nc_tests.h"
13 #include "err_macros.h"
14 
/* Name of the scratch file each test creates and later removes. */
#define FILENAME "tst_nc4perf.nc"
/* Number of dimensions of the variables in the 2D and 4D tests. */
#define NDIMS1 2
#define NDIMS2 4
/* Grid extents. DIMSIZE1 is the dimension that is split evenly across
 * the MPI ranks, so the number of ranks must divide it. */
#define DIMSIZE1 40
#define DIMSIZE2 61
#define DIMSIZE3 3
/* Alternative, larger grid (currently disabled):
#define DIMSIZE1 540
#define DIMSIZE2 361
#define DIMSIZE3 72*/
/* Length of the first dimension of the 4D variables. */
#define TIMELEN 4
/* Number of variables defined and written in each test file. */
#define NUMVARS 10
/* Number of timed passes averaged for each configuration. */
#define NUM_TRIES 2
/* Bytes per megabyte (2^20). */
#define MEGABYTE 1048576
28 
29 /* This function creates a file with 10 2D variables, no unlimited
30  * dimension. */
test_pio_2d(size_t cache_size,int access_flag,MPI_Comm comm,MPI_Info info,int mpi_size,int mpi_rank,size_t * chunk_size)31 int test_pio_2d(size_t cache_size, int access_flag, MPI_Comm comm,
32 		MPI_Info info, int mpi_size, int mpi_rank,
33 		size_t *chunk_size)
34 {
35    double starttime, endtime, write_time = 0, bandwidth = 0;
36    int ncid;
37    int dimids[NDIMS1];
38    size_t start[NDIMS1], count[NDIMS1];
39    float *data;
40    char file_name[NC_MAX_NAME + 1];
41    char var_name1[NUMVARS][NC_MAX_NAME + 1] = {"GWa", "JAd", "TJe", "JMa", "JMo",
42 					       "JQA", "AJa", "MVB", "WHH", "JTy"};
43    int varid1[NUMVARS];
44    size_t nelems_in;
45    float preemption_in;
46    int j, i, t;
47 
48    /* Create some data. */
49    if (!(data = malloc(sizeof(float) * DIMSIZE2 * DIMSIZE1 / mpi_size)))
50       return -2;
51    for (j = 0; j < DIMSIZE2; j++)
52       for (i = 0; i < DIMSIZE1 / mpi_size; i++)
53 	 data[j * DIMSIZE1 / mpi_size + i] = (float)mpi_rank * (j + 1);
54 
55    /* Get the file name. */
56    sprintf(file_name, "%s/%s", TEMP_LARGE, FILENAME);
57 
58    /* Set the cache size. */
59    if (nc_get_chunk_cache(NULL, &nelems_in, &preemption_in)) ERR;
60    if (nc_set_chunk_cache(cache_size, nelems_in, preemption_in)) ERR;
61 
62    for (t = 0; t < NUM_TRIES; t++)
63    {
64       /* Create a netcdf-4 file, opened for parallel I/O. */
65       if (nc_create_par(file_name, NC_NETCDF4, comm,
66 			info, &ncid)) ERR;
67 
68       /* Create two dimensions. */
69       if (nc_def_dim(ncid, "d1", DIMSIZE2, &dimids[0])) ERR;
70       if (nc_def_dim(ncid, "d2", DIMSIZE1, &dimids[1])) ERR;
71 
72       /* Create our variables. */
73       for (i = 0; i < NUMVARS; i++)
74       {
75 	 if (nc_def_var(ncid, var_name1[i], NC_INT, NDIMS1,
76 			dimids, &varid1[i])) ERR;
77 	 if (chunk_size[0])
78 	    if (nc_def_var_chunking(ncid, varid1[i], 0, chunk_size)) ERR;
79       }
80 
81       if (nc_enddef(ncid)) ERR;
82 
83       /* Set up slab for this process. */
84       start[0] = 0;
85       start[1] = mpi_rank * DIMSIZE1/mpi_size;
86       count[0] = DIMSIZE2;
87       count[1] = DIMSIZE1 / mpi_size;
88 
89       /* start parallel netcdf4 */
90       for (i = 0; i < NUMVARS; i++)
91 	 if (nc_var_par_access(ncid, varid1[i], access_flag)) ERR;
92 
93       starttime = MPI_Wtime();
94 
95       /* Write two dimensional float data */
96       for (i = 0; i < NUMVARS; i++)
97 	 if (nc_put_vara_float(ncid, varid1[i], start, count, data)) ERR;
98 
99       /* Close the netcdf file. */
100       if (nc_close(ncid)) ERR;
101 
102       endtime = MPI_Wtime();
103       if (!mpi_rank)
104       {
105 	 bandwidth += ((sizeof(float) * DIMSIZE1 * DIMSIZE2 * NUMVARS) /
106 		       ((endtime - starttime) * 1024 * 1024)) / NUM_TRIES;
107 	 write_time += (endtime - starttime) / NUM_TRIES;
108       }
109    }
110    free(data);
111    if (!mpi_rank)
112    {
113       char chunk_string[NC_MAX_NAME + 1] = "";
114 
115       /* What was our chunking? */
116       if (chunk_size[0])
117 	 sprintf(chunk_string, "%dx%d    ", (int)chunk_size[0], (int)chunk_size[1]);
118       else
119 	 strcat(chunk_string, "contiguous");
120 
121       /* Print the results. */
122       printf("%d\t\t%s\t%s\t%d\t\t%dx%d\t\t%s\t%f\t\t%f\t\t\t%d\n", mpi_size,
123 	     "MPI-IO   ",
124 	     (access_flag == NC_INDEPENDENT ? "independent" : "collective"),
125 	     (int)cache_size/MEGABYTE, DIMSIZE1, DIMSIZE2, chunk_string,
126 	     write_time, bandwidth, NUM_TRIES);
127    }
128 
129    /* Delete this file. */
130    remove(file_name);
131 
132    return 0;
133 }
134 
135 /* Both read and write will be tested */
136 /* Case 2: create four dimensional integer data,
137    one dimension is unlimited. */
test_pio_4d(size_t cache_size,int access_flag,MPI_Comm comm,MPI_Info info,int mpi_size,int mpi_rank,size_t * chunk_size)138 int test_pio_4d(size_t cache_size, int access_flag, MPI_Comm comm,
139 		MPI_Info info, int mpi_size, int mpi_rank, size_t *chunk_size)
140 {
141    int ncid, dimuids[NDIMS2], varid2[NUMVARS];
142    size_t ustart[NDIMS2], ucount[NDIMS2];
143    float *udata, *tempudata;
144    char file_name[NC_MAX_NAME + 1];
145    char var_name2[NUMVARS][NC_MAX_NAME + 1] = {"JKP", "ZTa", "MFi", "FPi", "JBu",
146 					       "ALi", "AJo", "USG", "RBH", "JAG"};
147    double starttime, endtime, write_time = 0, bandwidth = 0;
148    size_t nelems_in;
149    float preemption_in;
150    int k, j, i, t;
151 
152    udata = malloc(DIMSIZE3 * DIMSIZE2 * DIMSIZE1 / mpi_size * sizeof(int));
153 
154    /* Create phony data. */
155    tempudata = udata;
156    for(k = 0; k < DIMSIZE3; k++)
157       for(j = 0; j < DIMSIZE2; j++)
158 	 for(i = 0; i < DIMSIZE1 / mpi_size; i++)
159 	 {
160 	    *tempudata = (float)(1 + mpi_rank) * 2 * (j + 1) * (k + 1);
161 	    tempudata++;
162 	 }
163 
164    /* Get the file name. */
165    sprintf(file_name, "%s/%s", TEMP_LARGE, FILENAME);
166 
167    /* Set the cache size. */
168    if (nc_get_chunk_cache(NULL, &nelems_in, &preemption_in)) ERR;
169    if (nc_set_chunk_cache(cache_size, nelems_in, preemption_in)) ERR;
170 
171    for (t = 0; t < NUM_TRIES; t++)
172    {
173       /* Create a netcdf-4 file. */
174       if (nc_create_par(file_name, NC_NETCDF4, comm, info,
175 			&ncid)) ERR;
176 
177       /* Create four dimensions. */
178       if (nc_def_dim(ncid, "ud1", TIMELEN, dimuids)) ERR;
179       if (nc_def_dim(ncid, "ud2", DIMSIZE3, &dimuids[1])) ERR;
180       if (nc_def_dim(ncid, "ud3", DIMSIZE2, &dimuids[2])) ERR;
181       if (nc_def_dim(ncid, "ud4", DIMSIZE1, &dimuids[3])) ERR;
182 
183       /* Create 10 variables. */
184       for (i = 0; i < NUMVARS; i++)
185 	 if (nc_def_var(ncid, var_name2[i], NC_INT, NDIMS2,
186 			dimuids, &varid2[i])) ERR;
187 
188       if (nc_enddef(ncid)) ERR;
189 
190       /* Set up selection parameters */
191       ustart[0] = 0;
192       ustart[1] = 0;
193       ustart[2] = 0;
194       ustart[3] = DIMSIZE1 * mpi_rank / mpi_size;
195       ucount[0] = 1;
196       ucount[1] = DIMSIZE3;
197       ucount[2] = DIMSIZE2;
198       ucount[3] = DIMSIZE1 / mpi_size;
199 
200       /* Access parallel */
201       for (i = 0; i < NUMVARS; i++)
202 	 if (nc_var_par_access(ncid, varid2[i], access_flag)) ERR;
203 
204       starttime = MPI_Wtime();
205 
206       /* Write slabs of phony data. */
207       for(ustart[0] = 0; ustart[0] < TIMELEN; ustart[0]++)
208 	 for (i = 0; i < NUMVARS; i++)
209 	    if (nc_put_vara_float(ncid, varid2[i], ustart, ucount, udata)) ERR;
210 
211       /* Close the netcdf file. */
212       if (nc_close(ncid)) ERR;
213 
214       endtime = MPI_Wtime();
215       if (!mpi_rank)
216       {
217 	 write_time += (endtime - starttime) / NUM_TRIES;
218 	 bandwidth += (sizeof(float) * TIMELEN * DIMSIZE1 * DIMSIZE2 * DIMSIZE3 * NUMVARS) /
219 	    ((endtime - starttime) * 1024 * 1024 * NUM_TRIES);
220       }
221    }
222    free(udata);
223    if (!mpi_rank)
224    {
225       char chunk_string[NC_MAX_NAME + 1] = "";
226 
227       /* What was our chunking? */
228       if (chunk_size[0])
229 	 sprintf(chunk_string, "%dx%dx%dx%d", (int)chunk_size[0], (int)chunk_size[1],
230 		 (int)chunk_size[2], (int)chunk_size[3]);
231       else
232 	 strcat(chunk_string, "contiguous");
233 
234       /* Print our results. */
235       printf("%d\t\t%s\t%s\t%d\t\t%dx%dx%dx%d\t%s\t%f\t\t%f\t\t\t%d\n", mpi_size,
236 	     "MPI-IO   ",
237 	     (access_flag == NC_INDEPENDENT ? "independent" : "collective"),
238 	     (int)cache_size / MEGABYTE, TIMELEN, DIMSIZE3, DIMSIZE2, DIMSIZE1, chunk_string, write_time,
239 	     bandwidth, NUM_TRIES);
240    }
241 
242    /* Delete this file. */
243    remove(file_name);
244 
245    return 0;
246 }
247 
/* Sizes of the configuration tables that main() iterates over. */
/* NUM_MODES is not referenced anywhere in the visible code. */
#define NUM_MODES 2
/* Number of parallel access types (independent and collective). */
#define NUM_FACC 2
/* Number of chunk-size combinations tried by the 2D and 4D tests. */
#define NUM_CHUNK_COMBOS_2D 3
#define NUM_CHUNK_COMBOS_4D 4
/* Number of chunk cache sizes tried. */
#define NUM_CACHE_SIZES 3
253 
main(int argc,char ** argv)254 int main(int argc, char **argv)
255 {
256    MPI_Comm comm = MPI_COMM_WORLD;
257    MPI_Info info = MPI_INFO_NULL;
258    int mpi_size, mpi_rank;
259    int facc_type[NUM_FACC] = {NC_INDEPENDENT, NC_COLLECTIVE};
260    size_t chunk_size_2d[NUM_CHUNK_COMBOS_2D][NDIMS1] = {{0, 0},
261 							{DIMSIZE2, DIMSIZE1},
262 							{DIMSIZE2/2 + 1, DIMSIZE1 / 2}};
263    size_t chunk_size_4d[NUM_CHUNK_COMBOS_4D][NDIMS2] = {{0, 0, 0, 0},
264 							{1, DIMSIZE3, DIMSIZE2, DIMSIZE1},
265 							{TIMELEN / 2, DIMSIZE3 / 2 + 1, DIMSIZE2 / 2 + 1, DIMSIZE1 / 2},
266 							{TIMELEN, DIMSIZE3, DIMSIZE2, DIMSIZE1}};
267    size_t cache_size[NUM_CACHE_SIZES] = {MEGABYTE, 32 * MEGABYTE, 64 * MEGABYTE};
268    int f, c, i;
269 
270    /* Initialize MPI. */
271    MPI_Init(&argc, &argv);
272    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
273    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
274 
275    /* Check for invalid number of processors. */
276    if ((float)DIMSIZE1 / mpi_size != (int)(DIMSIZE1 / mpi_size))
277    {
278       printf("%d divided by number of processors must be a whole number!\n",
279 	     DIMSIZE1);
280       return -1;
281    }
282 
283    if (!mpi_rank)
284    {
285       printf("*** Testing parallel IO for NASA...\n");
286       printf("num_proc\tMPI mode\taccess\t\tcache (MB)\tgrid size\tchunks\tavg. write time(s)\t"
287 	     "avg. write bandwidth(MB/s)\tnum_tries\n");
288    }
289 
290    for (i = 0; i < NUM_CACHE_SIZES; i++)
291 	 for (f = 0; f < NUM_FACC; f++)
292 	    for (c = 0; c < NUM_CHUNK_COMBOS_2D; c++)
293 	       if (test_pio_2d(cache_size[i], facc_type[f], comm,
294 			       info, mpi_size, mpi_rank, chunk_size_2d[c])) ERR;
295 
296    for (i = 0; i < NUM_CACHE_SIZES; i++)
297 	 for (f = 0; f < NUM_FACC; f++)
298 	    for (c = 0; c < NUM_CHUNK_COMBOS_4D; c++)
299 	       if (test_pio_4d(cache_size[i], facc_type[f], comm,
300 			       info, mpi_size, mpi_rank, chunk_size_4d[c])) ERR;
301 
302    if (!mpi_rank)
303       SUMMARIZE_ERR;
304    MPI_Finalize();
305 
306    if (!mpi_rank)
307       FINAL_RESULTS;
308 
309    return 0;
310 }
311