1 /*
2 Copyright 2009, UCAR/Unidata
3 See COPYRIGHT file for copying and redistribution conditions.
4
5 This program tests netcdf-4 parallel I/O. These tests are based on the
6 needs of the NASA GMAO model, and are based on some test code from
7 Dennis Nadeau.
8
9 $Id: tst_nc4perf.c,v 1.4 2009/08/19 15:58:57 ed Exp $
10 */
11
12 #include "nc_tests.h"
13 #include "err_macros.h"
14
15 #define FILENAME "tst_nc4perf.nc"
16 #define NDIMS1 2
17 #define NDIMS2 4
18 #define DIMSIZE1 40
19 #define DIMSIZE2 61
20 #define DIMSIZE3 3
21 /*#define DIMSIZE1 540
22 #define DIMSIZE2 361
23 #define DIMSIZE3 72*/
24 #define TIMELEN 4
25 #define NUMVARS 10
26 #define NUM_TRIES 2
27 #define MEGABYTE 1048576
28
29 /* This function creates a file with 10 2D variables, no unlimited
30 * dimension. */
/**
 * Time parallel writes of NUMVARS two-dimensional variables.
 *
 * Each of the NUM_TRIES iterations creates the file with nc_create_par(),
 * defines a DIMSIZE2 x DIMSIZE1 grid, writes this rank's slab of columns
 * into every variable and closes the file. Rank 0 then prints one result
 * row holding the write time and bandwidth averaged over the tries. The
 * file is removed before returning.
 *
 * netCDF failures are reported through the ERR macro.
 *
 * @param cache_size  Chunk cache size in bytes, passed to nc_set_chunk_cache().
 * @param access_flag NC_INDEPENDENT or NC_COLLECTIVE, passed to
 *                    nc_var_par_access().
 * @param comm        MPI communicator passed to nc_create_par().
 * @param info        MPI info object passed to nc_create_par().
 * @param mpi_size    Number of ranks; DIMSIZE1 is divided evenly among them.
 * @param mpi_rank    Rank of the calling process.
 * @param chunk_size  NDIMS1 chunk sizes. If chunk_size[0] is 0 no chunking
 *                    is requested and the row is labelled "contiguous".
 *
 * @return 0 on success, -2 if the data buffer cannot be allocated, -3 if
 *         the file path does not fit in the name buffer.
 */
int test_pio_2d(size_t cache_size, int access_flag, MPI_Comm comm,
                MPI_Info info, int mpi_size, int mpi_rank,
                size_t *chunk_size)
{
    double starttime, endtime, write_time = 0, bandwidth = 0;
    int ncid;
    int dimids[NDIMS1];
    size_t start[NDIMS1], count[NDIMS1];
    float *data;
    char file_name[NC_MAX_NAME + 1];
    char var_name1[NUMVARS][NC_MAX_NAME + 1] = {"GWa", "JAd", "TJe", "JMa", "JMo",
                                                "JQA", "AJa", "MVB", "WHH", "JTy"};
    int varid1[NUMVARS];
    size_t nelems_in;
    float preemption_in;
    /* Number of DIMSIZE1 columns owned by each rank. */
    int cols_per_rank = DIMSIZE1 / mpi_size;
    int name_len;
    int j, i, t;

    /* Get the file name. Done before allocating so that a path that
     * does not fit can be rejected without any cleanup. */
    name_len = snprintf(file_name, sizeof(file_name), "%s/%s", TEMP_LARGE,
                        FILENAME);
    if (name_len < 0 || (size_t)name_len >= sizeof(file_name))
        return -3;

    /* Create some data: one DIMSIZE2 x cols_per_rank slab. */
    if (!(data = malloc(sizeof(*data) * DIMSIZE2 * cols_per_rank)))
        return -2;
    for (j = 0; j < DIMSIZE2; j++)
        for (i = 0; i < cols_per_rank; i++)
            data[j * cols_per_rank + i] = (float)mpi_rank * (j + 1);

    /* Set the cache size, keeping the current element count and
     * preemption settings. */
    if (nc_get_chunk_cache(NULL, &nelems_in, &preemption_in)) ERR;
    if (nc_set_chunk_cache(cache_size, nelems_in, preemption_in)) ERR;

    for (t = 0; t < NUM_TRIES; t++)
    {
        /* Create a netcdf-4 file, opened for parallel I/O. */
        if (nc_create_par(file_name, NC_NETCDF4, comm,
                          info, &ncid)) ERR;

        /* Create two dimensions. */
        if (nc_def_dim(ncid, "d1", DIMSIZE2, &dimids[0])) ERR;
        if (nc_def_dim(ncid, "d2", DIMSIZE1, &dimids[1])) ERR;

        /* Create our variables.
         * NOTE(review): the variables are defined as NC_INT but are
         * written below through the float API - confirm this is
         * intended. */
        for (i = 0; i < NUMVARS; i++)
        {
            if (nc_def_var(ncid, var_name1[i], NC_INT, NDIMS1,
                           dimids, &varid1[i])) ERR;
            if (chunk_size[0])
                if (nc_def_var_chunking(ncid, varid1[i], 0, chunk_size)) ERR;
        }

        if (nc_enddef(ncid)) ERR;

        /* Set up slab for this process: all rows, this rank's columns. */
        start[0] = 0;
        start[1] = mpi_rank * DIMSIZE1 / mpi_size;
        count[0] = DIMSIZE2;
        count[1] = cols_per_rank;

        /* start parallel netcdf4 */
        for (i = 0; i < NUMVARS; i++)
            if (nc_var_par_access(ncid, varid1[i], access_flag)) ERR;

        starttime = MPI_Wtime();

        /* Write the two dimensional float data to every variable. */
        for (i = 0; i < NUMVARS; i++)
            if (nc_put_vara_float(ncid, varid1[i], start, count, data)) ERR;

        /* Close the netcdf file. The close is inside the timed region. */
        if (nc_close(ncid)) ERR;

        endtime = MPI_Wtime();
        if (!mpi_rank)
        {
            /* Accumulate per-try averages (MB/s and seconds). */
            bandwidth += ((sizeof(float) * DIMSIZE1 * DIMSIZE2 * NUMVARS) /
                          ((endtime - starttime) * 1024 * 1024)) / NUM_TRIES;
            write_time += (endtime - starttime) / NUM_TRIES;
        }
    }
    free(data);
    if (!mpi_rank)
    {
        char chunk_string[NC_MAX_NAME + 1] = "";

        /* What was our chunking? */
        if (chunk_size[0])
            sprintf(chunk_string, "%dx%d    ", (int)chunk_size[0], (int)chunk_size[1]);
        else
            strcat(chunk_string, "contiguous");

        /* Print the results. */
        printf("%d\t\t%s\t%s\t%d\t\t%dx%d\t\t%s\t%f\t\t%f\t\t\t%d\n", mpi_size,
               "MPI-IO  ",
               (access_flag == NC_INDEPENDENT ? "independent" : "collective"),
               (int)cache_size/MEGABYTE, DIMSIZE1, DIMSIZE2, chunk_string,
               write_time, bandwidth, NUM_TRIES);
    }

    /* Delete this file. */
    remove(file_name);

    return 0;
}
134
/* Case 2: write four-dimensional data. Only writes are timed here; the
   data is not read back. The first (time) dimension is defined with the
   fixed length TIMELEN rather than as an unlimited dimension. */
/**
 * Time parallel writes of NUMVARS four-dimensional variables.
 *
 * Each of the NUM_TRIES iterations creates the file with nc_create_par(),
 * defines a TIMELEN x DIMSIZE3 x DIMSIZE2 x DIMSIZE1 grid, writes this
 * rank's slab once per time step into every variable and closes the file.
 * Rank 0 then prints one result row holding the write time and bandwidth
 * averaged over the tries. The file is removed before returning.
 *
 * netCDF failures are reported through the ERR macro.
 *
 * @param cache_size  Chunk cache size in bytes, passed to nc_set_chunk_cache().
 * @param access_flag NC_INDEPENDENT or NC_COLLECTIVE, passed to
 *                    nc_var_par_access().
 * @param comm        MPI communicator passed to nc_create_par().
 * @param info        MPI info object passed to nc_create_par().
 * @param mpi_size    Number of ranks; DIMSIZE1 is divided evenly among them.
 * @param mpi_rank    Rank of the calling process.
 * @param chunk_size  NDIMS2 chunk sizes. If chunk_size[0] is 0 no chunking
 *                    is requested and the row is labelled "contiguous".
 *
 * @return 0 on success, -2 if the data buffer cannot be allocated, -3 if
 *         the file path does not fit in the name buffer.
 */
int test_pio_4d(size_t cache_size, int access_flag, MPI_Comm comm,
                MPI_Info info, int mpi_size, int mpi_rank, size_t *chunk_size)
{
    int ncid, dimuids[NDIMS2], varid2[NUMVARS];
    size_t ustart[NDIMS2], ucount[NDIMS2];
    float *udata, *tempudata;
    char file_name[NC_MAX_NAME + 1];
    char var_name2[NUMVARS][NC_MAX_NAME + 1] = {"JKP", "ZTa", "MFi", "FPi", "JBu",
                                                "ALi", "AJo", "USG", "RBH", "JAG"};
    double starttime, endtime, write_time = 0, bandwidth = 0;
    size_t nelems_in;
    float preemption_in;
    /* Number of DIMSIZE1 columns owned by each rank. */
    int cols_per_rank = DIMSIZE1 / mpi_size;
    int name_len;
    int k, j, i, t;

    /* Get the file name. Done before allocating so that a path that
     * does not fit can be rejected without any cleanup. */
    name_len = snprintf(file_name, sizeof(file_name), "%s/%s", TEMP_LARGE,
                        FILENAME);
    if (name_len < 0 || (size_t)name_len >= sizeof(file_name))
        return -3;

    /* One time step's worth of this rank's data. Sized from the element
     * type of the buffer, and checked before the fill loop below
     * dereferences it. */
    if (!(udata = malloc(sizeof(*udata) * DIMSIZE3 * DIMSIZE2 * cols_per_rank)))
        return -2;

    /* Create phony data. */
    tempudata = udata;
    for (k = 0; k < DIMSIZE3; k++)
        for (j = 0; j < DIMSIZE2; j++)
            for (i = 0; i < cols_per_rank; i++)
            {
                *tempudata = (float)(1 + mpi_rank) * 2 * (j + 1) * (k + 1);
                tempudata++;
            }

    /* Set the cache size, keeping the current element count and
     * preemption settings. */
    if (nc_get_chunk_cache(NULL, &nelems_in, &preemption_in)) ERR;
    if (nc_set_chunk_cache(cache_size, nelems_in, preemption_in)) ERR;

    for (t = 0; t < NUM_TRIES; t++)
    {
        /* Create a netcdf-4 file. */
        if (nc_create_par(file_name, NC_NETCDF4, comm, info,
                          &ncid)) ERR;

        /* Create four dimensions. */
        if (nc_def_dim(ncid, "ud1", TIMELEN, dimuids)) ERR;
        if (nc_def_dim(ncid, "ud2", DIMSIZE3, &dimuids[1])) ERR;
        if (nc_def_dim(ncid, "ud3", DIMSIZE2, &dimuids[2])) ERR;
        if (nc_def_dim(ncid, "ud4", DIMSIZE1, &dimuids[3])) ERR;

        /* Create 10 variables.
         * NOTE(review): the variables are defined as NC_INT but are
         * written below through the float API - confirm this is
         * intended. */
        for (i = 0; i < NUMVARS; i++)
        {
            if (nc_def_var(ncid, var_name2[i], NC_INT, NDIMS2,
                           dimuids, &varid2[i])) ERR;
            /* Apply the requested chunking so that the layout on disk
             * matches the chunk sizes printed in the results row.
             * The storage argument 0 selects chunked storage. */
            if (chunk_size[0])
                if (nc_def_var_chunking(ncid, varid2[i], 0, chunk_size)) ERR;
        }

        if (nc_enddef(ncid)) ERR;

        /* Set up selection parameters: one time step, all of the two
         * middle dimensions, this rank's columns. */
        ustart[0] = 0;
        ustart[1] = 0;
        ustart[2] = 0;
        ustart[3] = DIMSIZE1 * mpi_rank / mpi_size;
        ucount[0] = 1;
        ucount[1] = DIMSIZE3;
        ucount[2] = DIMSIZE2;
        ucount[3] = cols_per_rank;

        /* Access parallel */
        for (i = 0; i < NUMVARS; i++)
            if (nc_var_par_access(ncid, varid2[i], access_flag)) ERR;

        starttime = MPI_Wtime();

        /* Write slabs of phony data, one time step at a time. */
        for (ustart[0] = 0; ustart[0] < TIMELEN; ustart[0]++)
            for (i = 0; i < NUMVARS; i++)
                if (nc_put_vara_float(ncid, varid2[i], ustart, ucount, udata)) ERR;

        /* Close the netcdf file. The close is inside the timed region. */
        if (nc_close(ncid)) ERR;

        endtime = MPI_Wtime();
        if (!mpi_rank)
        {
            /* Accumulate per-try averages (seconds and MB/s). */
            write_time += (endtime - starttime) / NUM_TRIES;
            bandwidth += (sizeof(float) * TIMELEN * DIMSIZE1 * DIMSIZE2 * DIMSIZE3 * NUMVARS) /
                ((endtime - starttime) * 1024 * 1024 * NUM_TRIES);
        }
    }
    free(udata);
    if (!mpi_rank)
    {
        char chunk_string[NC_MAX_NAME + 1] = "";

        /* What was our chunking? */
        if (chunk_size[0])
            sprintf(chunk_string, "%dx%dx%dx%d", (int)chunk_size[0], (int)chunk_size[1],
                    (int)chunk_size[2], (int)chunk_size[3]);
        else
            strcat(chunk_string, "contiguous");

        /* Print our results. */
        printf("%d\t\t%s\t%s\t%d\t\t%dx%dx%dx%d\t%s\t%f\t\t%f\t\t\t%d\n", mpi_size,
               "MPI-IO  ",
               (access_flag == NC_INDEPENDENT ? "independent" : "collective"),
               (int)cache_size / MEGABYTE, TIMELEN, DIMSIZE3, DIMSIZE2, DIMSIZE1, chunk_string, write_time,
               bandwidth, NUM_TRIES);
    }

    /* Delete this file. */
    remove(file_name);

    return 0;
}
247
248 #define NUM_MODES 2
249 #define NUM_FACC 2
250 #define NUM_CHUNK_COMBOS_2D 3
251 #define NUM_CHUNK_COMBOS_4D 4
252 #define NUM_CACHE_SIZES 3
253
main(int argc,char ** argv)254 int main(int argc, char **argv)
255 {
256 MPI_Comm comm = MPI_COMM_WORLD;
257 MPI_Info info = MPI_INFO_NULL;
258 int mpi_size, mpi_rank;
259 int facc_type[NUM_FACC] = {NC_INDEPENDENT, NC_COLLECTIVE};
260 size_t chunk_size_2d[NUM_CHUNK_COMBOS_2D][NDIMS1] = {{0, 0},
261 {DIMSIZE2, DIMSIZE1},
262 {DIMSIZE2/2 + 1, DIMSIZE1 / 2}};
263 size_t chunk_size_4d[NUM_CHUNK_COMBOS_4D][NDIMS2] = {{0, 0, 0, 0},
264 {1, DIMSIZE3, DIMSIZE2, DIMSIZE1},
265 {TIMELEN / 2, DIMSIZE3 / 2 + 1, DIMSIZE2 / 2 + 1, DIMSIZE1 / 2},
266 {TIMELEN, DIMSIZE3, DIMSIZE2, DIMSIZE1}};
267 size_t cache_size[NUM_CACHE_SIZES] = {MEGABYTE, 32 * MEGABYTE, 64 * MEGABYTE};
268 int f, c, i;
269
270 /* Initialize MPI. */
271 MPI_Init(&argc, &argv);
272 MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
273 MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
274
275 /* Check for invalid number of processors. */
276 if ((float)DIMSIZE1 / mpi_size != (int)(DIMSIZE1 / mpi_size))
277 {
278 printf("%d divided by number of processors must be a whole number!\n",
279 DIMSIZE1);
280 return -1;
281 }
282
283 if (!mpi_rank)
284 {
285 printf("*** Testing parallel IO for NASA...\n");
286 printf("num_proc\tMPI mode\taccess\t\tcache (MB)\tgrid size\tchunks\tavg. write time(s)\t"
287 "avg. write bandwidth(MB/s)\tnum_tries\n");
288 }
289
290 for (i = 0; i < NUM_CACHE_SIZES; i++)
291 for (f = 0; f < NUM_FACC; f++)
292 for (c = 0; c < NUM_CHUNK_COMBOS_2D; c++)
293 if (test_pio_2d(cache_size[i], facc_type[f], comm,
294 info, mpi_size, mpi_rank, chunk_size_2d[c])) ERR;
295
296 for (i = 0; i < NUM_CACHE_SIZES; i++)
297 for (f = 0; f < NUM_FACC; f++)
298 for (c = 0; c < NUM_CHUNK_COMBOS_4D; c++)
299 if (test_pio_4d(cache_size[i], facc_type[f], comm,
300 info, mpi_size, mpi_rank, chunk_size_4d[c])) ERR;
301
302 if (!mpi_rank)
303 SUMMARIZE_ERR;
304 MPI_Finalize();
305
306 if (!mpi_rank)
307 FINAL_RESULTS;
308
309 return 0;
310 }
311