1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2 /*
3 * Copyright (C) 1997 University of Chicago.
4 * See COPYRIGHT notice in top-level directory.
5 *
6 * Copyright (C) 2007 Oak Ridge National Laboratory
7 *
8 * Copyright (C) 2008 Sun Microsystems, Lustre group
9 */
10
11 #include "ad_lustre.h"
12 #include "adio_extern.h"
13
/*
 * ADIOI_LUSTRE_SetInfo - install Lustre-specific hints on an ADIO file.
 *
 *   fd          file whose info object (fd->info) and hint fields are updated
 *   users_info  hints supplied by the caller, or MPI_INFO_NULL
 *   error_code  set to MPI_SUCCESS on normal completion; on a hint mismatch
 *               it is handed to MPIO_ERR_CREATE_CODE_INFO_NOT_SAME
 *               (presumably that macro stores the error code there - confirm)
 *
 * When fd->info is still MPI_INFO_NULL the function creates it, installs
 * default values for the direct I/O and Lustre hints, reads the striping
 * hints from users_info, verifies that every rank holds the same striping
 * values as rank 0 (aborting otherwise) and lets rank 0 try to apply the
 * striping through an ioctl on a freshly opened descriptor.
 *
 * On every call the romio_lustre_co_ratio, romio_lustre_coll_threshold and
 * romio_lustre_ds_in_coll hints are processed and ADIOI_GEN_SetInfo is
 * invoked for the remaining hints.
 */
void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    char *value;
    int flag, stripe_val[3], str_factor = -1, str_unit = 0, start_iodev = -1;
    struct lov_user_md lum = { 0 };
    int err, myrank, fd_sys, perm, amode, old_mask;
    int int_val, tmp_val;
    static char myname[] = "ADIOI_LUSTRE_SETINFO";

    /* scratch buffer for hint values; released on every exit path */
    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char));
    if ((fd->info) == MPI_INFO_NULL) {
        /* This must be part of the open call. can set striping parameters
         * if necessary. */
        MPI_Info_create(&(fd->info));

        ADIOI_Info_set(fd->info, "direct_read", "false");
        ADIOI_Info_set(fd->info, "direct_write", "false");
        fd->direct_read = fd->direct_write = 0;
        /* initialize lustre hints */
        ADIOI_Info_set(fd->info, "romio_lustre_co_ratio", "1");
        fd->hints->fs_hints.lustre.co_ratio = 1;
        ADIOI_Info_set(fd->info, "romio_lustre_coll_threshold", "0");
        fd->hints->fs_hints.lustre.coll_threshold = 0;
        ADIOI_Info_set(fd->info, "romio_lustre_ds_in_coll", "enable");
        fd->hints->fs_hints.lustre.ds_in_coll = ADIOI_HINT_ENABLE;

        /* has user specified striping or server buffering parameters
         * and do they have the same value on all processes? */
        if (users_info != MPI_INFO_NULL) {
            /* striping information */
            ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag)
                str_unit = atoi(value);

            ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag)
                str_factor = atoi(value);

            ADIOI_Info_get(users_info, "romio_lustre_start_iodevice",
                           MPI_MAX_INFO_VAL, value, &flag);
            if (flag)
                start_iodev = atoi(value);

            /* direct read and write */
            ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag && (!strcmp(value, "true") || !strcmp(value, "TRUE"))) {
                ADIOI_Info_set(fd->info, "direct_read", "true");
                fd->direct_read = 1;
            }
            ADIOI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag && (!strcmp(value, "true") || !strcmp(value, "TRUE"))) {
                ADIOI_Info_set(fd->info, "direct_write", "true");
                fd->direct_write = 1;
            }
        }

        /* set striping information with ioctl */
        MPI_Comm_rank(fd->comm, &myrank);
        if (myrank == 0) {
            stripe_val[0] = str_factor;
            stripe_val[1] = str_unit;
            stripe_val[2] = start_iodev;
        }
        /* every rank receives rank 0's triple and compares it with its own */
        MPI_Bcast(stripe_val, 3, MPI_INT, 0, fd->comm);

        if (stripe_val[0] != str_factor
            || stripe_val[1] != str_unit
            || stripe_val[2] != start_iodev) {
            FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: All keys"
                    "-striping_factor:striping_unit:start_iodevice "
                    "need to be identical across all processes\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        } else if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) {
            /* if user has specified striping info, process 0 tries to set it */
            if (!myrank) {
                if (fd->perm == ADIO_PERM_NULL) {
                    /* read the current umask without leaving it changed */
                    old_mask = umask(022);
                    umask(old_mask);
                    perm = old_mask ^ 0666;
                }
                else perm = fd->perm;

                /* translate ADIO access flags into open(2) flags */
                amode = 0;
                if (fd->access_mode & ADIO_CREATE)
                    amode = amode | O_CREAT;
                if (fd->access_mode & ADIO_RDONLY)
                    amode = amode | O_RDONLY;
                if (fd->access_mode & ADIO_WRONLY)
                    amode = amode | O_WRONLY;
                if (fd->access_mode & ADIO_RDWR)
                    amode = amode | O_RDWR;
                if (fd->access_mode & ADIO_EXCL)
                    amode = amode | O_EXCL;

                /* we need to create file so ensure this is set */
                amode = amode | O_LOV_DELAY_CREATE | O_CREAT;

                fd_sys = open(fd->filename, amode, perm);
                if (fd_sys == -1) {
                    /* EEXIST is tolerated silently; other failures are only
                     * reported, not propagated */
                    if (errno != EEXIST)
                        fprintf(stderr,
                                "Failure to open file %s %d %d\n", strerror(errno), amode, perm);
                } else {
                    lum.lmm_magic = LOV_USER_MAGIC;
                    lum.lmm_pattern = 0;
                    lum.lmm_stripe_size = str_unit;
                    lum.lmm_stripe_count = str_factor;
                    lum.lmm_stripe_offset = start_iodev;

                    err = ioctl(fd_sys, LL_IOC_LOV_SETSTRIPE, &lum);
                    if (err == -1 && errno != EEXIST) {
                        fprintf(stderr, "Failure to set stripe info %s \n", strerror(errno));
                    }
                    close(fd_sys);
                }
            } /* End of striping parameters validation */
        }
        /* hold all ranks until rank 0 has finished its striping attempt */
        MPI_Barrier(fd->comm);
    }
    /* get other hint */
    if (users_info != MPI_INFO_NULL) {
        /* NOTE(review): each broadcast below is only reached on ranks where
         * the hint is present and passes the local test; if ranks disagree
         * on that, the collective calls will not match - confirm that callers
         * pass identical hints on every rank. */

        /* CO: IO Clients/OST,
         * to keep the load balancing between clients and OSTs */
        ADIOI_Info_get(users_info, "romio_lustre_co_ratio", MPI_MAX_INFO_VAL, value,
                       &flag);
        if (flag && (int_val = atoi(value)) > 0) {
            tmp_val = int_val;
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            if (tmp_val != int_val) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_lustre_co_ratio",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            ADIOI_Info_set(fd->info, "romio_lustre_co_ratio", value);
            fd->hints->fs_hints.lustre.co_ratio = atoi(value);
        }
        /* coll_threshold:
         * if the req size is bigger than this, collective IO may not be performed.
         */
        ADIOI_Info_get(users_info, "romio_lustre_coll_threshold", MPI_MAX_INFO_VAL, value,
                       &flag);
        if (flag && (int_val = atoi(value)) > 0) {
            tmp_val = int_val;
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            if (tmp_val != int_val) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_lustre_coll_threshold",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            ADIOI_Info_set(fd->info, "romio_lustre_coll_threshold", value);
            fd->hints->fs_hints.lustre.coll_threshold = atoi(value);
        }
        /* ds_in_coll: disable data sieving in collective IO */
        ADIOI_Info_get(users_info, "romio_lustre_ds_in_coll", MPI_MAX_INFO_VAL,
                       value, &flag);
        if (flag && (!strcmp(value, "disable") ||
                     !strcmp(value, "DISABLE"))) {
            tmp_val = int_val = 2;
            /* tmp_val is a single int, so the broadcast count must be 1;
             * a larger count would access memory beyond the variable */
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            if (tmp_val != int_val) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_lustre_ds_in_coll",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            ADIOI_Info_set(fd->info, "romio_lustre_ds_in_coll", "disable");
            fd->hints->fs_hints.lustre.ds_in_coll = ADIOI_HINT_DISABLE;
        }
    }
    /* set the values for collective I/O and data sieving parameters */
    ADIOI_GEN_SetInfo(fd, users_info, error_code);

    /* the ADIOI_Direct_read / ADIOI_Direct_write globals force direct I/O
     * regardless of the hints processed above */
    if (ADIOI_Direct_read) fd->direct_read = 1;
    if (ADIOI_Direct_write) fd->direct_write = 1;

    ADIOI_Free(value);

    /* NOTE(review): this overwrites whatever ADIOI_GEN_SetInfo stored in
     * *error_code; kept unchanged from the original behavior. */
    *error_code = MPI_SUCCESS;
}
202