1 /* -*- Mode: C; c-basic-offset:4 ; -*- */
2 /*
3  *   Copyright (C) 1997 University of Chicago.
4  *   See COPYRIGHT notice in top-level directory.
5  *
6  *   Copyright (C) 2007 Oak Ridge National Laboratory
7  *
8  *   Copyright (C) 2008 Sun Microsystems, Lustre group
9  */
10 
11 #include "ad_lustre.h"
12 #include "adio_extern.h"
13 
/*
 * ADIOI_LUSTRE_SetInfo - process Lustre-specific hints for an ADIO file.
 *
 * fd          file whose info object and hint fields are updated
 * users_info  hints supplied by the caller; may be MPI_INFO_NULL
 * error_code  set to MPI_SUCCESS at the end of the normal path; on a hint
 *             mismatch between ranks it is filled in through
 *             MPIO_ERR_CREATE_CODE_INFO_NOT_SAME and the function returns early
 *
 * When fd->info is still MPI_INFO_NULL (the comment in the code states this
 * happens as part of the open call) the info object is created, the Lustre
 * hint defaults are installed, and the striping hints ("striping_unit",
 * "striping_factor", "romio_lustre_start_iodevice") as well as
 * "direct_read"/"direct_write" are read from users_info.  If any striping
 * value was requested, rank 0 opens the file with O_LOV_DELAY_CREATE and
 * tries to apply the layout with the LL_IOC_LOV_SETSTRIPE ioctl.  Failures
 * there are only reported on stderr.
 *
 * On every call the hints "romio_lustre_co_ratio",
 * "romio_lustre_coll_threshold" and "romio_lustre_ds_in_coll" are examined,
 * then ADIOI_GEN_SetInfo() handles the generic hints.
 *
 * NOTE(review): the per-hint MPI_Bcast calls are only reached by ranks on
 * which the hint is present (and valid); this presumably relies on all ranks
 * passing the same set of keys -- confirm against callers.
 */
void ADIOI_LUSTRE_SetInfo(ADIO_File fd, MPI_Info users_info, int *error_code)
{
    char *value;
    int flag, stripe_val[3], str_factor = -1, str_unit = 0, start_iodev = -1;
    struct lov_user_md lum = { 0 };
    int err, myrank, fd_sys, perm, amode, old_mask;
    int int_val, tmp_val;
    static char myname[] = "ADIOI_LUSTRE_SETINFO";

    /* scratch buffer that receives hint values; released on every exit path */
    value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char));

    if ((fd->info) == MPI_INFO_NULL) {
        /* This must be part of the open call. can set striping parameters
         * if necessary. */
        MPI_Info_create(&(fd->info));

        ADIOI_Info_set(fd->info, "direct_read", "false");
        ADIOI_Info_set(fd->info, "direct_write", "false");
        fd->direct_read = fd->direct_write = 0;

        /* initialize lustre hints */
        ADIOI_Info_set(fd->info, "romio_lustre_co_ratio", "1");
        fd->hints->fs_hints.lustre.co_ratio = 1;
        ADIOI_Info_set(fd->info, "romio_lustre_coll_threshold", "0");
        fd->hints->fs_hints.lustre.coll_threshold = 0;
        ADIOI_Info_set(fd->info, "romio_lustre_ds_in_coll", "enable");
        fd->hints->fs_hints.lustre.ds_in_coll = ADIOI_HINT_ENABLE;

        /* has user specified striping or server buffering parameters
         * and do they have the same value on all processes? */
        if (users_info != MPI_INFO_NULL) {
            /* striping information */
            ADIOI_Info_get(users_info, "striping_unit", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag) {
                str_unit = atoi(value);
            }

            ADIOI_Info_get(users_info, "striping_factor", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag) {
                str_factor = atoi(value);
            }

            ADIOI_Info_get(users_info, "romio_lustre_start_iodevice",
                           MPI_MAX_INFO_VAL, value, &flag);
            if (flag) {
                start_iodev = atoi(value);
            }

            /* direct read and write */
            ADIOI_Info_get(users_info, "direct_read", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag && (!strcmp(value, "true") || !strcmp(value, "TRUE"))) {
                ADIOI_Info_set(fd->info, "direct_read", "true");
                fd->direct_read = 1;
            }
            ADIOI_Info_get(users_info, "direct_write", MPI_MAX_INFO_VAL,
                           value, &flag);
            if (flag && (!strcmp(value, "true") || !strcmp(value, "TRUE"))) {
                ADIOI_Info_set(fd->info, "direct_write", "true");
                fd->direct_write = 1;
            }
        }

        /* set striping information with ioctl */
        MPI_Comm_rank(fd->comm, &myrank);
        if (myrank == 0) {
            stripe_val[0] = str_factor;
            stripe_val[1] = str_unit;
            stripe_val[2] = start_iodev;
        }
        /* every rank receives rank 0's values and compares them to its own */
        MPI_Bcast(stripe_val, 3, MPI_INT, 0, fd->comm);

        if (stripe_val[0] != str_factor
            || stripe_val[1] != str_unit
            || stripe_val[2] != start_iodev) {
            FPRINTF(stderr, "ADIOI_LUSTRE_SetInfo: All keys"
                    "-striping_factor:striping_unit:start_iodevice "
                    "need to be identical across all processes\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        } else if ((str_factor > 0) || (str_unit > 0) || (start_iodev >= 0)) {
            /* if user has specified striping info, process 0 tries to set it */
            if (!myrank) {
                if (fd->perm == ADIO_PERM_NULL) {
                    /* umask() can only be read by setting it, so set a
                     * temporary value and restore the old one right away */
                    old_mask = umask(022);
                    umask(old_mask);
                    /* NOTE(review): XOR equals "0666 & ~mask" only when the
                     * mask has no bits outside 0666 -- confirm intent */
                    perm = old_mask ^ 0666;
                } else {
                    perm = fd->perm;
                }

                /* translate ADIO access flags into open(2) flags */
                amode = 0;
                if (fd->access_mode & ADIO_CREATE) {
                    amode = amode | O_CREAT;
                }
                if (fd->access_mode & ADIO_RDONLY) {
                    amode = amode | O_RDONLY;
                }
                if (fd->access_mode & ADIO_WRONLY) {
                    amode = amode | O_WRONLY;
                }
                if (fd->access_mode & ADIO_RDWR) {
                    amode = amode | O_RDWR;
                }
                if (fd->access_mode & ADIO_EXCL) {
                    amode = amode | O_EXCL;
                }

                /* we need to create file so ensure this is set */
                amode = amode | O_LOV_DELAY_CREATE | O_CREAT;

                fd_sys = open(fd->filename, amode, perm);
                if (fd_sys == -1) {
                    /* EEXIST is tolerated silently; anything else is only
                     * reported, the function carries on */
                    if (errno != EEXIST) {
                        fprintf(stderr,
                                "Failure to open file %s %d %d\n",
                                strerror(errno), amode, perm);
                    }
                } else {
                    lum.lmm_magic = LOV_USER_MAGIC;
                    lum.lmm_pattern = 0;
                    lum.lmm_stripe_size = str_unit;
                    lum.lmm_stripe_count = str_factor;
                    lum.lmm_stripe_offset = start_iodev;

                    err = ioctl(fd_sys, LL_IOC_LOV_SETSTRIPE, &lum);
                    if (err == -1 && errno != EEXIST) {
                        fprintf(stderr, "Failure to set stripe info %s \n",
                                strerror(errno));
                    }
                    close(fd_sys);
                }
            } /* End of striping parameters validation */
        }
        /* all ranks wait until rank 0 has finished its striping attempt */
        MPI_Barrier(fd->comm);
    }

    /* get other hint */
    if (users_info != MPI_INFO_NULL) {
        /* CO: IO Clients/OST,
         * to keep the load balancing between clients and OSTs */
        ADIOI_Info_get(users_info, "romio_lustre_co_ratio", MPI_MAX_INFO_VAL,
                       value, &flag);
        if (flag && (int_val = atoi(value)) > 0) {
            tmp_val = int_val;
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            if (tmp_val != int_val) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_lustre_co_ratio",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            ADIOI_Info_set(fd->info, "romio_lustre_co_ratio", value);
            fd->hints->fs_hints.lustre.co_ratio = atoi(value);
        }

        /* coll_threshold:
         * if the req size is bigger than this, collective IO may not be
         * performed.
         */
        ADIOI_Info_get(users_info, "romio_lustre_coll_threshold",
                       MPI_MAX_INFO_VAL, value, &flag);
        if (flag && (int_val = atoi(value)) > 0) {
            tmp_val = int_val;
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            if (tmp_val != int_val) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_lustre_coll_threshold",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            ADIOI_Info_set(fd->info, "romio_lustre_coll_threshold", value);
            fd->hints->fs_hints.lustre.coll_threshold = atoi(value);
        }

        /* ds_in_coll: disable data sieving in collective IO */
        ADIOI_Info_get(users_info, "romio_lustre_ds_in_coll", MPI_MAX_INFO_VAL,
                       value, &flag);
        if (flag && (!strcmp(value, "disable") ||
                     !strcmp(value, "DISABLE"))) {
            tmp_val = int_val = 2;
            /* tmp_val is a single int, so the count must be 1; a count of 2
             * would let non-root ranks write past the variable */
            MPI_Bcast(&tmp_val, 1, MPI_INT, 0, fd->comm);
            if (tmp_val != int_val) {
                MPIO_ERR_CREATE_CODE_INFO_NOT_SAME(myname,
                                                   "romio_lustre_ds_in_coll",
                                                   error_code);
                ADIOI_Free(value);
                return;
            }
            ADIOI_Info_set(fd->info, "romio_lustre_ds_in_coll", "disable");
            fd->hints->fs_hints.lustre.ds_in_coll = ADIOI_HINT_DISABLE;
        }
    }

    /* set the values for collective I/O and data sieving parameters */
    ADIOI_GEN_SetInfo(fd, users_info, error_code);

    /* ADIOI_Direct_read / ADIOI_Direct_write are defined outside this
     * function; when non-zero they force direct I/O regardless of the hints */
    if (ADIOI_Direct_read) {
        fd->direct_read = 1;
    }
    if (ADIOI_Direct_write) {
        fd->direct_write = 1;
    }

    ADIOI_Free(value);

    /* NOTE(review): this overwrites whatever ADIOI_GEN_SetInfo stored in
     * *error_code -- kept as in the original, confirm that is intended */
    *error_code = MPI_SUCCESS;
}
202