1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1997, 2013 Oracle and/or its affiliates.  All rights reserved.
5  *
6  * $Id$
7  */
8 
9 #include "db_config.h"
10 
11 #include "db_int.h"
12 
13 /*
14  * __os_open --
15  *	Open a file descriptor (including page size and log size information).
16  */
17 int
__os_open(env,name,page_size,flags,mode,fhpp)18 __os_open(env, name, page_size, flags, mode, fhpp)
19 	ENV *env;
20 	const char *name;
21 	u_int32_t page_size, flags;
22 	int mode;
23 	DB_FH **fhpp;
24 {
25 	DB_ENV *dbenv;
26 	DB_FH *fhp;
27 #ifndef DB_WINCE
28 	DWORD cluster_size, sector_size, free_clusters, total_clusters;
29 	_TCHAR *drive, dbuf[4]; /* <letter><colon><slash><nul> */
30 
31 #endif
32 	int access, attr, createflag, nrepeat, ret, share;
33 	_TCHAR *tname;
34 
35 	dbenv = env == NULL ? NULL : env->dbenv;
36 	*fhpp = NULL;
37 	tname = NULL;
38 
39 	if (dbenv != NULL &&
40 	    FLD_ISSET(dbenv->verbose, DB_VERB_FILEOPS | DB_VERB_FILEOPS_ALL))
41 		__db_msg(env, DB_STR_A("0025", "fileops: open %s",
42 		    "%s"), name);
43 
44 #undef	OKFLAGS
45 #define	OKFLAGS								\
46 	(DB_OSO_ABSMODE | DB_OSO_CREATE | DB_OSO_DIRECT | DB_OSO_DSYNC |\
47 	DB_OSO_EXCL | DB_OSO_RDONLY | DB_OSO_REGION |	DB_OSO_SEQ |	\
48 	DB_OSO_TEMP | DB_OSO_TRUNC)
49 	if ((ret = __db_fchk(env, "__os_open", flags, OKFLAGS)) != 0)
50 		return (ret);
51 
52 	TO_TSTRING(env, name, tname, ret);
53 	if (ret != 0)
54 		goto err;
55 
56 	/*
57 	 * Allocate the file handle and copy the file name.  We generally only
58 	 * use the name for verbose or error messages, but on systems where we
59 	 * can't unlink temporary files immediately, we use the name to unlink
60 	 * the temporary file when the file handle is closed.
61 	 *
62 	 * Lock the ENV handle and insert the new file handle on the list.
63 	 */
64 	if ((ret = __os_calloc(env, 1, sizeof(DB_FH), &fhp)) != 0)
65 		return (ret);
66 	if ((ret = __os_strdup(env, name, &fhp->name)) != 0)
67 		goto err;
68 	if (env != NULL) {
69 		MUTEX_LOCK(env, env->mtx_env);
70 		TAILQ_INSERT_TAIL(&env->fdlist, fhp, q);
71 		MUTEX_UNLOCK(env, env->mtx_env);
72 		F_SET(fhp, DB_FH_ENVLINK);
73 	}
74 
75 	/*
76 	 * Otherwise, use the Windows/32 CreateFile interface so that we can
77 	 * play magic games with files to get data flush effects similar to
78 	 * the POSIX O_DSYNC flag.
79 	 *
80 	 * !!!
81 	 * We currently ignore the 'mode' argument.  It would be possible
82 	 * to construct a set of security attributes that we could pass to
83 	 * CreateFile that would accurately represents the mode.  In worst
84 	 * case, this would require looking up user and all group names and
85 	 * creating an entry for each.  Alternatively, we could call the
86 	 * _chmod (partial emulation) function after file creation, although
87 	 * this leaves us with an obvious race.  However, these efforts are
88 	 * largely meaningless on FAT, the most common file system, which
89 	 * only has a "readable" and "writable" flag, applying to all users.
90 	 */
91 	access = GENERIC_READ;
92 	if (!LF_ISSET(DB_OSO_RDONLY))
93 		access |= GENERIC_WRITE;
94 
95 #ifdef DB_WINCE
96 	/*
97 	 * WinCE translates these flags into share flags for
98 	 * CreateFileForMapping.
99 	 * Also WinCE does not support the FILE_SHARE_DELETE flag.
100 	 */
101 	if (LF_ISSET(DB_OSO_REGION))
102 		share = GENERIC_READ | GENERIC_WRITE;
103 	else
104 		share = FILE_SHARE_READ | FILE_SHARE_WRITE;
105 #else
106 	share = FILE_SHARE_READ | FILE_SHARE_WRITE;
107 	if (__os_is_winnt())
108 		share |= FILE_SHARE_DELETE;
109 #endif
110 	attr = FILE_ATTRIBUTE_NORMAL;
111 
112 	/*
113 	 * Reproduce POSIX 1003.1 semantics: if O_CREATE and O_EXCL are both
114 	 * specified, fail, returning EEXIST, unless we create the file.
115 	 */
116 	if (LF_ISSET(DB_OSO_CREATE) && LF_ISSET(DB_OSO_EXCL))
117 		createflag = CREATE_NEW;	/* create only if !exist*/
118 	else if (!LF_ISSET(DB_OSO_CREATE) && LF_ISSET(DB_OSO_TRUNC))
119 		createflag = TRUNCATE_EXISTING; /* truncate, fail if !exist */
120 	else if (LF_ISSET(DB_OSO_TRUNC))
121 		createflag = CREATE_ALWAYS;	/* create and truncate */
122 	else if (LF_ISSET(DB_OSO_CREATE))
123 		createflag = OPEN_ALWAYS;	/* open or create */
124 	else
125 		createflag = OPEN_EXISTING;	/* open only if existing */
126 
127 	if (LF_ISSET(DB_OSO_DSYNC)) {
128 		F_SET(fhp, DB_FH_NOSYNC);
129 		attr |= FILE_FLAG_WRITE_THROUGH;
130 	}
131 
132 #ifndef DB_WINCE
133 	if (LF_ISSET(DB_OSO_SEQ))
134 		attr |= FILE_FLAG_SEQUENTIAL_SCAN;
135 	else
136 		attr |= FILE_FLAG_RANDOM_ACCESS;
137 #endif
138 
139 	if (LF_ISSET(DB_OSO_TEMP))
140 		attr |= FILE_FLAG_DELETE_ON_CLOSE;
141 
142 	/*
143 	 * We can turn filesystem buffering off if the page size is a
144 	 * multiple of the disk's sector size. To find the sector size,
145 	 * we call GetDiskFreeSpace, which expects a drive name like "d:\\"
146 	 * or NULL for the current disk (i.e., a relative path).
147 	 *
148 	 * WinCE only has GetDiskFreeSpaceEx which does not
149 	 * return the sector size.
150 	 */
151 #ifndef DB_WINCE
152 	if (LF_ISSET(DB_OSO_DIRECT) && page_size != 0 && name[0] != '\0') {
153 		if (name[1] == ':') {
154 			drive = dbuf;
155 			_sntprintf(dbuf, sizeof(dbuf), _T("%c:\\"), tname[0]);
156 		} else
157 			drive = NULL;
158 
159 		/*
160 		 * We ignore all results except sectorsize, but some versions
161 		 * of Windows require that the parameters are non-NULL.
162 		 */
163 		if (GetDiskFreeSpace(drive, &cluster_size,
164 		    &sector_size, &free_clusters, &total_clusters) &&
165 		    page_size % sector_size == 0)
166 			attr |= FILE_FLAG_NO_BUFFERING;
167 	}
168 #endif
169 
170 	fhp->handle = fhp->trunc_handle = INVALID_HANDLE_VALUE;
171 	for (nrepeat = 1;; ++nrepeat) {
172 		if (fhp->handle == INVALID_HANDLE_VALUE) {
173 #ifdef DB_WINCE
174 			if (LF_ISSET(DB_OSO_REGION))
175 				fhp->handle = CreateFileForMapping(tname,
176 				    access, share, NULL, createflag, attr, 0);
177 			else
178 #endif
179 				fhp->handle = CreateFile(tname,
180 				    access, share, NULL, createflag, attr, 0);
181 		}
182 
183 #ifdef HAVE_FTRUNCATE
184 		/*
185 		 * Older versions of WinCE may not support truncate, if so, the
186 		 * HAVE_FTRUNCATE macro should be #undef'ed, and we
187 		 * don't need to open this second handle.
188 		 *
189 		 * WinCE dose not support opening a second handle on the same
190 		 * file via CreateFileForMapping, but this dose not matter
191 		 * since we are not truncating region files but database files.
192 		 *
193 		 * But some older versions of WinCE even
194 		 * dose not allow a second handle opened via CreateFile. If
195 		 * this is the case, users will need to #undef the
196 		 * HAVE_FTRUNCATE macro in build_wince/db_config.h.
197 		 */
198 
199 		/*
200 		 * Windows does not provide truncate directly.  There is no
201 		 * safe way to use a handle for truncate concurrently with
202 		 * reads or writes.  To deal with this, we open a second handle
203 		 * used just for truncating.
204 		 */
205 		if (fhp->handle != INVALID_HANDLE_VALUE &&
206 		    !LF_ISSET(DB_OSO_RDONLY | DB_OSO_TEMP) &&
207 		    fhp->trunc_handle == INVALID_HANDLE_VALUE
208 #ifdef DB_WINCE
209 		    /* Do not open trunc handle for region files. */
210 		    && (!LF_ISSET(DB_OSO_REGION))
211 #endif
212 		    )
213 			fhp->trunc_handle = CreateFile(
214 			    tname, access, share, NULL, OPEN_EXISTING, attr, 0);
215 #endif
216 
217 #ifndef HAVE_FTRUNCATE
218 		if (fhp->handle == INVALID_HANDLE_VALUE)
219 #else
220 		if (fhp->handle == INVALID_HANDLE_VALUE ||
221 		    (!LF_ISSET(DB_OSO_RDONLY | DB_OSO_TEMP) &&
222 		    fhp->trunc_handle == INVALID_HANDLE_VALUE
223 #ifdef DB_WINCE
224 		    /* Do not open trunc handle for region files. */
225 		    && (!LF_ISSET(DB_OSO_REGION))
226 #endif
227 		    ))
228 #endif
229 		{
230 			/*
231 			 * If it's a "temporary" error, we retry up to 3 times,
232 			 * waiting up to 12 seconds.  While it's not a problem
233 			 * if we can't open a database, an inability to open a
234 			 * log file is cause for serious dismay.
235 			 */
236 			ret = __os_posix_err(__os_get_syserr());
237 			if ((ret != ENFILE && ret != EMFILE && ret != ENOSPC) ||
238 			    nrepeat > 3)
239 				goto err;
240 
241 			__os_yield(env, nrepeat * 2, 0);
242 		} else
243 			break;
244 	}
245 
246 	FREE_STRING(env, tname);
247 
248 	if (LF_ISSET(DB_OSO_REGION))
249 		F_SET(fhp, DB_FH_REGION);
250 	F_SET(fhp, DB_FH_OPENED);
251 	*fhpp = fhp;
252 	return (0);
253 
254 err:	FREE_STRING(env, tname);
255 	if (fhp != NULL)
256 		(void)__os_closehandle(env, fhp);
257 	return (ret);
258 }
259