1 /*
2  * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 /*
27  * Pathname canonicalization for Win32 file systems
28  */
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <ctype.h>
34 #include <assert.h>
35 #include <sys/stat.h>
36 
37 #include <windows.h>
38 #include <winbase.h>
39 #include <errno.h>
40 
/* We should also include jdk_util.h here, for the prototype of JDK_Canonicalize.
   This isn't possible though, because canonicalize_md.c is also used in
   different contexts within Oracle.
 */
45 #include "io_util_md.h"
46 
47 /* Copy bytes to dst, not going past dend; return dst + number of bytes copied,
48    or NULL if dend would have been exceeded.  If first != '\0', copy that byte
49    before copying bytes from src to send - 1. */
50 static WCHAR*
wcp(WCHAR * dst,WCHAR * dend,WCHAR first,WCHAR * src,WCHAR * send)51 wcp(WCHAR *dst, WCHAR *dend, WCHAR first, WCHAR *src, WCHAR *send)
52 {
53     WCHAR *p = src, *q = dst;
54     if (first != L'\0') {
55         if (q < dend) {
56             *q++ = first;
57         } else {
58             errno = ENAMETOOLONG;
59             return NULL;
60         }
61     }
62     if (send - p > dend - q) {
63         errno = ENAMETOOLONG;
64         return NULL;
65     }
66     while (p < send)
67         *q++ = *p++;
68     return q;
69 }
70 
71 /* Find first instance of '\\' at or following start.  Return the address of
72    that byte or the address of the null terminator if '\\' is not found. */
73 static WCHAR *
wnextsep(WCHAR * start)74 wnextsep(WCHAR *start)
75 {
76     WCHAR *p = start;
77     int c;
78     while ((c = *p) && (c != L'\\'))
79         p++;
80     return p;
81 }
82 
83 /* Tell whether the given string contains any wildcard characters */
84 static int
wwild(WCHAR * start)85 wwild(WCHAR *start)
86 {
87     WCHAR *p = start;
88     int c;
89     while (c = *p) {
90         if ((c == L'*') || (c == L'?'))
91             return 1;
92         p++;
93     }
94     return 0;
95 }
96 
97 /* Tell whether the given string contains prohibited combinations of dots.
98    In the canonicalized form no path element may have dots at its end.
99    Allowed canonical paths: c:\xa...dksd\..ksa\.lk    c:\...a\.b\cd..x.x
100    Prohibited canonical paths: c:\..\x  c:\x.\d c:\...
101 */
102 static int
wdots(WCHAR * start)103 wdots(WCHAR *start)
104 {
105     WCHAR *p = start;
106     // Skip "\\.\" prefix
107     if (wcslen(p) > 4 && !wcsncmp(p, L"\\\\.\\", 4))
108         p = p + 4;
109 
110     while (*p) {
111         if ((p = wcschr(p, L'.')) == NULL) // find next occurrence of '.'
112             return 0; // no more dots
113         p++; // next char
114         while ((*p) == L'.') // go to the end of dots
115             p++;
116         if (*p && (*p != L'\\')) // path element does not end with a dot
117             p++; // go to the next char
118         else
119             return 1; // path element does end with a dot - prohibited
120     }
121     return 0; // no prohibited combinations of dots found
122 }
123 
124 /* If the lookup of a particular prefix fails because the file does not exist,
125    because it is of the wrong type, because access is denied, or because the
126    network is unreachable then canonicalization does not fail, it terminates
127    successfully after copying the rest of the original path to the result path.
128    Other I/O errors cause an error return.
129 */
130 int
lastErrorReportable()131 lastErrorReportable()
132 {
133     DWORD errval = GetLastError();
134     if ((errval == ERROR_FILE_NOT_FOUND)
135         || (errval == ERROR_DIRECTORY)
136         || (errval == ERROR_PATH_NOT_FOUND)
137         || (errval == ERROR_BAD_NETPATH)
138         || (errval == ERROR_BAD_NET_NAME)
139         || (errval == ERROR_ACCESS_DENIED)
140         || (errval == ERROR_NETWORK_UNREACHABLE)
141         || (errval == ERROR_NETWORK_ACCESS_DENIED)) {
142         return 0;
143     }
144     return 1;
145 }
146 
147 /* Convert a pathname to canonical form.  The input orig_path is assumed to
148    have been converted to native form already, via JVM_NativePath().  This is
149    necessary because _fullpath() rejects duplicate separator characters on
150    Win95, though it accepts them on NT. */
151 int
wcanonicalize(WCHAR * orig_path,WCHAR * result,int size)152 wcanonicalize(WCHAR *orig_path, WCHAR *result, int size)
153 {
154     WIN32_FIND_DATAW fd;
155     HANDLE h;
156     WCHAR *path;    /* Working copy of path */
157     WCHAR *src, *dst, *dend, c;
158 
159     /* Reject paths that contain wildcards */
160     if (wwild(orig_path)) {
161         errno = EINVAL;
162         return -1;
163     }
164 
165     if ((path = (WCHAR*)malloc(size * sizeof(WCHAR))) == NULL)
166         return -1;
167 
168     /* Collapse instances of "foo\.." and ensure absoluteness.  Note that
169        contrary to the documentation, the _fullpath procedure does not require
170        the drive to be available.  */
171     if(!_wfullpath(path, orig_path, size)) {
172         goto err;
173     }
174 
175     if (wdots(path)) /* Check for prohibited combinations of dots */
176         goto err;
177 
178     src = path;            /* Start scanning here */
179     dst = result;        /* Place results here */
180     dend = dst + size;        /* Don't go to or past here */
181 
182     /* Copy prefix, assuming path is absolute */
183     c = src[0];
184     if (((c <= L'z' && c >= L'a') || (c <= L'Z' && c >= L'A'))
185        && (src[1] == L':') && (src[2] == L'\\')) {
186         /* Drive specifier */
187         *src = towupper(*src);    /* Canonicalize drive letter */
188         if (!(dst = wcp(dst, dend, L'\0', src, src + 2))) {
189             goto err;
190         }
191 
192         src += 2;
193     } else if ((src[0] == L'\\') && (src[1] == L'\\')) {
194         /* UNC pathname */
195         WCHAR *p;
196         p = wnextsep(src + 2);    /* Skip past host name */
197         if (!*p) {
198             /* A UNC pathname must begin with "\\\\host\\share",
199                so reject this path as invalid if there is no share name */
200             errno = EINVAL;
201             goto err;
202         }
203         p = wnextsep(p + 1);    /* Skip past share name */
204         if (!(dst = wcp(dst, dend, L'\0', src, p)))
205             goto err;
206         src = p;
207     } else {
208         /* Invalid path */
209         errno = EINVAL;
210         goto err;
211     }
212     /* At this point we have copied either a drive specifier ("z:") or a UNC
213        prefix ("\\\\host\\share") to the result buffer, and src points to the
214        first byte of the remainder of the path.  We now scan through the rest
215        of the path, looking up each prefix in order to find the true name of
216        the last element of each prefix, thereby computing the full true name of
217        the original path. */
218     while (*src) {
219         WCHAR *p = wnextsep(src + 1);    /* Find next separator */
220         WCHAR c = *p;
221         WCHAR *pathbuf;
222         int pathlen;
223 
224         assert(*src == L'\\');        /* Invariant */
225         *p = L'\0';            /* Temporarily clear separator */
226 
227         if ((pathlen = (int)wcslen(path)) > MAX_PATH - 1) {
228             pathbuf = getPrefixed(path, pathlen);
229             h = FindFirstFileW(pathbuf, &fd);    /* Look up prefix */
230             free(pathbuf);
231         } else
232             h = FindFirstFileW(path, &fd);    /* Look up prefix */
233 
234         *p = c;                /* Restore separator */
235         if (h != INVALID_HANDLE_VALUE) {
236             /* Lookup succeeded; append true name to result and continue */
237             FindClose(h);
238             if (!(dst = wcp(dst, dend, L'\\', fd.cFileName,
239                             fd.cFileName + wcslen(fd.cFileName)))){
240                 goto err;
241             }
242             src = p;
243             continue;
244         } else {
245             if (!lastErrorReportable()) {
246                if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))){
247                    goto err;
248                }
249                 break;
250             } else {
251                 goto err;
252             }
253         }
254     }
255 
256     if (dst >= dend) {
257     errno = ENAMETOOLONG;
258         goto err;
259     }
260     *dst = L'\0';
261     free(path);
262     return 0;
263 
264  err:
265     free(path);
266     return -1;
267 }
268 
269 /* Convert a pathname to canonical form.  The input prefix is assumed
270    to be in canonical form already, and the trailing filename must not
271    contain any wildcard, dot/double dot, or other "tricky" characters
272    that are rejected by the canonicalize() routine above.  This
273    routine is present to allow the canonicalization prefix cache to be
274    used while still returning canonical names with the correct
275    capitalization. */
276 int
wcanonicalizeWithPrefix(WCHAR * canonicalPrefix,WCHAR * pathWithCanonicalPrefix,WCHAR * result,int size)277 wcanonicalizeWithPrefix(WCHAR *canonicalPrefix, WCHAR *pathWithCanonicalPrefix, WCHAR *result, int size)
278 {
279     WIN32_FIND_DATAW fd;
280     HANDLE h;
281     WCHAR *src, *dst, *dend;
282     WCHAR *pathbuf;
283     int pathlen;
284 
285     src = pathWithCanonicalPrefix;
286     dst = result;        /* Place results here */
287     dend = dst + size;   /* Don't go to or past here */
288 
289 
290     if ((pathlen=(int)wcslen(pathWithCanonicalPrefix)) > MAX_PATH - 1) {
291         pathbuf = getPrefixed(pathWithCanonicalPrefix, pathlen);
292         h = FindFirstFileW(pathbuf, &fd);    /* Look up prefix */
293         free(pathbuf);
294     } else
295         h = FindFirstFileW(pathWithCanonicalPrefix, &fd);    /* Look up prefix */
296     if (h != INVALID_HANDLE_VALUE) {
297         /* Lookup succeeded; append true name to result and continue */
298         FindClose(h);
299         if (!(dst = wcp(dst, dend, L'\0',
300                         canonicalPrefix,
301                         canonicalPrefix + wcslen(canonicalPrefix)))) {
302             return -1;
303         }
304         if (!(dst = wcp(dst, dend, L'\\',
305                         fd.cFileName,
306                         fd.cFileName + wcslen(fd.cFileName)))) {
307             return -1;
308         }
309     } else {
310         if (!lastErrorReportable()) {
311             if (!(dst = wcp(dst, dend, L'\0', src, src + wcslen(src)))) {
312                 return -1;
313             }
314         } else {
315             return -1;
316         }
317     }
318 
319     if (dst >= dend) {
320         errno = ENAMETOOLONG;
321         return -1;
322     }
323     *dst = L'\0';
324     return 0;
325 }
326 
327 /* Non-Wide character version of canonicalize.
328    Converts to wchar and delegates to wcanonicalize. */
329 JNIEXPORT int
JDK_Canonicalize(const char * orig,char * out,int len)330 JDK_Canonicalize(const char *orig, char *out, int len) {
331     wchar_t* wpath = NULL;
332     wchar_t* wresult = NULL;
333     int wpath_len;
334     int ret = -1;
335 
336     /* Get required buffer size to convert to Unicode */
337     wpath_len = MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS,
338                                     orig, -1, NULL, 0);
339     if (wpath_len == 0) {
340         goto finish;
341     }
342 
343     if ((wpath = (wchar_t*) malloc(sizeof(wchar_t) * wpath_len)) == NULL) {
344         goto finish;
345     }
346 
347     if (MultiByteToWideChar(CP_ACP, MB_ERR_INVALID_CHARS,
348                             orig, -1, wpath, wpath_len) == 0) {
349         goto finish;
350     }
351 
352     if ((wresult = (wchar_t*) malloc(sizeof(wchar_t) * len)) == NULL) {
353         goto finish;
354     }
355 
356     if (wcanonicalize(wpath, wresult, len) != 0) {
357         goto finish;
358     }
359 
360     if (WideCharToMultiByte(CP_ACP, 0,
361                             wresult, -1, out, len, NULL, NULL) == 0) {
362         goto finish;
363     }
364 
365     // Change return value to success.
366     ret = 0;
367 
368 finish:
369     free(wresult);
370     free(wpath);
371 
372     return ret;
373 }
374 
375 /* The appropriate location of getPrefixed() is io_util_md.c, but it is
376    also used in a non-OpenJDK context within Oracle. There, canonicalize_md.c
377    is already pulled in and compiled, so to avoid more complicated solutions
378    we keep this method here.
379  */
380 
381 /* copy \\?\ or \\?\UNC\ to the front of path */
382 JNIEXPORT WCHAR*
getPrefixed(const WCHAR * path,int pathlen)383 getPrefixed(const WCHAR* path, int pathlen) {
384     WCHAR* pathbuf = (WCHAR*)malloc((pathlen + 10) * sizeof (WCHAR));
385     if (pathbuf != 0) {
386         if (path[0] == L'\\' && path[1] == L'\\') {
387             if (path[2] == L'?' && path[3] == L'\\'){
388                 /* if it already has a \\?\ don't do the prefix */
389                 wcscpy(pathbuf, path );
390             } else {
391                 /* only UNC pathname includes double slashes here */
392                 wcscpy(pathbuf, L"\\\\?\\UNC\0");
393                 wcscat(pathbuf, path + 1);
394             }
395         } else {
396             wcscpy(pathbuf, L"\\\\?\\\0");
397             wcscat(pathbuf, path );
398         }
399     }
400     return pathbuf;
401 }
402