1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <sal/config.h>
21 #include <sal/log.hxx>
22 
23 #include <algorithm>
24 #include <optional>
25 #include <stack>
26 #include <string_view>
27 
28 #include <systools/win32/uwinapi.h>
29 
30 #include "file_url.hxx"
31 #include "file_error.hxx"
32 
33 #include <rtl/alloc.h>
34 #include <rtl/character.hxx>
35 #include <rtl/strbuf.hxx>
36 #include <rtl/ustring.hxx>
37 #include <rtl/ustrbuf.hxx>
38 #include <osl/mutex.h>
39 #include <o3tl/char16_t2wchar_t.hxx>
40 
41 #include "path_helper.hxx"
42 
// Path prefixes as UTF-16 string literals. They are kept as macros over literals because
// SAL_N_ELEMENTS is applied to them in this file to obtain their length (including the NUL).

// Device prefix; this file treats exactly this string as the "system root" path.
#define WSTR_SYSTEM_ROOT_PATH               u"\\\\.\\"
// Prefix that marks a long path (allowed to exceed MAX_PATH).
#define WSTR_LONG_PATH_PREFIX               u"\\\\?\\"
// Prefix that marks a long path in UNC notation.
#define WSTR_LONG_PATH_PREFIX_UNC           u"\\\\?\\UNC\\"
46 
47 // FileURL functions
48 
49 namespace
50 {
51 // Internal functions that expect only backslashes as path separators
52 
startsWithDriveColon(const sal_Unicode * p)53 bool startsWithDriveColon(const sal_Unicode* p) { return rtl::isAsciiAlpha(p[0]) && p[1] == ':'; }
54 
startsWithDriveColon(const rtl_uString * p)55 bool startsWithDriveColon(const rtl_uString* p) { return startsWithDriveColon(p->buffer); }
56 
startsWithDriveColonSlash(const rtl_uString * p)57 bool startsWithDriveColonSlash(const rtl_uString* p)
58 {
59     return startsWithDriveColon(p) && p->buffer[2] == '\\';
60 }
61 
// Checks whether the first two characters are both backslashes.
// The second character is only inspected when the first one is a backslash.
bool startsWithSlashSlash(const sal_Unicode* p)
{
    if (p[0] != '\\')
        return false;
    return p[1] == '\\';
}
63 
startsWithSlashSlash(const rtl_uString * p)64 bool startsWithSlashSlash(const rtl_uString* p) { return startsWithSlashSlash(p->buffer); }
65 
66 // An absolute path starts either with \\ (an UNC or device path like \\.\ or \\?\)
67 // or with a ASCII alpha character followed by colon followed by backslash.
isAbsolute(const rtl_uString * p)68 bool isAbsolute(const rtl_uString* p)
69 {
70     return startsWithSlashSlash(p) || startsWithDriveColonSlash(p);
71 }
72 
onSameDrive(const rtl_uString * p1,const rtl_uString * p2)73 bool onSameDrive(const rtl_uString* p1, const rtl_uString* p2)
74 {
75     return rtl::toAsciiUpperCase(p1->buffer[0]) == rtl::toAsciiUpperCase(p2->buffer[0])
76            && rtl::toAsciiUpperCase(p1->buffer[1]) == rtl::toAsciiUpperCase(p2->buffer[1]);
77 }
78 
getRootLength(const rtl_uString * path)79 sal_Int32 getRootLength(const rtl_uString* path)
80 {
81     assert(isAbsolute(path));
82     const sal_Unicode* p = path->buffer;
83     sal_Int32 nResult = 0;
84     if (startsWithSlashSlash(p))
85     {
86         // Cases:
87         //   1. Device UNC: \\?\UNC\server\share or \\.\UNC\server\share
88         //   2. Non-device UNC: \\server\share
89         //   3. Device non-UNC: \\?\C: or \\.\C:
90         bool bUNC = false;
91         if ((p[2] == '.' || p[2] == '?') && p[3] == '\\')
92         {
93             if (p[4] == 'U' && p[5] == 'N' && p[6] == 'C' && p[7] == '\\')
94             {
95                 // \\?\UNC\server\share or \\.\UNC\server\share
96                 nResult = 8;
97                 bUNC = true;
98             }
99             else
100             {
101                 // \\?\C: or \\.\C:
102                 assert(startsWithDriveColon(p + 4));
103                 nResult = 6;
104             }
105         }
106         else
107         {
108             // \\server\share
109             nResult = 2;
110             bUNC = true;
111         }
112         if (bUNC)
113         {
114             // \\?\UNC\server\share or \\.\UNC\server\share or \\server\share
115             assert(nResult < path->length && p[nResult] != '\\');
116             // Skip server name and share name
117             for (int nSlashes = 0; nResult < path->length; ++nResult)
118             {
119                 if (p[nResult] == '\\' && ++nSlashes == 2)
120                     break;
121             }
122         }
123     }
124     else
125     {
126         // C:
127         assert(startsWithDriveColon(p));
128         nResult = 2;
129     }
130     return std::min(nResult, path->length);
131 }
132 
pathView(const rtl_uString * path,bool bOnlyRoot)133 std::u16string_view pathView(const rtl_uString* path, bool bOnlyRoot)
134 {
135     return std::u16string_view(path->buffer, bOnlyRoot ? getRootLength(path) : path->length);
136 }
137 
combinePath(std::u16string_view basePath,const sal_Unicode * relPath)138 OUString combinePath(std::u16string_view basePath, const sal_Unicode* relPath)
139 {
140     const bool needSep = basePath.back() != '\\' && relPath[0] != '\\';
141     const auto sSeparator = needSep ? std::u16string_view(u"\\") : std::u16string_view();
142     if (basePath.back() == '\\' && relPath[0] == '\\')
143         ++relPath; // avoid two adjacent backslashes
144     return OUString::Concat(basePath) + sSeparator + relPath;
145 }
146 
// Normalizes the part of an absolute path that follows its root (see getRootLength):
//   1. current-directory segments (a backslash, a dot, then a backslash or the end) are dropped
//   2. parent-directory segments (a backslash, two dots, then a backslash or the end) drop the
//      preceding segment; the result is never cut back beyond the root
//   3. runs of backslashes are collapsed into one
// The root itself is copied unchanged.
OUString removeRelativeParts(const OUString& p)
{
    const sal_Int32 nLen = p.getLength();
    const sal_Int32 nRootEnd = getRootLength(p.pData);

    OUStringBuffer aResult(nLen);
    aResult.append(p.subView(0, nRootEnd));

    // Positions in aResult at which the backslash introducing each kept segment was written
    std::stack<sal_Int32> aSegmentStarts;
    bool bPrevWasSlash = false;

    for (sal_Int32 i = nRootEnd; i < nLen; ++i)
    {
        const sal_Unicode c = p[i];
        if (c != '\\')
        {
            bPrevWasSlash = false;
            aResult.append(c);
            continue;
        }

        if (i + 1 < nLen && p[i + 1] == '.')
        {
            if (i + 2 == nLen || p[i + 2] == '\\')
            {
                // 1. Current directory: step onto the dot, so that the next iteration
                // sees the following backslash (which may start another relative part)
                ++i;
            }
            else if (p[i + 2] == '.' && (i + 3 == nLen || p[i + 3] == '\\'))
            {
                // 2. Parent directory: when a backslash was just written, its stack entry
                // marks an empty segment and is discarded first
                if (bPrevWasSlash && !aSegmentStarts.empty())
                    aSegmentStarts.pop();
                sal_Int32 nParentPos = nRootEnd;
                if (!aSegmentStarts.empty())
                {
                    nParentPos = aSegmentStarts.top();
                    aSegmentStarts.pop();
                }
                aResult.truncate(nParentPos);
                bPrevWasSlash = false; // the backslash after the parent part is gone, too
                i += 2; // step onto the second dot; the next backslash is handled next
            }
        }

        if (bPrevWasSlash)
            continue; // 3. Collapse consecutive backslashes

        aSegmentStarts.push(aResult.getLength());
        bPrevWasSlash = true;
        aResult.append(c);
    }
    return aResult.makeStringAndClear();
}
191 }
192 
IsValidFilePathComponent(sal_Unicode const * lpComponent,sal_Unicode const ** lppComponentEnd,DWORD dwFlags)193 static bool IsValidFilePathComponent(
194     sal_Unicode const * lpComponent, sal_Unicode const **lppComponentEnd,
195     DWORD dwFlags)
196 {
197         sal_Unicode const * lpComponentEnd = nullptr;
198         sal_Unicode const * lpCurrent = lpComponent;
199         bool    bValid = true;  /* Assume success */
200         sal_Unicode cLast = 0;
201 
202         /* Path component length must not exceed MAX_PATH even if long path with "\\?\" prefix is used */
203 
204         while ( !lpComponentEnd && lpCurrent && lpCurrent - lpComponent < MAX_PATH )
205         {
206             switch ( *lpCurrent )
207             {
208                 /* Both backslash and slash determine the end of a path component */
209             case '\0':
210             case '/':
211             case '\\':
212                 switch ( cLast )
213                 {
214                     /* Component must not end with '.' or blank and can't be empty */
215 
216                 case '.':
217                     if ( dwFlags & VALIDATEPATH_ALLOW_ELLIPSE )
218                     {
219                         if ( (dwFlags & VALIDATEPATH_ALLOW_INVALID_SPACE_AND_PERIOD) ||
220                              1 == lpCurrent - lpComponent )
221                         {
222                             /* Either do allow periods anywhere, or current directory */
223                             lpComponentEnd = lpCurrent;
224                             break;
225                         }
226                         else if ( 2 == lpCurrent - lpComponent && '.' == *lpComponent )
227                         {
228                             /* Parent directory is O.K. */
229                             lpComponentEnd = lpCurrent;
230                             break;
231                         }
232                     }
233                     [[fallthrough]];
234                 case 0:
235                 case ' ':
236                     if ( dwFlags & VALIDATEPATH_ALLOW_INVALID_SPACE_AND_PERIOD )
237                         lpComponentEnd = lpCurrent;
238                     else
239                     {
240                         lpComponentEnd = lpCurrent - 1;
241                         bValid = false;
242                     }
243                     break;
244                 default:
245                     lpComponentEnd = lpCurrent;
246                     break;
247                 }
248                 break;
249                 /* The following characters are reserved */
250             case '?':
251             case '*':
252             case '<':
253             case '>':
254             case '\"':
255             case '|':
256             case ':':
257                 lpComponentEnd = lpCurrent;
258                 bValid = false;
259                 break;
260             default:
261                 /* Characters below ASCII 32 are not allowed */
262                 if ( *lpCurrent < ' ' )
263                 {
264                     lpComponentEnd = lpCurrent;
265                     bValid = false;
266                 }
267                 break;
268             }
269             cLast = *lpCurrent++;
270         }
271 
272         /*  If we don't reached the end of the component the length of the component was too long
273             (See condition of while loop) */
274         if ( !lpComponentEnd )
275         {
276             bValid = false;
277             lpComponentEnd = lpCurrent;
278         }
279 
280         if ( bValid )
281         {
282             // Empty components are not allowed
283             if ( lpComponentEnd - lpComponent < 1 )
284                 bValid = false;
285 
286             // If we reached the end of the string nullptr is returned
287             else if ( !*lpComponentEnd )
288                 lpComponentEnd = nullptr;
289 
290         }
291 
292         if ( lppComponentEnd )
293             *lppComponentEnd = lpComponentEnd;
294 
295         return bValid;
296 }
297 
countInitialSeparators(sal_Unicode const * path)298 static sal_Int32 countInitialSeparators(sal_Unicode const * path) {
299     sal_Unicode const * p = path;
300     while (*p == '\\' || *p == '/') {
301         ++p;
302     }
303     return p - path;
304 }
305 
IsValidFilePath(rtl_uString * path,DWORD dwFlags,rtl_uString ** corrected)306 DWORD IsValidFilePath(rtl_uString *path, DWORD dwFlags, rtl_uString **corrected)
307 {
308         sal_Unicode const * lpszPath = path->buffer;
309         sal_Unicode const * lpComponent = lpszPath;
310         bool    bValid = true;
311         DWORD   dwPathType = PATHTYPE_ERROR;
312         sal_Int32 nLength = rtl_uString_getLength( path );
313 
314         if ( dwFlags & VALIDATEPATH_ALLOW_RELATIVE )
315             dwFlags |= VALIDATEPATH_ALLOW_ELLIPSE;
316 
317         DWORD   dwCandidatPathType = PATHTYPE_ERROR;
318 
319         if ( 0 == rtl_ustr_shortenedCompareIgnoreAsciiCase_WithLength( path->buffer, nLength, WSTR_LONG_PATH_PREFIX_UNC, SAL_N_ELEMENTS(WSTR_LONG_PATH_PREFIX_UNC) - 1, SAL_N_ELEMENTS(WSTR_LONG_PATH_PREFIX_UNC) - 1 ) )
320         {
321             /* This is long path in UNC notation */
322             lpComponent = lpszPath + SAL_N_ELEMENTS(WSTR_LONG_PATH_PREFIX_UNC) - 1;
323             dwCandidatPathType = PATHTYPE_ABSOLUTE_UNC | PATHTYPE_IS_LONGPATH;
324         }
325         else if ( 0 == rtl_ustr_shortenedCompareIgnoreAsciiCase_WithLength( path->buffer, nLength, WSTR_LONG_PATH_PREFIX, SAL_N_ELEMENTS(WSTR_LONG_PATH_PREFIX) - 1, SAL_N_ELEMENTS(WSTR_LONG_PATH_PREFIX) - 1 ) )
326         {
327             /* This is long path */
328             lpComponent = lpszPath + SAL_N_ELEMENTS(WSTR_LONG_PATH_PREFIX) - 1;
329 
330             if (startsWithDriveColon(lpComponent))
331             {
332                 lpComponent += 2;
333                 dwCandidatPathType = PATHTYPE_ABSOLUTE_LOCAL | PATHTYPE_IS_LONGPATH;
334             }
335         }
336         else if ( 2 == countInitialSeparators( lpszPath ) )
337         {
338             /* The UNC path notation */
339             lpComponent = lpszPath + 2;
340             dwCandidatPathType = PATHTYPE_ABSOLUTE_UNC;
341         }
342         else if (startsWithDriveColon(lpszPath))
343         {
344             /* Local path verification. Must start with <drive>: */
345             lpComponent = lpszPath + 2;
346             dwCandidatPathType = PATHTYPE_ABSOLUTE_LOCAL;
347         }
348 
349         if ( ( dwCandidatPathType & PATHTYPE_MASK_TYPE ) == PATHTYPE_ABSOLUTE_UNC )
350         {
351             bValid = IsValidFilePathComponent( lpComponent, &lpComponent, VALIDATEPATH_ALLOW_ELLIPSE );
352 
353             /* So far we have a valid servername. Now let's see if we also have a network resource */
354 
355             dwPathType = dwCandidatPathType;
356 
357             if ( bValid )
358             {
359                 if ( lpComponent &&  !*++lpComponent )
360                     lpComponent = nullptr;
361 
362                 if ( !lpComponent )
363                 {
364                     dwPathType |= PATHTYPE_IS_SERVER;
365                 }
366                 else
367                 {
368                     /* Now test the network resource */
369 
370                     bValid = IsValidFilePathComponent( lpComponent, &lpComponent, 0 );
371 
372                     /* If we now reached the end of the path, everything is O.K. */
373 
374                     if ( bValid && (!lpComponent || !*++lpComponent ) )
375                     {
376                         lpComponent = nullptr;
377                         dwPathType |= PATHTYPE_IS_VOLUME;
378                     }
379                 }
380             }
381         }
382         else if (  ( dwCandidatPathType & PATHTYPE_MASK_TYPE ) == PATHTYPE_ABSOLUTE_LOCAL )
383         {
384             if ( 1 == countInitialSeparators( lpComponent ) )
385                 lpComponent++;
386             else if ( *lpComponent )
387                 bValid = false;
388 
389             dwPathType = dwCandidatPathType;
390 
391             /* Now we are behind the backslash or it was a simple drive without backslash */
392 
393             if ( bValid && !*lpComponent )
394             {
395                 lpComponent = nullptr;
396                 dwPathType |= PATHTYPE_IS_VOLUME;
397             }
398         }
399         else if ( dwFlags & VALIDATEPATH_ALLOW_RELATIVE )
400         {
401             /* Can be a relative path */
402             lpComponent = lpszPath;
403 
404             /* Relative path can start with a backslash */
405 
406             if ( 1 == countInitialSeparators( lpComponent ) )
407             {
408                 lpComponent++;
409                 if ( !*lpComponent )
410                     lpComponent = nullptr;
411             }
412 
413             dwPathType = PATHTYPE_RELATIVE;
414         }
415         else
416         {
417             /* Anything else is an error */
418             bValid = false;
419             lpComponent = lpszPath;
420         }
421 
422         /* Now validate each component of the path */
423         rtl_uString * lastCorrected = path;
424         while ( bValid && lpComponent )
425         {
426             // Correct path by merging consecutive slashes:
427             if (*lpComponent == '\\' && corrected != nullptr) {
428                 sal_Int32 i = lpComponent - lpszPath;
429                 rtl_uString_newReplaceStrAt(corrected, lastCorrected, i, 1, nullptr);
430                     //TODO: handle out-of-memory
431                 lastCorrected = *corrected;
432                 lpszPath = (*corrected)->buffer;
433                 lpComponent = lpszPath + i;
434             }
435 
436             bValid = IsValidFilePathComponent( lpComponent, &lpComponent, dwFlags | VALIDATEPATH_ALLOW_INVALID_SPACE_AND_PERIOD);
437 
438             if ( bValid && lpComponent )
439             {
440                 lpComponent++;
441 
442                 /* If the string behind the backslash is empty, we've done */
443 
444                 if ( !*lpComponent )
445                     lpComponent = nullptr;
446             }
447         }
448 
449         /* The path can be longer than MAX_PATH only in case it has the longpath prefix */
450         if ( bValid && !( dwPathType &  PATHTYPE_IS_LONGPATH ) && rtl_ustr_getLength( lpszPath ) >= MAX_PATH )
451         {
452             bValid = false;
453         }
454 
455         return bValid ? dwPathType : PATHTYPE_ERROR;
456 }
457 
458 // Expects a proper absolute or relative path
PathRemoveFileSpec(LPWSTR lpPath,LPWSTR lpFileName,sal_Int32 nFileBufLen)459 static sal_Int32 PathRemoveFileSpec(LPWSTR lpPath, LPWSTR lpFileName, sal_Int32 nFileBufLen )
460 {
461     sal_Int32 nRemoved = 0;
462 
463     if (nFileBufLen && wcscmp(lpPath, L"\\\\") != 0) // tdf#98343 do not remove leading UNC backslashes!
464     {
465         lpFileName[0] = 0;
466         LPWSTR  lpLastBkSlash = wcsrchr( lpPath, '\\' );
467         LPWSTR  lpLastSlash = wcsrchr( lpPath, '/' );
468         LPWSTR  lpLastDelimiter = std::max(lpLastSlash, lpLastBkSlash);
469 
470         if ( lpLastDelimiter )
471         {
472                 sal_Int32 nDelLen = wcslen( lpLastDelimiter );
473                 if ( 1 == nDelLen )
474                 {
475                     if ( lpLastDelimiter > lpPath && *(lpLastDelimiter - 1) != ':' )
476                     {
477                         *lpLastDelimiter = 0;
478                         *lpFileName = 0;
479                         nRemoved = nDelLen;
480                     }
481                 }
482                 else if ( nDelLen && nDelLen - 1 < nFileBufLen )
483                 {
484                     wcscpy( lpFileName, lpLastDelimiter + 1 );
485                     *(++lpLastDelimiter) = 0;
486                     nRemoved = nDelLen - 1;
487                 }
488         }
489     }
490 
491     return nRemoved;
492 }
493 
494 // Undocumented in SHELL32.DLL ordinal 32
PathAddBackslash(LPWSTR lpPath,sal_uInt32 nBufLen)495 static LPWSTR PathAddBackslash(LPWSTR lpPath, sal_uInt32 nBufLen)
496 {
497     LPWSTR  lpEndPath = nullptr;
498 
499     if ( lpPath )
500     {
501             std::size_t nLen = wcslen(lpPath);
502 
503             if ( !nLen || ( lpPath[nLen-1] != '\\' && lpPath[nLen-1] != '/' && nLen < nBufLen - 1 ) )
504             {
505                 lpEndPath = lpPath + nLen;
506                 *lpEndPath++ = '\\';
507                 *lpEndPath = 0;
508             }
509     }
510     return lpEndPath;
511 }
512 
513 // True if the szPath + szFile is just a special prefix, not a path which we may test for existence.
514 // E.g., \\ or \\server or \\server\share or \\? or \\?\UNC or \\?\UNC\server or \\?\UNC\server\share
IsPathSpecialPrefix(LPWSTR szPath,LPWSTR szFile)515 static bool IsPathSpecialPrefix(LPWSTR szPath, LPWSTR szFile)
516 {
517     if (szPath[0] == '\\' && szPath[1] == '\\')
518     {
519         if (szPath[2] == 0)
520             return true; // "\\" -> now the server name or "." or "?" will append
521         else if (szPath[2] == '?' && szPath[3] == '\\')
522         {
523             if (szPath[4] == 0)
524                 return wcscmp(szFile, L"UNC") == 0; // "\\?\" -> now "UNC" will append
525             else
526             {
527                 if (wcsncmp(szPath + 4, L"UNC\\", 4) == 0)
528                 {
529                     if (szPath[8] == 0)
530                         return true; // "\\?\UNC\" -> now the server name will append
531                     else if (const wchar_t* pBackSlash = wcschr(szPath + 8, '\\'))
532                         return *(pBackSlash + 1) == 0; // "\\?\UNC\Server\" -> now share name will append
533                 }
534             }
535         }
536         else if (szPath[2] != '.')
537         {
538             if (const wchar_t* pBackSlash = wcschr(szPath + 2, '\\'))
539                 return *(pBackSlash + 1) == 0; // "\\Server\" -> now share name will append
540         }
541     }
542     return false;
543 }
544 
545 // Expects a proper absolute or relative path. NB: It is different from GetLongPathName WinAPI!
GetCaseCorrectPathNameEx(LPWSTR lpszPath,sal_uInt32 cchBuffer,DWORD nSkipLevels,bool bCheckExistence)546 static DWORD GetCaseCorrectPathNameEx(
547     LPWSTR  lpszPath,   // path buffer to convert
548     sal_uInt32 cchBuffer,      // size of path buffer
549     DWORD   nSkipLevels,
550     bool bCheckExistence )
551 {
552         ::osl::LongPathBuffer< WCHAR > szFile( MAX_PATH + 1 );
553         sal_Int32 nRemoved = PathRemoveFileSpec( lpszPath, szFile, MAX_PATH + 1 );
554         sal_Int32 nLastStepRemoved = nRemoved;
555         while ( nLastStepRemoved && szFile[0] == 0 )
556         {
557             // remove separators
558             nLastStepRemoved = PathRemoveFileSpec( lpszPath, szFile, MAX_PATH + 1 );
559             nRemoved += nLastStepRemoved;
560         }
561 
562         if ( nRemoved )
563         {
564             bool bSkipThis = false;
565 
566             if ( 0 == wcscmp( szFile, L".." ) )
567             {
568                 bSkipThis = true;
569                 nSkipLevels += 1;
570             }
571             else if ( 0 == wcscmp( szFile, L"." ) )
572             {
573                 bSkipThis = true;
574             }
575             else if ( nSkipLevels )
576             {
577                 bSkipThis = true;
578                 nSkipLevels--;
579             }
580             else
581                 bSkipThis = false;
582 
583             if ( !GetCaseCorrectPathNameEx( lpszPath, cchBuffer, nSkipLevels, bCheckExistence ) )
584                 return 0;
585 
586             PathAddBackslash( lpszPath, cchBuffer );
587 
588             /* Analyze parent if not only a trailing backslash was cut but a real file spec */
589             if ( !bSkipThis )
590             {
591                 if ( bCheckExistence )
592                 {
593 
594                     if (IsPathSpecialPrefix(lpszPath, szFile))
595                     {
596                         /* add the segment name back */
597                         wcscat(lpszPath, szFile);
598                     }
599                     else
600                     {
601                         osl::LongPathBuffer<WCHAR> aShortPath(MAX_LONG_PATH);
602                         wcscpy(aShortPath, lpszPath);
603                         wcscat(aShortPath, szFile);
604 
605                         WIN32_FIND_DATAW aFindFileData;
606                         HANDLE hFind = FindFirstFileW(aShortPath, &aFindFileData);
607 
608                         if (IsValidHandle(hFind))
609                         {
610                             wcscat(lpszPath, aFindFileData.cFileName[0]
611                                                  ? aFindFileData.cFileName
612                                                  : aFindFileData.cAlternateFileName);
613 
614                             FindClose(hFind);
615                         }
616                         else
617                             lpszPath[0] = 0;
618                     }
619                 }
620                 else
621                 {
622                     /* add the segment name back */
623                     wcscat( lpszPath, szFile );
624                 }
625             }
626         }
627         else
628         {
629             /* File specification can't be removed therefore the short path is either a drive
630                or a network share. If still levels to skip are left, the path specification
631                tries to travel below the file system root */
632             if ( nSkipLevels )
633                     lpszPath[0] = 0;
634             else
635                 _wcsupr( lpszPath );
636         }
637 
638         return wcslen( lpszPath );
639 }
640 
GetCaseCorrectPathName(LPCWSTR lpszShortPath,LPWSTR lpszLongPath,sal_uInt32 cchBuffer,bool bCheckExistence)641 DWORD GetCaseCorrectPathName(
642     LPCWSTR lpszShortPath,  // file name
643     LPWSTR  lpszLongPath,   // path buffer
644     sal_uInt32 cchBuffer,      // size of path buffer
645     bool bCheckExistence
646 )
647 {
648     /* Special handling for "\\.\" as system root */
649     if ( lpszShortPath && 0 == wcscmp( lpszShortPath, o3tl::toW(WSTR_SYSTEM_ROOT_PATH) ) )
650     {
651         if ( cchBuffer >= SAL_N_ELEMENTS(WSTR_SYSTEM_ROOT_PATH) )
652         {
653             wcscpy( lpszLongPath, o3tl::toW(WSTR_SYSTEM_ROOT_PATH) );
654             return SAL_N_ELEMENTS(WSTR_SYSTEM_ROOT_PATH) - 1;
655         }
656         else
657         {
658             return SAL_N_ELEMENTS(WSTR_SYSTEM_ROOT_PATH) - 1;
659         }
660     }
661     else if ( lpszShortPath )
662     {
663         if ( wcslen( lpszShortPath ) <= cchBuffer )
664         {
665             wcscpy( lpszLongPath, lpszShortPath );
666             return GetCaseCorrectPathNameEx( lpszLongPath, cchBuffer, 0, bCheckExistence );
667         }
668     }
669 
670     return 0;
671 }
672 
osl_decodeURL_(const OString & sUTF8)673 static std::optional<OUString> osl_decodeURL_(const OString& sUTF8)
674 {
675     const char  *pSrcEnd;
676     const char  *pSrc;
677     bool        bValidEncoded = true;   /* Assume success */
678 
679     /* The resulting decoded string length is shorter or equal to the source length */
680 
681     const sal_Int32 nSrcLen = sUTF8.getLength();
682     OStringBuffer aBuffer(nSrcLen + 1);
683 
684     pSrc = sUTF8.getStr();
685     pSrcEnd = pSrc + nSrcLen;
686 
687     /* Now decode the URL what should result in a UTF-8 string */
688     while ( bValidEncoded && pSrc < pSrcEnd )
689     {
690         switch ( *pSrc )
691         {
692         case '%':
693             {
694                 char    aToken[3];
695                 char    aChar;
696 
697                 pSrc++;
698                 aToken[0] = *pSrc++;
699                 aToken[1] = *pSrc++;
700                 aToken[2] = 0;
701 
702                 aChar = static_cast<char>(strtoul( aToken, nullptr, 16 ));
703 
704                 /* The chars are path delimiters and must not be encoded */
705 
706                 if ( 0 == aChar || '\\' == aChar || '/' == aChar || ':' == aChar )
707                     bValidEncoded = false;
708                 else
709                     aBuffer.append(aChar);
710             }
711             break;
712         case '\0':
713         case '#':
714         case '?':
715             bValidEncoded = false;
716             break;
717         default:
718             aBuffer.append(*pSrc++);
719             break;
720         }
721     }
722 
723     return bValidEncoded ? OUString(aBuffer.getStr(), aBuffer.getLength(), RTL_TEXTENCODING_UTF8)
724                          : std::optional<OUString>();
725 }
726 
osl_encodeURL_(std::u16string_view sURL)727 static OUString osl_encodeURL_(std::u16string_view sURL)
728 {
729     /* Encode non ascii characters within the URL */
730 
731     const char     *pURLScan;
732     sal_Int32       nURLScanLen;
733     sal_Int32       nURLScanCount;
734 
735     OString sUTF8 = OUStringToOString(sURL, RTL_TEXTENCODING_UTF8);
736 
737     OUStringBuffer sEncodedURL(sUTF8.getLength() * 3 + 1);
738     pURLScan = sUTF8.getStr();
739     nURLScanLen = sUTF8.getLength();
740     nURLScanCount = 0;
741 
742     while ( nURLScanCount < nURLScanLen )
743     {
744         char cCurrent = *pURLScan;
745         switch ( cCurrent )
746         {
747         default:
748             if (!( ( cCurrent >= 'a' && cCurrent <= 'z' ) || ( cCurrent >= 'A' && cCurrent <= 'Z' ) || ( cCurrent >= '0' && cCurrent <= '9' ) ) )
749             {
750                 char buf[3];
751                 sprintf( buf, "%02X", static_cast<unsigned char>(cCurrent) );
752                 sEncodedURL.append('%').appendAscii(buf, 2);
753                 break;
754             }
755             [[fallthrough]];
756         case '!':
757         case '\'':
758         case '(':
759         case ')':
760         case '*':
761         case '-':
762         case '.':
763         case '_':
764         case '~':
765         case '$':
766         case '&':
767         case '+':
768         case ',':
769         case '=':
770         case '@':
771         case ':':
772         case '/':
773         case '\\':
774         case '|':
775             sEncodedURL.appendAscii(&cCurrent, 1);
776             break;
777         case 0:
778             break;
779         }
780 
781         pURLScan++;
782         nURLScanCount++;
783     }
784 
785     return sEncodedURL.makeStringAndClear();
786 }
787 
osl_getSystemPathFromFileURL_(rtl_uString * strURL,rtl_uString ** pustrPath,bool bAllowRelative)788 oslFileError osl_getSystemPathFromFileURL_( rtl_uString *strURL, rtl_uString **pustrPath, bool bAllowRelative )
789 {
790     OUString sTempPath;
791     oslFileError        nError = osl_File_E_INVAL;  /* Assume failure */
792 
793     /*  If someone hasn't encoded the complete URL we convert it to UTF8 now to prevent from
794         having a mixed encoded URL later */
795 
796     OString sUTF8 = OUStringToOString(OUString::unacquired(&strURL), RTL_TEXTENCODING_UTF8);
797 
798     /* If the length of strUTF8 and strURL differs it indicates that the URL was not correct encoded */
799 
800     SAL_WARN_IF(
801         sUTF8.getLength() != strURL->length &&
802         0 == rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength( strURL->buffer, strURL->length, "file:\\", 6 )
803         , "sal.osl"
804         ,"osl_getSystemPathFromFileURL: \"" << OUString(strURL) << "\" is not encoded !!!");
805 
806     if (auto sDecodedURL = osl_decodeURL_(sUTF8))
807     {
808         /* Replace backslashes and pipes */
809 
810         sDecodedURL = sDecodedURL->replace('/', '\\').replace('|', ':');
811 
812         /* Must start with "file:/" */
813         if ( sDecodedURL->startsWithIgnoreAsciiCase("file:\\") )
814         {
815             sal_uInt32  nSkip;
816 
817             if ( sDecodedURL->startsWithIgnoreAsciiCase("file:\\\\\\") )
818                 nSkip = 8;
819             else if (
820                 sDecodedURL->startsWithIgnoreAsciiCase("file:\\\\localhost\\") ||
821                 sDecodedURL->startsWithIgnoreAsciiCase("file:\\\\127.0.0.1\\")
822                       )
823                 nSkip = 17;
824             else if ( sDecodedURL->startsWithIgnoreAsciiCase("file:\\\\") )
825                 nSkip = 5;
826             else
827                 nSkip = 6;
828 
829             const sal_uInt32 nDecodedLen = sDecodedURL->getLength();
830 
831             /* Indicates local root */
832             if ( nDecodedLen == nSkip )
833                 sTempPath = WSTR_SYSTEM_ROOT_PATH;
834             else
835             {
836                 /* do not separate the directory and file case, so the maximal path length without prefix is MAX_PATH-12 */
837                 if ( nDecodedLen - nSkip <= MAX_PATH - 12 )
838                 {
839                     sTempPath = sDecodedURL->subView(nSkip);
840                 }
841                 else
842                 {
843                     ::osl::LongPathBuffer< sal_Unicode > aBuf( MAX_LONG_PATH );
844                     sal_uInt32 nNewLen = GetCaseCorrectPathName( o3tl::toW(sDecodedURL->getStr()) + nSkip,
845                                                                  o3tl::toW(aBuf),
846                                                                  aBuf.getBufSizeInSymbols(),
847                                                                  false );
848 
849                     if ( nNewLen <= MAX_PATH - 12
850                       || sDecodedURL->matchIgnoreAsciiCase(WSTR_SYSTEM_ROOT_PATH, nSkip)
851                       || sDecodedURL->matchIgnoreAsciiCase(WSTR_LONG_PATH_PREFIX, nSkip) )
852                     {
853                         sTempPath = std::u16string_view(aBuf, nNewLen);
854                     }
855                     else if ( sDecodedURL->match("\\\\", nSkip) )
856                     {
857                         /* it should be an UNC path, use the according prefix */
858                         sTempPath = OUString::Concat(WSTR_LONG_PATH_PREFIX_UNC) + std::u16string_view(aBuf + 2, nNewLen - 2);
859                     }
860                     else
861                     {
862                         sTempPath = OUString::Concat(WSTR_LONG_PATH_PREFIX) + std::u16string_view(aBuf, nNewLen);
863                     }
864                 }
865             }
866 
867             if ( IsValidFilePath( sTempPath.pData, VALIDATEPATH_ALLOW_ELLIPSE, &sTempPath.pData ) )
868                 nError = osl_File_E_None;
869         }
870         else if ( bAllowRelative )  /* This maybe a relative file URL */
871         {
872             /* In future the relative path could be converted to absolute if it is too long */
873             sTempPath = *sDecodedURL;
874 
875             if ( IsValidFilePath( sTempPath.pData, VALIDATEPATH_ALLOW_RELATIVE | VALIDATEPATH_ALLOW_ELLIPSE, &sTempPath.pData ) )
876                 nError = osl_File_E_None;
877         }
878         else
879           SAL_INFO_IF(nError, "sal.osl",
880               "osl_getSystemPathFromFileURL: \"" << OUString(strURL) << "\" is not an absolute FileURL");
881 
882     }
883 
884     if ( osl_File_E_None == nError )
885         rtl_uString_assign( pustrPath, sTempPath.pData );
886 
887     SAL_INFO_IF(nError, "sal.osl",
888         "osl_getSystemPathFromFileURL: \"" << OUString(strURL) << "\" is not a FileURL");
889 
890     return nError;
891 }
892 
osl_getFileURLFromSystemPath(rtl_uString * strPath,rtl_uString ** pstrURL)893 oslFileError osl_getFileURLFromSystemPath( rtl_uString* strPath, rtl_uString** pstrURL )
894 {
895     oslFileError nError = osl_File_E_INVAL; /* Assume failure */
896     OUString sTempURL;
897     DWORD dwPathType = PATHTYPE_ERROR;
898 
899     if (strPath)
900         dwPathType = IsValidFilePath(strPath, VALIDATEPATH_ALLOW_RELATIVE, nullptr);
901 
902     if (dwPathType)
903     {
904         OUString sTempPath;
905         const OUString& sPath = OUString::unacquired(&strPath);
906 
907         if ( dwPathType & PATHTYPE_IS_LONGPATH )
908         {
909             /* the path has the longpath prefix, lets remove it */
910             switch ( dwPathType & PATHTYPE_MASK_TYPE )
911             {
912                 case PATHTYPE_ABSOLUTE_UNC:
913                     static_assert(SAL_N_ELEMENTS(WSTR_LONG_PATH_PREFIX_UNC) - 1 == 8,
914                                   "Unexpected long path UNC prefix!");
915 
916                     /* generate the normal UNC path */
917                     sTempPath = "\\\\" + sPath.copy(8).replace('\\', '/');
918                     break;
919 
920                 case PATHTYPE_ABSOLUTE_LOCAL:
921                     static_assert(SAL_N_ELEMENTS(WSTR_LONG_PATH_PREFIX) - 1 == 4,
922                                   "Unexpected long path prefix!");
923 
924                     /* generate the normal path */
925                     sTempPath = sPath.copy(4).replace('\\', '/');
926                     break;
927 
928                 default:
929                     OSL_FAIL( "Unexpected long path format!" );
930                     sTempPath = sPath.replace('\\', '/');
931                     break;
932             }
933         }
934         else
935         {
936             /* Replace backslashes */
937             sTempPath = sPath.replace('\\', '/');
938         }
939 
940         switch ( dwPathType & PATHTYPE_MASK_TYPE )
941         {
942         case PATHTYPE_RELATIVE:
943             sTempURL = sTempPath;
944             nError = osl_File_E_None;
945             break;
946         case PATHTYPE_ABSOLUTE_UNC:
947             sTempURL = "file:" + sTempPath;
948             nError = osl_File_E_None;
949             break;
950         case PATHTYPE_ABSOLUTE_LOCAL:
951             sTempURL = "file:///" + sTempPath;
952             nError = osl_File_E_None;
953             break;
954         default:
955             break;
956         }
957     }
958 
959     if ( osl_File_E_None == nError )
960     {
961         /* Encode the URL */
962         rtl_uString_assign(pstrURL, osl_encodeURL_(sTempURL).pData);
963         OSL_ASSERT(*pstrURL != nullptr);
964     }
965 
966     SAL_INFO_IF(nError, "sal.osl",
967         "osl_getFileURLFromSystemPath: \"" << OUString(strPath) << "\" is not a systemPath");
968     return nError;
969 }
970 
osl_getSystemPathFromFileURL(rtl_uString * ustrURL,rtl_uString ** pustrPath)971 oslFileError SAL_CALL osl_getSystemPathFromFileURL(
972     rtl_uString *ustrURL, rtl_uString **pustrPath)
973 {
974     return osl_getSystemPathFromFileURL_( ustrURL, pustrPath, true );
975 }
976 
osl_searchFileURL(rtl_uString * ustrFileName,rtl_uString * ustrSystemSearchPath,rtl_uString ** pustrPath)977 oslFileError SAL_CALL osl_searchFileURL(
978     rtl_uString *ustrFileName,
979     rtl_uString *ustrSystemSearchPath,
980     rtl_uString **pustrPath)
981 {
982     rtl_uString     *ustrUNCPath = nullptr;
983     rtl_uString     *ustrSysPath = nullptr;
984     oslFileError    error;
985 
986     /* First try to interpret the file name as a URL even a relative one */
987     error = osl_getSystemPathFromFileURL_( ustrFileName, &ustrUNCPath, true );
988 
989     /* So far we either have an UNC path or something invalid
990        Now create a system path */
991     if ( osl_File_E_None == error )
992         error = osl_getSystemPathFromFileURL_( ustrUNCPath, &ustrSysPath, true );
993 
994     if ( osl_File_E_None == error )
995     {
996         DWORD   nBufferLength;
997         DWORD   dwResult;
998         LPWSTR  lpBuffer = nullptr;
999         LPWSTR  lpszFilePart;
1000 
1001         /* Repeat calling SearchPath ...
1002            Start with MAX_PATH for the buffer. In most cases this
1003            will be enough and does not force the loop to run twice */
1004         dwResult = MAX_PATH;
1005 
1006         do
1007         {
1008             /* If search path is empty use a nullptr pointer instead according to MSDN documentation of SearchPath */
1009             LPCWSTR lpszSearchPath = ustrSystemSearchPath && ustrSystemSearchPath->length ? o3tl::toW(ustrSystemSearchPath->buffer) : nullptr;
1010             LPCWSTR lpszSearchFile = o3tl::toW(ustrSysPath->buffer);
1011 
1012             /* Allocate space for buffer according to previous returned count of required chars */
1013             /* +1 is not necessary if we follow MSDN documentation but for robustness we do so */
1014             nBufferLength = dwResult + 1;
1015             lpBuffer = lpBuffer ?
1016                 static_cast<LPWSTR>(realloc(lpBuffer, nBufferLength * sizeof(WCHAR))) :
1017                 static_cast<LPWSTR>(malloc(nBufferLength * sizeof(WCHAR)));
1018 
1019             dwResult = SearchPathW( lpszSearchPath, lpszSearchFile, nullptr, nBufferLength, lpBuffer, &lpszFilePart );
1020         } while ( dwResult && dwResult >= nBufferLength );
1021 
1022         /*  ... until an error occurs or buffer is large enough.
1023             dwResult == nBufferLength can not happen according to documentation but lets be robust ;-) */
1024 
1025         if ( dwResult )
1026         {
1027             rtl_uString_newFromStr( &ustrSysPath, o3tl::toU(lpBuffer) );
1028             error = osl_getFileURLFromSystemPath( ustrSysPath, pustrPath );
1029         }
1030         else
1031         {
1032             WIN32_FIND_DATAW aFindFileData;
1033             HANDLE  hFind;
1034 
1035             /* something went wrong, perhaps the path was absolute */
1036             error = oslTranslateFileError( GetLastError() );
1037 
1038             hFind = FindFirstFileW( o3tl::toW(ustrSysPath->buffer), &aFindFileData );
1039 
1040             if ( IsValidHandle(hFind) )
1041             {
1042                 error = osl_getFileURLFromSystemPath( ustrSysPath, pustrPath );
1043                 FindClose( hFind );
1044             }
1045         }
1046 
1047         free( lpBuffer );
1048     }
1049 
1050     if ( ustrSysPath )
1051         rtl_uString_release( ustrSysPath );
1052 
1053     if ( ustrUNCPath )
1054         rtl_uString_release( ustrUNCPath );
1055 
1056     return error;
1057 }
1058 
osl_getAbsoluteFileURL(rtl_uString * ustrBaseURL,rtl_uString * ustrRelativeURL,rtl_uString ** pustrAbsoluteURL)1059 oslFileError SAL_CALL osl_getAbsoluteFileURL( rtl_uString* ustrBaseURL, rtl_uString* ustrRelativeURL, rtl_uString** pustrAbsoluteURL )
1060 {
1061     oslFileError eError = osl_File_E_None;
1062     rtl_uString     *ustrRelSysPath = nullptr;
1063     rtl_uString     *ustrBaseSysPath = nullptr;
1064 
1065     if ( ustrBaseURL && ustrBaseURL->length )
1066     {
1067         eError = osl_getSystemPathFromFileURL_( ustrBaseURL, &ustrBaseSysPath, false );
1068         OSL_ENSURE( osl_File_E_None == eError, "osl_getAbsoluteFileURL called with relative or invalid base URL" );
1069     }
1070     if (eError == osl_File_E_None)
1071     {
1072         eError = osl_getSystemPathFromFileURL_(ustrRelativeURL, &ustrRelSysPath,
1073                                                ustrBaseSysPath != nullptr);
1074         OSL_ENSURE( osl_File_E_None == eError, "osl_getAbsoluteFileURL called with empty base URL and/or invalid relative URL" );
1075     }
1076 
1077     if ( !eError )
1078     {
1079         OUString sResultPath;
1080 /*@@@ToDo
1081   The whole FileURL implementation should be merged
1082   with the rtl/uri class.
1083 */
1084         // If ustrRelSysPath is absolute, we don't need ustrBaseSysPath.
1085         if (ustrBaseSysPath && !isAbsolute(ustrRelSysPath))
1086         {
1087             // ustrBaseSysPath is known here to be a valid absolute path -> its first two characters
1088             // are ASCII (either alpha + colon, or double backslashes)
1089 
1090             // Don't use SetCurrentDirectoryW together with GetFullPathNameW, because:
1091             // (a) it needs synchronization and may affect threads that may access relative paths;
1092             // (b) it would give wrong results for non-existing base path (allowed by RFC2396).
1093 
1094             if (startsWithDriveColon(ustrRelSysPath))
1095             {
1096                 // Special case: a path relative to a specific drive's current directory.
1097                 // Should we error out here?
1098 
1099                 // If ustrBaseSysPath is on the same drive as ustrRelSysPath, then take base path
1100                 // as is; otherwise, use current directory on ustrRelSysPath's drive as base path
1101                 if (onSameDrive(ustrRelSysPath, ustrBaseSysPath))
1102                 {
1103                     sResultPath = combinePath(OUString::unacquired(&ustrBaseSysPath),
1104                                               ustrRelSysPath->buffer + 2);
1105                 }
1106                 else
1107                 {
1108                     // Call GetFullPathNameW to get current directory on ustrRelSysPath's drive
1109                     wchar_t baseDrive[3] = { ustrRelSysPath->buffer[0], ':' }; // just "C:"
1110                     osl::LongPathBuffer<wchar_t> aBuf(MAX_LONG_PATH);
1111                     DWORD dwResult
1112                         = GetFullPathNameW(baseDrive, aBuf.getBufSizeInSymbols(), aBuf, nullptr);
1113                     if (dwResult)
1114                     {
1115                         if (dwResult >= aBuf.getBufSizeInSymbols())
1116                             eError = osl_File_E_INVAL;
1117                         else
1118                             sResultPath = combinePath(o3tl::toU(aBuf), ustrRelSysPath->buffer + 2);
1119                     }
1120                     else
1121                         eError = oslTranslateFileError(GetLastError());
1122                 }
1123             }
1124             else
1125             {
1126                 // Is this a rooted relative path (starting with a backslash)?
1127                 // Then we need only root from base. E.g.,
1128                 // ustrBaseSysPath is "\\server\share\path1\" and ustrRelSysPath is "\path2\to\file"
1129                 //   => \\server\share\path2\to\file
1130                 // ustrBaseSysPath is "D:\path1\" and ustrRelSysPath is "\path2\to\file"
1131                 //   => D:\path2\to\file
1132                 auto sBaseView(pathView(ustrBaseSysPath, ustrRelSysPath->buffer[0] == '\\'));
1133                 sResultPath = combinePath(sBaseView, ustrRelSysPath->buffer);
1134             }
1135         }
1136         else
1137             sResultPath = OUString::unacquired(&ustrRelSysPath);
1138 
1139         if (eError == osl_File_E_None)
1140         {
1141             sResultPath = removeRelativeParts(sResultPath);
1142             eError = osl_getFileURLFromSystemPath(sResultPath.pData, pustrAbsoluteURL);
1143         }
1144     }
1145 
1146     if ( ustrBaseSysPath )
1147         rtl_uString_release( ustrBaseSysPath );
1148 
1149     if ( ustrRelSysPath )
1150         rtl_uString_release( ustrRelSysPath );
1151 
1152     return  eError;
1153 }
1154 
osl_getCanonicalName(rtl_uString * strRequested,rtl_uString ** strValid)1155 oslFileError SAL_CALL osl_getCanonicalName( rtl_uString *strRequested, rtl_uString **strValid )
1156 {
1157     rtl_uString_newFromString(strValid, strRequested);
1158     return osl_File_E_None;
1159 }
1160 
1161 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
1162