1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 *   Copyright (C) 1999-2013, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 ******************************************************************************/
10 
11 
12 /*----------------------------------------------------------------------------
13  *
14  *       Memory mapped file wrappers for use by the ICU Data Implementation
15  *       All of the platform-specific implementation for mapping data files
16  *         is here.  The rest of the ICU Data implementation uses only the
17  *         wrapper functions.
18  *
19  *----------------------------------------------------------------------------*/
20 /* Defines _XOPEN_SOURCE for access to POSIX functions.
21  * Must be before any other #includes. */
22 #include "uposixdefs.h"
23 
24 #include "unicode/putil.h"
25 #include "unicode/ustring.h"
26 #include "udatamem.h"
27 #include "umapfile.h"
28 
29 /* memory-mapping base definitions ------------------------------------------ */
30 
31 #if MAP_IMPLEMENTATION==MAP_WIN32
32 #ifndef WIN32_LEAN_AND_MEAN
33 #   define WIN32_LEAN_AND_MEAN
34 #endif
35 #   define VC_EXTRALEAN
36 #   define NOUSER
37 #   define NOSERVICE
38 #   define NOIME
39 #   define NOMCX
40 
41 #   if U_PLATFORM_HAS_WINUWP_API == 1
42         // Some previous versions of the Windows 10 SDK don't expose various APIs for UWP applications
43         // to use, even though UWP apps are allowed to call and use them.  Temporarily change the
44         // WINAPI family partition below to Desktop, so that function declarations are visible for UWP.
45 #       include <winapifamily.h>
46 #       if !(WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_SYSTEM))
47 #           pragma push_macro("WINAPI_PARTITION_DESKTOP")
48 #           undef WINAPI_PARTITION_DESKTOP
49 #           define WINAPI_PARTITION_DESKTOP 1
50 #           define CHANGED_WINAPI_PARTITION_DESKTOP_VALUE
51 #       endif
52 #   endif
53 
54 #   include <windows.h>
55 
56 #   if U_PLATFORM_HAS_WINUWP_API == 1 && defined(CHANGED_WINAPI_PARTITION_DESKTOP_VALUE)
57 #       pragma pop_macro("WINAPI_PARTITION_DESKTOP")
58 #   endif
59 
60 #   include "cmemory.h"
61 
62 typedef HANDLE MemoryMap;
63 
64 #   define IS_MAP(map) ((map)!=nullptr)
65 
66 #elif MAP_IMPLEMENTATION==MAP_POSIX || MAP_IMPLEMENTATION==MAP_390DLL
67     typedef size_t MemoryMap;
68 
69 #   define IS_MAP(map) ((map)!=0)
70 
71 #   include <unistd.h>
72 #   include <sys/mman.h>
73 #   include <sys/stat.h>
74 #   include <fcntl.h>
75 
76 #   ifndef MAP_FAILED
77 #       define MAP_FAILED ((void*)-1)
78 #   endif
79 
80 #   if MAP_IMPLEMENTATION==MAP_390DLL
81         /*   No memory mapping for 390 batch mode.  Fake it using dll loading.  */
82 #       include <dll.h>
83 #       include "cstring.h"
84 #       include "cmemory.h"
85 #       include "unicode/udata.h"
86 #       define LIB_PREFIX "lib"
87 #       define LIB_SUFFIX ".dll"
88         /* This is inconvenient until we figure out what to do with U_ICUDATA_NAME in utypes.h */
89 #       define U_ICUDATA_ENTRY_NAME "icudt" U_ICU_VERSION_SHORT U_LIB_SUFFIX_C_NAME_STRING "_dat"
90 #   endif
91 #elif MAP_IMPLEMENTATION==MAP_STDIO
92 #   include <stdio.h>
93 #   include "cmemory.h"
94 
95     typedef void *MemoryMap;
96 
97 #   define IS_MAP(map) ((map)!=nullptr)
98 #endif
99 
100 /*----------------------------------------------------------------------------*
101  *                                                                            *
102  *   Memory Mapped File support.  Platform dependent implementation of        *
103  *                           functions used by the rest of the implementation.*
104  *                                                                            *
105  *----------------------------------------------------------------------------*/
106 #if MAP_IMPLEMENTATION==MAP_NONE
107     U_CFUNC UBool
uprv_mapFile(UDataMemory * pData,const char * path,UErrorCode * status)108     uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
109         if (U_FAILURE(*status)) {
110             return FALSE;
111         }
112         UDataMemory_init(pData); /* Clear the output struct. */
113         return FALSE;            /* no file access */
114     }
115 
uprv_unmapFile(UDataMemory * pData)116     U_CFUNC void uprv_unmapFile(UDataMemory *pData) {
117         /* nothing to do */
118     }
119 #elif MAP_IMPLEMENTATION==MAP_WIN32
120     U_CFUNC UBool
uprv_mapFile(UDataMemory * pData,const char * path,UErrorCode * status)121     uprv_mapFile(
122          UDataMemory *pData,    /* Fill in with info on the result doing the mapping. */
123                                 /*   Output only; any original contents are cleared.  */
124          const char *path,      /* File path to be opened/mapped.                     */
125          UErrorCode *status     /* Error status, used to report out-of-memory errors. */
126          )
127     {
128         if (U_FAILURE(*status)) {
129             return FALSE;
130         }
131 
132         HANDLE map = nullptr;
133         HANDLE file = INVALID_HANDLE_VALUE;
134 
135         UDataMemory_init(pData); /* Clear the output struct.        */
136 
137         /* open the input file */
138 #if U_PLATFORM_HAS_WINUWP_API == 0
139         // Note: In the non-UWP code-path (ie: Win32), the value of the path variable might have come from
140         // the CRT 'getenv' function, and would be therefore be encoded in the default ANSI code page.
141         // This means that we can't call the *W version of API below, whereas in the UWP code-path
142         // there is no 'getenv' call, and thus the string will be only UTF-8/Invariant characters.
143         file=CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, nullptr,
144             OPEN_EXISTING,
145             FILE_ATTRIBUTE_NORMAL|FILE_FLAG_RANDOM_ACCESS, nullptr);
146 #else
147         // Convert from UTF-8 string to UTF-16 string.
148         wchar_t utf16Path[MAX_PATH];
149         int32_t pathUtf16Len = 0;
150         u_strFromUTF8(reinterpret_cast<UChar*>(utf16Path), static_cast<int32_t>(UPRV_LENGTHOF(utf16Path)), &pathUtf16Len, path, -1, status);
151 
152         if (U_FAILURE(*status)) {
153             return FALSE;
154         }
155         if (*status == U_STRING_NOT_TERMINATED_WARNING) {
156             // Report back an error instead of a warning.
157             *status = U_BUFFER_OVERFLOW_ERROR;
158             return FALSE;
159         }
160 
161         file = CreateFileW(utf16Path, GENERIC_READ, FILE_SHARE_READ, nullptr,
162             OPEN_EXISTING,
163             FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS, nullptr);
164 #endif
165         if (file == INVALID_HANDLE_VALUE) {
166             // If we failed to open the file due to an out-of-memory error, then we want
167             // to report that error back to the caller.
168             if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) {
169                 *status = U_MEMORY_ALLOCATION_ERROR;
170             }
171             return FALSE;
172         }
173 
174         // Note: We use NULL/nullptr for lpAttributes parameter below.
175         // This means our handle cannot be inherited and we will get the default security descriptor.
176         /* create an unnamed Windows file-mapping object for the specified file */
177         map = CreateFileMappingW(file, nullptr, PAGE_READONLY, 0, 0, nullptr);
178 
179         CloseHandle(file);
180         if (map == nullptr) {
181             // If we failed to create the mapping due to an out-of-memory error, then
182             // we want to report that error back to the caller.
183             if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) {
184                 *status = U_MEMORY_ALLOCATION_ERROR;
185             }
186             return FALSE;
187         }
188 
189         /* map a view of the file into our address space */
190         pData->pHeader = reinterpret_cast<const DataHeader *>(MapViewOfFile(map, FILE_MAP_READ, 0, 0, 0));
191         if (pData->pHeader == nullptr) {
192             CloseHandle(map);
193             return FALSE;
194         }
195         pData->map = map;
196         return TRUE;
197     }
198 
199     U_CFUNC void
uprv_unmapFile(UDataMemory * pData)200     uprv_unmapFile(UDataMemory *pData) {
201         if (pData != nullptr && pData->map != nullptr) {
202             UnmapViewOfFile(pData->pHeader);
203             CloseHandle(pData->map);
204             pData->pHeader = nullptr;
205             pData->map = nullptr;
206         }
207     }
208 
209 
210 
211 #elif MAP_IMPLEMENTATION==MAP_POSIX
212     U_CFUNC UBool
uprv_mapFile(UDataMemory * pData,const char * path,UErrorCode * status)213     uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
214         int fd;
215         int length;
216         struct stat mystat;
217         void *data;
218 
219         if (U_FAILURE(*status)) {
220             return FALSE;
221         }
222 
223         UDataMemory_init(pData); /* Clear the output struct.        */
224 
225         /* determine the length of the file */
226         if(stat(path, &mystat)!=0 || mystat.st_size<=0) {
227             return FALSE;
228         }
229         length=mystat.st_size;
230 
231         /* open the file */
232         fd=open(path, O_RDONLY);
233         if(fd==-1) {
234             return FALSE;
235         }
236 
237         /* get a view of the mapping */
238 #if U_PLATFORM != U_PF_HPUX
239         data=mmap(0, length, PROT_READ, MAP_SHARED,  fd, 0);
240 #else
241         data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0);
242 #endif
243         close(fd); /* no longer needed */
244         if(data==MAP_FAILED) {
245             // Possibly check the errno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR?
246             return FALSE;
247         }
248 
249         pData->map = (char *)data + length;
250         pData->pHeader=(const DataHeader *)data;
251         pData->mapAddr = data;
252 #if U_PLATFORM == U_PF_IPHONE
253         posix_madvise(data, length, POSIX_MADV_RANDOM);
254 #endif
255         return TRUE;
256     }
257 
258     U_CFUNC void
uprv_unmapFile(UDataMemory * pData)259     uprv_unmapFile(UDataMemory *pData) {
260         if(pData!=nullptr && pData->map!=nullptr) {
261             size_t dataLen = (char *)pData->map - (char *)pData->mapAddr;
262             if(munmap(pData->mapAddr, dataLen)==-1) {
263             }
264             pData->pHeader=nullptr;
265             pData->map=0;
266             pData->mapAddr=nullptr;
267         }
268     }
269 
270 
271 
272 #elif MAP_IMPLEMENTATION==MAP_STDIO
273     /* copy of the filestrm.c/T_FileStream_size() implementation */
274     static int32_t
umap_fsize(FILE * f)275     umap_fsize(FILE *f) {
276         int32_t savedPos = ftell(f);
277         int32_t size = 0;
278 
279         /*Changes by Bertrand A. D. doesn't affect the current position
280         goes to the end of the file before ftell*/
281         fseek(f, 0, SEEK_END);
282         size = (int32_t)ftell(f);
283         fseek(f, savedPos, SEEK_SET);
284         return size;
285     }
286 
287     U_CFUNC UBool
uprv_mapFile(UDataMemory * pData,const char * path,UErrorCode * status)288     uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
289         FILE *file;
290         int32_t fileLength;
291         void *p;
292 
293         if (U_FAILURE(*status)) {
294             return FALSE;
295         }
296 
297         UDataMemory_init(pData); /* Clear the output struct.        */
298         /* open the input file */
299         file=fopen(path, "rb");
300         if(file==nullptr) {
301             return FALSE;
302         }
303 
304         /* get the file length */
305         fileLength=umap_fsize(file);
306         if(ferror(file) || fileLength<=20) {
307             fclose(file);
308             return FALSE;
309         }
310 
311         /* allocate the memory to hold the file data */
312         p=uprv_malloc(fileLength);
313         if(p==nullptr) {
314             fclose(file);
315             *status = U_MEMORY_ALLOCATION_ERROR;
316             return FALSE;
317         }
318 
319         /* read the file */
320         if(fileLength!=fread(p, 1, fileLength, file)) {
321             uprv_free(p);
322             fclose(file);
323             return FALSE;
324         }
325 
326         fclose(file);
327         pData->map=p;
328         pData->pHeader=(const DataHeader *)p;
329         pData->mapAddr=p;
330         return TRUE;
331     }
332 
333     U_CFUNC void
uprv_unmapFile(UDataMemory * pData)334     uprv_unmapFile(UDataMemory *pData) {
335         if(pData!=nullptr && pData->map!=nullptr) {
336             uprv_free(pData->map);
337             pData->map     = nullptr;
338             pData->mapAddr = nullptr;
339             pData->pHeader = nullptr;
340         }
341     }
342 
343 
344 #elif MAP_IMPLEMENTATION==MAP_390DLL
345     /*  390 specific Library Loading.
346      *  This is the only platform left that dynamically loads an ICU Data Library.
347      *  All other platforms use .data files when dynamic loading is required, but
348      *  this turns out to be awkward to support in 390 batch mode.
349      *
350      *  The idea here is to hide the fact that 390 is using dll loading from the
351      *   rest of ICU, and make it look like there is file loading happening.
352      *
353      */
354 
strcpy_returnEnd(char * dest,const char * src)355     static char *strcpy_returnEnd(char *dest, const char *src)
356     {
357         while((*dest=*src)!=0) {
358             ++dest;
359             ++src;
360         }
361         return dest;
362     }
363 
364     /*------------------------------------------------------------------------------
365      *
366      *  computeDirPath   given a user-supplied path of an item to be opened,
367      *                         compute and return
368      *                            - the full directory path to be used
369      *                              when opening the file.
370      *                            - Pointer to null at end of above returned path
371      *
372      *                       Parameters:
373      *                          path:        input path.  Buffer is not altered.
374      *                          pathBuffer:  Output buffer.  Any contents are overwritten.
375      *
376      *                       Returns:
377      *                          Pointer to null termination in returned pathBuffer.
378      *
379      *                    TODO:  This works the way ICU historically has, but the
380      *                           whole data fallback search path is so complicated that
381      *                           probably almost no one will ever really understand it,
382      *                           the potential for confusion is large.  (It's not just
383      *                           this one function, but the whole scheme.)
384      *
385      *------------------------------------------------------------------------------*/
uprv_computeDirPath(const char * path,char * pathBuffer)386     static char *uprv_computeDirPath(const char *path, char *pathBuffer)
387     {
388         char   *finalSlash;       /* Ptr to last dir separator in input path, or null if none. */
389         int32_t pathLen;          /* Length of the returned directory path                     */
390 
391         finalSlash = 0;
392         if (path != 0) {
393             finalSlash = uprv_strrchr(path, U_FILE_SEP_CHAR);
394         }
395 
396         *pathBuffer = 0;
397         if (finalSlash == 0) {
398         /* No user-supplied path.
399             * Copy the ICU_DATA path to the path buffer and return that*/
400             const char *icuDataDir;
401             icuDataDir=u_getDataDirectory();
402             if(icuDataDir!=nullptr && *icuDataDir!=0) {
403                 return strcpy_returnEnd(pathBuffer, icuDataDir);
404             } else {
405                 /* there is no icuDataDir either.  Just return the empty pathBuffer. */
406                 return pathBuffer;
407             }
408         }
409 
410         /* User supplied path did contain a directory portion.
411         * Copy it to the output path buffer */
412         pathLen = (int32_t)(finalSlash - path + 1);
413         uprv_memcpy(pathBuffer, path, pathLen);
414         *(pathBuffer+pathLen) = 0;
415         return pathBuffer+pathLen;
416     }
417 
418 
419 #   define DATA_TYPE "dat"
420 
uprv_mapFile(UDataMemory * pData,const char * path,UErrorCode * status)421     U_CFUNC UBool uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
422         const char *inBasename;
423         char *basename;
424         char pathBuffer[1024];
425         const DataHeader *pHeader;
426         dllhandle *handle;
427         void *val=0;
428 
429         if (U_FAILURE(*status)) {
430             return FALSE;
431         }
432 
433         inBasename=uprv_strrchr(path, U_FILE_SEP_CHAR);
434         if(inBasename==nullptr) {
435             inBasename = path;
436         } else {
437             inBasename++;
438         }
439         basename=uprv_computeDirPath(path, pathBuffer);
440         if(uprv_strcmp(inBasename, U_ICUDATA_NAME".dat") != 0) {
441             /* must mmap file... for build */
442             int fd;
443             int length;
444             struct stat mystat;
445             void *data;
446             UDataMemory_init(pData); /* Clear the output struct. */
447 
448             /* determine the length of the file */
449             if(stat(path, &mystat)!=0 || mystat.st_size<=0) {
450                 return FALSE;
451             }
452             length=mystat.st_size;
453 
454             /* open the file */
455             fd=open(path, O_RDONLY);
456             if(fd==-1) {
457                 return FALSE;
458             }
459 
460             /* get a view of the mapping */
461             data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0);
462             close(fd); /* no longer needed */
463             if(data==MAP_FAILED) {
464                 // Possibly check the errorno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR?
465                 return FALSE;
466             }
467             pData->map = (char *)data + length;
468             pData->pHeader=(const DataHeader *)data;
469             pData->mapAddr = data;
470             return TRUE;
471         }
472 
473 #       ifdef OS390BATCH
474             /* ### hack: we still need to get u_getDataDirectory() fixed
475             for OS/390 (batch mode - always return "//"? )
476             and this here straightened out with LIB_PREFIX and LIB_SUFFIX (both empty?!)
477             This is probably due to the strange file system on OS/390.  It's more like
478             a database with short entry names than a typical file system. */
479             /* U_ICUDATA_NAME should always have the correct name */
480             /* BUT FOR BATCH MODE IT IS AN EXCEPTION BECAUSE */
481             /* THE FIRST THREE LETTERS ARE PREASSIGNED TO THE */
482             /* PROJECT!!!!! */
483             uprv_strcpy(pathBuffer, "IXMI" U_ICU_VERSION_SHORT "DA");
484 #       else
485             /* set up the library name */
486             uprv_strcpy(basename, LIB_PREFIX U_LIBICUDATA_NAME U_ICU_VERSION_SHORT LIB_SUFFIX);
487 #       endif
488 
489 #       ifdef UDATA_DEBUG
490              fprintf(stderr, "dllload: %s ", pathBuffer);
491 #       endif
492 
493         handle=dllload(pathBuffer);
494 
495 #       ifdef UDATA_DEBUG
496                fprintf(stderr, " -> %08X\n", handle );
497 #       endif
498 
499         if(handle != nullptr) {
500                /* we have a data DLL - what kind of lookup do we need here? */
501                /* try to find the Table of Contents */
502                UDataMemory_init(pData); /* Clear the output struct.        */
503                val=dllqueryvar((dllhandle*)handle, U_ICUDATA_ENTRY_NAME);
504                if(val == 0) {
505                     /* failed... so keep looking */
506                     return FALSE;
507                }
508 #              ifdef UDATA_DEBUG
509                     fprintf(stderr, "dllqueryvar(%08X, %s) -> %08X\n", handle, U_ICUDATA_ENTRY_NAME, val);
510 #              endif
511 
512                pData->pHeader=(const DataHeader *)val;
513                return TRUE;
514          } else {
515                return FALSE; /* no handle */
516          }
517     }
518 
uprv_unmapFile(UDataMemory * pData)519     U_CFUNC void uprv_unmapFile(UDataMemory *pData) {
520         if(pData!=nullptr && pData->map!=nullptr) {
521             uprv_free(pData->map);
522             pData->map     = nullptr;
523             pData->mapAddr = nullptr;
524             pData->pHeader = nullptr;
525         }
526     }
527 
528 #else
529 #   error MAP_IMPLEMENTATION is set incorrectly
530 #endif
531