1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 *   Copyright (C) 1999-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 ******************************************************************************
10 *   file name:  udata.cpp
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 1999oct25
16 *   created by: Markus W. Scherer
17 */
18 
19 #include "unicode/utypes.h"  /* U_PLATFORM etc. */
20 
21 #ifdef __GNUC__
22 /* if gcc
23 #define ATTRIBUTE_WEAK __attribute__ ((weak))
24 might have to #include some other header
25 */
26 #endif
27 
28 #include "unicode/putil.h"
29 #include "unicode/udata.h"
30 #include "unicode/uversion.h"
31 #include "charstr.h"
32 #include "cmemory.h"
33 #include "cstring.h"
34 #include "mutex.h"
35 #include "putilimp.h"
36 #include "restrace.h"
37 #include "uassert.h"
38 #include "ucln_cmn.h"
39 #include "ucmndata.h"
40 #include "udatamem.h"
41 #include "uhash.h"
42 #include "umapfile.h"
43 #include "umutex.h"
44 
45 /***********************************************************************
46 *
47 *   Notes on the organization of the ICU data implementation
48 *
49 *      All of the public API is defined in udata.h
50 *
51 *      The implementation is split into several files...
52 *
53 *         - udata.c  (this file) contains higher level code that knows about
54 *                     the search paths for locating data, caching opened data, etc.
55 *
56 *         - umapfile.c  contains the low level platform-specific code for actually loading
57 *                     (memory mapping, file reading, whatever) data into memory.
58 *
59 *         - ucmndata.c  deals with the tables of contents of ICU data items within
60 *                     an ICU common format data file.  The implementation includes
61 *                     an abstract interface and support for multiple TOC formats.
62 *                     All knowledge of any specific TOC format is encapsulated here.
63 *
64 *         - udatamem.c has code for managing UDataMemory structs.  These are little
65 *                     descriptor objects for blocks of memory holding ICU data of
66 *                     various types.
67 */
68 
69 /* configuration ---------------------------------------------------------- */
70 
71 /* If you are excruciatingly bored turn this on .. */
72 /* #define UDATA_DEBUG 1 */
73 
74 #if defined(UDATA_DEBUG)
75 #   include <stdio.h>
76 #endif
77 
78 U_NAMESPACE_USE
79 
80 /*
81  *  Forward declarations
82  */
83 static UDataMemory *udata_findCachedData(const char *path, UErrorCode &err);
84 
85 /***********************************************************************
86 *
87 *    static (Global) data
88 *
89 ************************************************************************/
90 
91 /*
92  * Pointers to the common ICU data.
93  *
94  * We store multiple pointers to ICU data packages and iterate through them
95  * when looking for a data item.
96  *
97  * It is possible to combine this with dependency inversion:
98  * One or more data package libraries may export
99  * functions that each return a pointer to their piece of the ICU data,
100  * and this file would import them as weak functions, without a
101  * strong linker dependency from the common library on the data library.
102  *
103  * Then we can have applications depend on only that part of ICU's data
104  * that they really need, reducing the size of binaries that take advantage
105  * of this.
106  */
/* Slots are filled front to back: setCommonICUData() stores a new package in the first   */
/*   NULL slot, and udata_cleanup() closes entries until it reaches the first NULL slot.  */
static UDataMemory *gCommonICUDataArray[10] = { NULL };   // Access protected by icu global mutex.

/* Read by extendICUData() before it attempts a load; reset to 0 by udata_cleanup(). */
static u_atomic_int32_t gHaveTriedToLoadCommonData = ATOMIC_INT32_T_INITIALIZER(0);  //  See extendICUData().

/* Created lazily by udata_initHashTable() via gCommonDataCacheInitOnce; keyed by base name. */
static UHashtable  *gCommonDataCache = NULL;  /* Global hash table of opened ICU data files.  */
static icu::UInitOnce gCommonDataCacheInitOnce = U_INITONCE_INITIALIZER;

#if !defined(ICU_DATA_DIR_WINDOWS)
static UDataFileAccess  gDataFileAccess = UDATA_DEFAULT_ACCESS;  // Access not synchronized.
                                                                 // Modifying is documented as thread-unsafe.
#else
// If we are using the Windows data directory, then look in one spot only.
static UDataFileAccess  gDataFileAccess = UDATA_NO_FILES;
#endif
121 
122 static UBool U_CALLCONV
udata_cleanup(void)123 udata_cleanup(void)
124 {
125     int32_t i;
126 
127     if (gCommonDataCache) {             /* Delete the cache of user data mappings.  */
128         uhash_close(gCommonDataCache);  /*   Table owns the contents, and will delete them. */
129         gCommonDataCache = NULL;        /*   Cleanup is not thread safe.                */
130     }
131     gCommonDataCacheInitOnce.reset();
132 
133     for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray) && gCommonICUDataArray[i] != NULL; ++i) {
134         udata_close(gCommonICUDataArray[i]);
135         gCommonICUDataArray[i] = NULL;
136     }
137     gHaveTriedToLoadCommonData = 0;
138 
139     return TRUE;                   /* Everything was cleaned up */
140 }
141 
142 static UBool U_CALLCONV
findCommonICUDataByName(const char * inBasename,UErrorCode & err)143 findCommonICUDataByName(const char *inBasename, UErrorCode &err)
144 {
145     UBool found = FALSE;
146     int32_t i;
147 
148     UDataMemory  *pData = udata_findCachedData(inBasename, err);
149     if (U_FAILURE(err) || pData == NULL)
150         return FALSE;
151 
152     {
153         Mutex lock;
154         for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray); ++i) {
155             if ((gCommonICUDataArray[i] != NULL) && (gCommonICUDataArray[i]->pHeader == pData->pHeader)) {
156                 /* The data pointer is already in the array. */
157                 found = TRUE;
158                 break;
159             }
160         }
161     }
162     return found;
163 }
164 
165 
166 /*
167  * setCommonICUData.   Set a UDataMemory to be the global ICU Data
168  */
169 static UBool
setCommonICUData(UDataMemory * pData,UBool warn,UErrorCode * pErr)170 setCommonICUData(UDataMemory *pData,     /*  The new common data.  Belongs to caller, we copy it. */
171                  UBool       warn,       /*  If true, set USING_DEFAULT warning if ICUData was    */
172                                          /*    changed by another thread before we got to it.     */
173                  UErrorCode *pErr)
174 {
175     UDataMemory  *newCommonData = UDataMemory_createNewInstance(pErr);
176     int32_t i;
177     UBool didUpdate = FALSE;
178     if (U_FAILURE(*pErr)) {
179         return FALSE;
180     }
181 
182     /*  For the assignment, other threads must cleanly see either the old            */
183     /*    or the new, not some partially initialized new.  The old can not be        */
184     /*    deleted - someone may still have a pointer to it lying around in           */
185     /*    their locals.                                                              */
186     UDatamemory_assign(newCommonData, pData);
187     umtx_lock(NULL);
188     for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray); ++i) {
189         if (gCommonICUDataArray[i] == NULL) {
190             gCommonICUDataArray[i] = newCommonData;
191             didUpdate = TRUE;
192             break;
193         } else if (gCommonICUDataArray[i]->pHeader == pData->pHeader) {
194             /* The same data pointer is already in the array. */
195             break;
196         }
197     }
198     umtx_unlock(NULL);
199 
200     if (i == UPRV_LENGTHOF(gCommonICUDataArray) && warn) {
201         *pErr = U_USING_DEFAULT_WARNING;
202     }
203     if (didUpdate) {
204         ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup);
205     } else {
206         uprv_free(newCommonData);
207     }
208     return didUpdate;
209 }
210 
211 #if !defined(ICU_DATA_DIR_WINDOWS)
212 
213 static UBool
setCommonICUDataPointer(const void * pData,UBool,UErrorCode * pErrorCode)214 setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCode) {
215     UDataMemory tData;
216     UDataMemory_init(&tData);
217     UDataMemory_setData(&tData, pData);
218     udata_checkCommonData(&tData, pErrorCode);
219     return setCommonICUData(&tData, FALSE, pErrorCode);
220 }
221 
222 #endif
223 
224 static const char *
findBasename(const char * path)225 findBasename(const char *path) {
226     const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR);
227     if(basename==NULL) {
228         return path;
229     } else {
230         return basename+1;
231     }
232 }
233 
234 #ifdef UDATA_DEBUG
235 static const char *
packageNameFromPath(const char * path)236 packageNameFromPath(const char *path)
237 {
238     if((path == NULL) || (*path == 0)) {
239         return U_ICUDATA_NAME;
240     }
241 
242     path = findBasename(path);
243 
244     if((path == NULL) || (*path == 0)) {
245         return U_ICUDATA_NAME;
246     }
247 
248     return path;
249 }
250 #endif
251 
252 /*----------------------------------------------------------------------*
253  *                                                                      *
254  *   Cache for common data                                              *
255  *      Functions for looking up or adding entries to a cache of        *
256  *      data that has been previously opened.  Avoids a potentially     *
257  *      expensive operation of re-opening the data for subsequent       *
258  *      uses.                                                           *
259  *                                                                      *
260  *      Data remains cached for the duration of the process.            *
261  *                                                                      *
262  *----------------------------------------------------------------------*/
263 
/* One entry of the common data cache; instances are the values stored in gCommonDataCache. */
typedef struct DataCacheElement {
    char          *name;   /* heap-allocated base name; also used as the hash key      */
    UDataMemory   *item;   /* the cached data; closed by DataCacheElement_deleter()    */
} DataCacheElement;
268 
269 
270 
271 /*
272  * Deleter function for DataCacheElements.
273  *         udata cleanup function closes the hash table; hash table in turn calls back to
274  *         here for each entry.
275  */
DataCacheElement_deleter(void * pDCEl)276 static void U_CALLCONV DataCacheElement_deleter(void *pDCEl) {
277     DataCacheElement *p = (DataCacheElement *)pDCEl;
278     udata_close(p->item);              /* unmaps storage */
279     uprv_free(p->name);                /* delete the hash key string. */
280     uprv_free(pDCEl);                  /* delete 'this'          */
281 }
282 
udata_initHashTable(UErrorCode & err)283 static void U_CALLCONV udata_initHashTable(UErrorCode &err) {
284     U_ASSERT(gCommonDataCache == NULL);
285     gCommonDataCache = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &err);
286     if (U_FAILURE(err)) {
287        return;
288     }
289     U_ASSERT(gCommonDataCache != NULL);
290     uhash_setValueDeleter(gCommonDataCache, DataCacheElement_deleter);
291     ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup);
292 }
293 
294  /*   udata_getCacheHashTable()
295   *     Get the hash table used to store the data cache entries.
296   *     Lazy create it if it doesn't yet exist.
297   */
udata_getHashTable(UErrorCode & err)298 static UHashtable *udata_getHashTable(UErrorCode &err) {
299     umtx_initOnce(gCommonDataCacheInitOnce, &udata_initHashTable, err);
300     return gCommonDataCache;
301 }
302 
303 
304 
udata_findCachedData(const char * path,UErrorCode & err)305 static UDataMemory *udata_findCachedData(const char *path, UErrorCode &err)
306 {
307     UHashtable        *htable;
308     UDataMemory       *retVal = NULL;
309     DataCacheElement  *el;
310     const char        *baseName;
311 
312     htable = udata_getHashTable(err);
313     if (U_FAILURE(err)) {
314         return NULL;
315     }
316 
317     baseName = findBasename(path);   /* Cache remembers only the base name, not the full path. */
318     umtx_lock(NULL);
319     el = (DataCacheElement *)uhash_get(htable, baseName);
320     umtx_unlock(NULL);
321     if (el != NULL) {
322         retVal = el->item;
323     }
324 #ifdef UDATA_DEBUG
325     fprintf(stderr, "Cache: [%s] -> %p\n", baseName, (void*) retVal);
326 #endif
327     return retVal;
328 }
329 
330 
udata_cacheDataItem(const char * path,UDataMemory * item,UErrorCode * pErr)331 static UDataMemory *udata_cacheDataItem(const char *path, UDataMemory *item, UErrorCode *pErr) {
332     DataCacheElement *newElement;
333     const char       *baseName;
334     int32_t           nameLen;
335     UHashtable       *htable;
336     DataCacheElement *oldValue = NULL;
337     UErrorCode        subErr = U_ZERO_ERROR;
338 
339     htable = udata_getHashTable(*pErr);
340     if (U_FAILURE(*pErr)) {
341         return NULL;
342     }
343 
344     /* Create a new DataCacheElement - the thingy we store in the hash table -
345      * and copy the supplied path and UDataMemoryItems into it.
346      */
347     newElement = (DataCacheElement *)uprv_malloc(sizeof(DataCacheElement));
348     if (newElement == NULL) {
349         *pErr = U_MEMORY_ALLOCATION_ERROR;
350         return NULL;
351     }
352     newElement->item = UDataMemory_createNewInstance(pErr);
353     if (U_FAILURE(*pErr)) {
354         uprv_free(newElement);
355         return NULL;
356     }
357     UDatamemory_assign(newElement->item, item);
358 
359     baseName = findBasename(path);
360     nameLen = (int32_t)uprv_strlen(baseName);
361     newElement->name = (char *)uprv_malloc(nameLen+1);
362     if (newElement->name == NULL) {
363         *pErr = U_MEMORY_ALLOCATION_ERROR;
364         uprv_free(newElement->item);
365         uprv_free(newElement);
366         return NULL;
367     }
368     uprv_strcpy(newElement->name, baseName);
369 
370     /* Stick the new DataCacheElement into the hash table.
371     */
372     umtx_lock(NULL);
373     oldValue = (DataCacheElement *)uhash_get(htable, path);
374     if (oldValue != NULL) {
375         subErr = U_USING_DEFAULT_WARNING;
376     }
377     else {
378         uhash_put(
379             htable,
380             newElement->name,               /* Key   */
381             newElement,                     /* Value */
382             &subErr);
383     }
384     umtx_unlock(NULL);
385 
386 #ifdef UDATA_DEBUG
387     fprintf(stderr, "Cache: [%s] <<< %p : %s. vFunc=%p\n", newElement->name,
388     (void*) newElement->item, u_errorName(subErr), (void*) newElement->item->vFuncs);
389 #endif
390 
391     if (subErr == U_USING_DEFAULT_WARNING || U_FAILURE(subErr)) {
392         *pErr = subErr; /* copy sub err unto fillin ONLY if something happens. */
393         uprv_free(newElement->name);
394         uprv_free(newElement->item);
395         uprv_free(newElement);
396         return oldValue ? oldValue->item : NULL;
397     }
398 
399     return newElement->item;
400 }
401 
/*----------------------------------------------------------------------*
403  *                                                                      *
404  *  Path management.  Could be shared with other tools/etc if need be   *
405  * later on.                                                            *
406  *                                                                      *
407  *----------------------------------------------------------------------*/
408 
409 U_NAMESPACE_BEGIN
410 
/*
 * Iterates over the candidate file names for a data item: first the directory
 * part of the item itself (if it has one), then each entry of the search path.
 * next() returns one candidate per call, or NULL when the list is exhausted.
 */
class UDataPathIterator
{
public:
    UDataPathIterator(const char *path, const char *pkg,
                      const char *item, const char *suffix, UBool doCheckLastFour,
                      UErrorCode *pErrorCode);
    const char *next(UErrorCode *pErrorCode);

private:
    const char *path;                              /* search path: the caller's path, or u_getDataDirectory() */
    const char *nextPath;                          /* start of the entry next() will process; NULL when done */
    const char *basename;                          /* item's basename (icudt22e_mt.res)*/

    StringPiece suffix;                            /* item suffix; empty if the caller passed NULL */

    uint32_t    basenameLen;                       /* length of basename */

    CharString  itemPath;                          /* directory part of the item name, if it has one */
    CharString  pathBuffer;                        /* output path for this iteration; reused by every next() call */
    CharString  packageStub;                       /* example:  "/icudt28b". Will ignore that leaf in set paths. */

    UBool       checkLastFour;                     /* if TRUE then allow paths such as '/foo/myapp.dat'
                                                    * to match, checks last 4 chars of suffix with
                                                    * last 4 of path, then previous chars. */
};
436 
437 /**
438  * @param iter    The iterator to be initialized. Its current state does not matter.
439  * @param inPath  The full pathname to be iterated over.  If NULL, defaults to U_ICUDATA_NAME
440  * @param pkg     Package which is being searched for, ex "icudt28l".  Will ignore leaf directories such as /icudt28l
441  * @param item    Item to be searched for.  Can include full path, such as /a/b/foo.dat
442  * @param inSuffix  Optional item suffix, if not-null (ex. ".dat") then 'path' can contain 'item' explicitly.
443  *             Ex:   'stuff.dat' would be found in '/a/foo:/tmp/stuff.dat:/bar/baz' as item #2.
444  *                   '/blarg/stuff.dat' would also be found.
445  *  Note: inSuffix may also be the 'item' being searched for as well, (ex: "ibm-5348_P100-1997.cnv"), in which case
446  *        the 'item' parameter is often the same as pkg. (Though sometimes might have a tree part as well, ex: "icudt62l-curr").
447  */
UDataPathIterator(const char * inPath,const char * pkg,const char * item,const char * inSuffix,UBool doCheckLastFour,UErrorCode * pErrorCode)448 UDataPathIterator::UDataPathIterator(const char *inPath, const char *pkg,
449                                      const char *item, const char *inSuffix, UBool doCheckLastFour,
450                                      UErrorCode *pErrorCode)
451 {
452 #ifdef UDATA_DEBUG
453         fprintf(stderr, "SUFFIX1=%s PATH=%s\n", inSuffix, inPath);
454 #endif
455     /** Path **/
456     if(inPath == NULL) {
457         path = u_getDataDirectory();
458     } else {
459         path = inPath;
460     }
461 
462     /** Package **/
463     if(pkg != NULL) {
464       packageStub.append(U_FILE_SEP_CHAR, *pErrorCode).append(pkg, *pErrorCode);
465 #ifdef UDATA_DEBUG
466       fprintf(stderr, "STUB=%s [%d]\n", packageStub.data(), packageStub.length());
467 #endif
468     }
469 
470     /** Item **/
471     basename = findBasename(item);
472     basenameLen = (int32_t)uprv_strlen(basename);
473 
474     /** Item path **/
475     if(basename == item) {
476         nextPath = path;
477     } else {
478         itemPath.append(item, (int32_t)(basename-item), *pErrorCode);
479         nextPath = itemPath.data();
480     }
481 #ifdef UDATA_DEBUG
482     fprintf(stderr, "SUFFIX=%s [%p]\n", inSuffix, (void*) inSuffix);
483 #endif
484 
485     /** Suffix  **/
486     if(inSuffix != NULL) {
487         suffix = inSuffix;
488     } else {
489         suffix = "";
490     }
491 
492     checkLastFour = doCheckLastFour;
493 
494     /* pathBuffer will hold the output path strings returned by this iterator */
495 
496 #ifdef UDATA_DEBUG
497     fprintf(stderr, "0: init %s -> [path=%s], [base=%s], [suff=%s], [itempath=%s], [nextpath=%s], [checklast4=%s]\n",
498             item,
499             path,
500             basename,
501             suffix.data(),
502             itemPath.data(),
503             nextPath,
504             checkLastFour?"TRUE":"false");
505 #endif
506 }
507 
508 /**
509  * Get the next path on the list.
510  *
511  * @param iter The Iter to be used
512  * @param len  If set, pointer to the length of the returned path, for convenience.
513  * @return Pointer to the next path segment, or NULL if there are no more.
514  */
next(UErrorCode * pErrorCode)515 const char *UDataPathIterator::next(UErrorCode *pErrorCode)
516 {
517     if(U_FAILURE(*pErrorCode)) {
518         return NULL;
519     }
520 
521     const char *currentPath = NULL;
522     int32_t     pathLen = 0;
523     const char *pathBasename;
524 
525     do
526     {
527         if( nextPath == NULL ) {
528             break;
529         }
530         currentPath = nextPath;
531 
532         if(nextPath == itemPath.data()) { /* we were processing item's path. */
533             nextPath = path; /* start with regular path next tm. */
534             pathLen = (int32_t)uprv_strlen(currentPath);
535         } else {
536             /* fix up next for next time */
537             nextPath = uprv_strchr(currentPath, U_PATH_SEP_CHAR);
538             if(nextPath == NULL) {
539                 /* segment: entire path */
540                 pathLen = (int32_t)uprv_strlen(currentPath);
541             } else {
542                 /* segment: until next segment */
543                 pathLen = (int32_t)(nextPath - currentPath);
544                 /* skip divider */
545                 nextPath ++;
546             }
547         }
548 
549         if(pathLen == 0) {
550             continue;
551         }
552 
553 #ifdef UDATA_DEBUG
554         fprintf(stderr, "rest of path (IDD) = %s\n", currentPath);
555         fprintf(stderr, "                     ");
556         {
557             int32_t qqq;
558             for(qqq=0;qqq<pathLen;qqq++)
559             {
560                 fprintf(stderr, " ");
561             }
562 
563             fprintf(stderr, "^\n");
564         }
565 #endif
566         pathBuffer.clear().append(currentPath, pathLen, *pErrorCode);
567 
568         /* check for .dat files */
569         pathBasename = findBasename(pathBuffer.data());
570 
571         if(checkLastFour == TRUE &&
572            (pathLen>=4) &&
573            uprv_strncmp(pathBuffer.data() +(pathLen-4), suffix.data(), 4)==0 && /* suffix matches */
574            uprv_strncmp(findBasename(pathBuffer.data()), basename, basenameLen)==0  && /* base matches */
575            uprv_strlen(pathBasename)==(basenameLen+4)) { /* base+suffix = full len */
576 
577 #ifdef UDATA_DEBUG
578             fprintf(stderr, "Have %s file on the path: %s\n", suffix.data(), pathBuffer.data());
579 #endif
580             /* do nothing */
581         }
582         else
583         {       /* regular dir path */
584             if(pathBuffer[pathLen-1] != U_FILE_SEP_CHAR) {
585                 if((pathLen>=4) &&
586                    uprv_strncmp(pathBuffer.data()+(pathLen-4), ".dat", 4) == 0)
587                 {
588 #ifdef UDATA_DEBUG
589                     fprintf(stderr, "skipping non-directory .dat file %s\n", pathBuffer.data());
590 #endif
591                     continue;
592                 }
593 
594                 /* Check if it is a directory with the same name as our package */
595                 if(!packageStub.isEmpty() &&
596                    (pathLen > packageStub.length()) &&
597                    !uprv_strcmp(pathBuffer.data() + pathLen - packageStub.length(), packageStub.data())) {
598 #ifdef UDATA_DEBUG
599                   fprintf(stderr, "Found stub %s (will add package %s of len %d)\n", packageStub.data(), basename, basenameLen);
600 #endif
601                   pathBuffer.truncate(pathLen - packageStub.length());
602                 }
603                 pathBuffer.append(U_FILE_SEP_CHAR, *pErrorCode);
604             }
605 
606             /* + basename */
607             pathBuffer.append(packageStub.data()+1, packageStub.length()-1, *pErrorCode);
608 
609             if (!suffix.empty())  /* tack on suffix */
610             {
611                 if (suffix.length() > 4) {
612                     // If the suffix is actually an item ("ibm-5348_P100-1997.cnv") and not an extension (".res")
613                     // then we need to ensure that the path ends with a separator.
614                     pathBuffer.ensureEndsWithFileSeparator(*pErrorCode);
615                 }
616                 pathBuffer.append(suffix, *pErrorCode);
617             }
618         }
619 
620 #ifdef UDATA_DEBUG
621         fprintf(stderr, " -->  %s\n", pathBuffer.data());
622 #endif
623 
624         return pathBuffer.data();
625 
626     } while(path);
627 
628     /* fell way off the end */
629     return NULL;
630 }
631 
632 U_NAMESPACE_END
633 
634 /* ==================================================================================*/
635 
636 
637 /*----------------------------------------------------------------------*
638  *                                                                      *
639  *  Add a static reference to the common data library                   *
640  *   Unless overridden by an explicit udata_setCommonData, this will be *
641  *      our common data.                                                *
642  *                                                                      *
643  *----------------------------------------------------------------------*/
644 #if !defined(ICU_DATA_DIR_WINDOWS)
645 // When using the Windows system data, we expect only a single data file.
646 extern "C" const DataHeader U_DATA_API U_ICUDATA_ENTRY_POINT;
647 #endif
648 
649 /*
650  * This would be a good place for weak-linkage declarations of
651  * partial-data-library access functions where each returns a pointer
652  * to its data package, if it is linked in.
653  */
654 /*
655 extern const void *uprv_getICUData_collation(void) ATTRIBUTE_WEAK;
656 extern const void *uprv_getICUData_conversion(void) ATTRIBUTE_WEAK;
657 */
658 
659 /*----------------------------------------------------------------------*
660  *                                                                      *
661  *   openCommonData   Attempt to open a common format (.dat) file       *
662  *                    Map it into memory (if it's not there already)    *
663  *                    and return a UDataMemory object for it.           *
664  *                                                                      *
665  *                    If the requested data is already open and cached  *
666  *                       just return the cached UDataMem object.        *
667  *                                                                      *
668  *----------------------------------------------------------------------*/
669 static UDataMemory *
openCommonData(const char * path,int32_t commonDataIndex,UErrorCode * pErrorCode)670 openCommonData(const char *path,          /*  Path from OpenChoice?          */
671                int32_t commonDataIndex,   /*  ICU Data (index >= 0) if path == NULL */
672                UErrorCode *pErrorCode)
673 {
674     UDataMemory tData;
675     const char *pathBuffer;
676     const char *inBasename;
677 
678     if (U_FAILURE(*pErrorCode)) {
679         return NULL;
680     }
681 
682     UDataMemory_init(&tData);
683 
684     /* ??????? TODO revisit this */
685     if (commonDataIndex >= 0) {
686         /* "mini-cache" for common ICU data */
687         if(commonDataIndex >= UPRV_LENGTHOF(gCommonICUDataArray)) {
688             return NULL;
689         }
690         {
691             Mutex lock;
692             if(gCommonICUDataArray[commonDataIndex] != NULL) {
693                 return gCommonICUDataArray[commonDataIndex];
694             }
695 #if !defined(ICU_DATA_DIR_WINDOWS)
696 // When using the Windows system data, we expect only a single data file.
697             int32_t i;
698             for(i = 0; i < commonDataIndex; ++i) {
699                 if(gCommonICUDataArray[i]->pHeader == &U_ICUDATA_ENTRY_POINT) {
700                     /* The linked-in data is already in the list. */
701                     return NULL;
702                 }
703             }
704 #endif
705         }
706 
707         /* Add the linked-in data to the list. */
708         /*
709          * This is where we would check and call weakly linked partial-data-library
710          * access functions.
711          */
712         /*
713         if (uprv_getICUData_collation) {
714             setCommonICUDataPointer(uprv_getICUData_collation(), FALSE, pErrorCode);
715         }
716         if (uprv_getICUData_conversion) {
717             setCommonICUDataPointer(uprv_getICUData_conversion(), FALSE, pErrorCode);
718         }
719         */
720 #if !defined(ICU_DATA_DIR_WINDOWS)
721 // When using the Windows system data, we expect only a single data file.
722         setCommonICUDataPointer(&U_ICUDATA_ENTRY_POINT, FALSE, pErrorCode);
723         {
724             Mutex lock;
725             return gCommonICUDataArray[commonDataIndex];
726         }
727 #endif
728     }
729 
730 
731     /* request is NOT for ICU Data.  */
732 
733     /* Find the base name portion of the supplied path.   */
734     /*   inBasename will be left pointing somewhere within the original path string.      */
735     inBasename = findBasename(path);
736 #ifdef UDATA_DEBUG
737     fprintf(stderr, "inBasename = %s\n", inBasename);
738 #endif
739 
740     if(*inBasename==0) {
741         /* no basename.     This will happen if the original path was a directory name,   */
742         /*    like  "a/b/c/".   (Fallback to separate files will still work.)             */
743 #ifdef UDATA_DEBUG
744         fprintf(stderr, "ocd: no basename in %s, bailing.\n", path);
745 #endif
746         if (U_SUCCESS(*pErrorCode)) {
747             *pErrorCode=U_FILE_ACCESS_ERROR;
748         }
749         return NULL;
750     }
751 
752    /* Is the requested common data file already open and cached?                     */
753    /*   Note that the cache is keyed by the base name only.  The rest of the path,   */
754    /*     if any, is not considered.                                                 */
755     UDataMemory  *dataToReturn = udata_findCachedData(inBasename, *pErrorCode);
756     if (dataToReturn != NULL || U_FAILURE(*pErrorCode)) {
757         return dataToReturn;
758     }
759 
760     /* Requested item is not in the cache.
761      * Hunt it down, trying all the path locations
762      */
763 
764     UDataPathIterator iter(u_getDataDirectory(), inBasename, path, ".dat", TRUE, pErrorCode);
765 
766     while ((UDataMemory_isLoaded(&tData)==FALSE) && (pathBuffer = iter.next(pErrorCode)) != NULL)
767     {
768 #ifdef UDATA_DEBUG
769         fprintf(stderr, "ocd: trying path %s - ", pathBuffer);
770 #endif
771         uprv_mapFile(&tData, pathBuffer, pErrorCode);
772 #ifdef UDATA_DEBUG
773         fprintf(stderr, "%s\n", UDataMemory_isLoaded(&tData)?"LOADED":"not loaded");
774 #endif
775     }
776     if (U_FAILURE(*pErrorCode)) {
777         return NULL;
778     }
779 
780 #if defined(OS390_STUBDATA) && defined(OS390BATCH)
781     if (!UDataMemory_isLoaded(&tData)) {
782         char ourPathBuffer[1024];
783         /* One more chance, for extendCommonData() */
784         uprv_strncpy(ourPathBuffer, path, 1019);
785         ourPathBuffer[1019]=0;
786         uprv_strcat(ourPathBuffer, ".dat");
787         uprv_mapFile(&tData, ourPathBuffer, pErrorCode);
788     }
789 #endif
790 
791     if (U_FAILURE(*pErrorCode)) {
792         return NULL;
793     }
794     if (!UDataMemory_isLoaded(&tData)) {
795         /* no common data */
796         *pErrorCode=U_FILE_ACCESS_ERROR;
797         return NULL;
798     }
799 
800     /* we have mapped a file, check its header */
801     udata_checkCommonData(&tData, pErrorCode);
802 
803 
804     /* Cache the UDataMemory struct for this .dat file,
805      *   so we won't need to hunt it down and map it again next time
806      *   something is needed from it.                */
807     return udata_cacheDataItem(inBasename, &tData, pErrorCode);
808 }
809 
810 
811 /*----------------------------------------------------------------------*
812  *                                                                      *
813  *   extendICUData   If the full set of ICU data was not loaded at      *
814  *                   program startup, load it now.  This function will  *
815  *                   be called when the lookup of an ICU data item in   *
816  *                   the common ICU data fails.                         *
817  *                                                                      *
818  *                   return true if new data is loaded, false otherwise.*
819  *                                                                      *
820  *----------------------------------------------------------------------*/
extendICUData(UErrorCode * pErr)821 static UBool extendICUData(UErrorCode *pErr)
822 {
823     UDataMemory   *pData;
824     UDataMemory   copyPData;
825     UBool         didUpdate = FALSE;
826 
827     /*
828      * There is a chance for a race condition here.
829      * Normally, ICU data is loaded from a DLL or via mmap() and
830      * setCommonICUData() will detect if the same address is set twice.
831      * If ICU is built with data loading via fread() then the address will
832      * be different each time the common data is loaded and we may add
833      * multiple copies of the data.
834      * In this case, use a mutex to prevent the race.
835      * Use a specific mutex to avoid nested locks of the global mutex.
836      */
837 #if MAP_IMPLEMENTATION==MAP_STDIO
838     static UMutex extendICUDataMutex;
839     umtx_lock(&extendICUDataMutex);
840 #endif
841     if(!umtx_loadAcquire(gHaveTriedToLoadCommonData)) {
842         /* See if we can explicitly open a .dat file for the ICUData. */
843         pData = openCommonData(
844                    U_ICUDATA_NAME,            /*  "icudt20l" , for example.          */
845                    -1,                        /*  Pretend we're not opening ICUData  */
846                    pErr);
847 
848         /* How about if there is no pData, eh... */
849 
850        UDataMemory_init(&copyPData);
851        if(pData != NULL) {
852           UDatamemory_assign(&copyPData, pData);
853           copyPData.map = 0;              /* The mapping for this data is owned by the hash table */
854           copyPData.mapAddr = 0;          /*   which will unmap it when ICU is shut down.         */
855                                           /* CommonICUData is also unmapped when ICU is shut down.*/
856                                           /* To avoid unmapping the data twice, zero out the map  */
857                                           /*   fields in the UDataMemory that we're assigning     */
858                                           /*   to CommonICUData.                                  */
859 
860           didUpdate = /* no longer using this result */
861               setCommonICUData(&copyPData,/*  The new common data.                                */
862                        FALSE,             /*  No warnings if write didn't happen                  */
863                        pErr);             /*  setCommonICUData honors errors; NOP if error set    */
864         }
865 
866         umtx_storeRelease(gHaveTriedToLoadCommonData, 1);
867     }
868 
869     didUpdate = findCommonICUDataByName(U_ICUDATA_NAME, *pErr);  /* Return 'true' when a racing writes out the extended                 */
870                                                           /* data after another thread has failed to see it (in openCommonData), so     */
871                                                           /* extended data can be examined.                                             */
872                                                           /* Also handles a race through here before gHaveTriedToLoadCommonData is set. */
873 
874 #if MAP_IMPLEMENTATION==MAP_STDIO
875     umtx_unlock(&extendICUDataMutex);
876 #endif
877     return didUpdate;               /* Return true if ICUData pointer was updated.   */
878                                     /*   (Could potentially have been done by another thread racing */
879                                     /*   us through here, but that's fine, we still return true    */
880                                     /*   so that current thread will also examine extended data.   */
881 }
882 
883 /*----------------------------------------------------------------------*
884  *                                                                      *
885  *   udata_setCommonData                                                *
886  *                                                                      *
887  *----------------------------------------------------------------------*/
888 U_CAPI void U_EXPORT2
udata_setCommonData(const void * data,UErrorCode * pErrorCode)889 udata_setCommonData(const void *data, UErrorCode *pErrorCode) {
890     UDataMemory dataMemory;
891 
892     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
893         return;
894     }
895 
896     if(data==NULL) {
897         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
898         return;
899     }
900 
901     /* set the data pointer and test for validity */
902     UDataMemory_init(&dataMemory);
903     UDataMemory_setData(&dataMemory, data);
904     udata_checkCommonData(&dataMemory, pErrorCode);
905     if (U_FAILURE(*pErrorCode)) {return;}
906 
907     /* we have good data */
908     /* Set it up as the ICU Common Data.  */
909     setCommonICUData(&dataMemory, TRUE, pErrorCode);
910 }
911 
912 /*---------------------------------------------------------------------------
913  *
914  *  udata_setAppData
915  *
916  *---------------------------------------------------------------------------- */
917 U_CAPI void U_EXPORT2
udata_setAppData(const char * path,const void * data,UErrorCode * err)918 udata_setAppData(const char *path, const void *data, UErrorCode *err)
919 {
920     UDataMemory     udm;
921 
922     if(err==NULL || U_FAILURE(*err)) {
923         return;
924     }
925     if(data==NULL) {
926         *err=U_ILLEGAL_ARGUMENT_ERROR;
927         return;
928     }
929 
930     UDataMemory_init(&udm);
931     UDataMemory_setData(&udm, data);
932     udata_checkCommonData(&udm, err);
933     udata_cacheDataItem(path, &udm, err);
934 }
935 
936 /*----------------------------------------------------------------------------*
937  *                                                                            *
938  *  checkDataItem     Given a freshly located/loaded data item, either        *
939  *                    an entry in a common file or a separately loaded file,  *
940  *                    sanity check its header, and see if the data is         *
941  *                    acceptable to the app.                                  *
942  *                    If the data is good, create and return a UDataMemory    *
943  *                    object that can be returned to the application.         *
944  *                    Return NULL on any sort of failure.                     *
945  *                                                                            *
946  *----------------------------------------------------------------------------*/
947 static UDataMemory *
checkDataItem(const DataHeader * pHeader,UDataMemoryIsAcceptable * isAcceptable,void * context,const char * type,const char * name,UErrorCode * nonFatalErr,UErrorCode * fatalErr)948 checkDataItem
949 (
950  const DataHeader         *pHeader,         /* The data item to be checked.                */
951  UDataMemoryIsAcceptable  *isAcceptable,    /* App's call-back function                    */
952  void                     *context,         /*   pass-thru param for above.                */
953  const char               *type,            /*   pass-thru param for above.                */
954  const char               *name,            /*   pass-thru param for above.                */
955  UErrorCode               *nonFatalErr,     /* Error code if this data was not acceptable  */
956                                             /*   but openChoice should continue with       */
957                                             /*   trying to get data from fallback path.    */
958  UErrorCode               *fatalErr         /* Bad error, caller should return immediately */
959  )
960 {
961     UDataMemory  *rDataMem = NULL;          /* the new UDataMemory, to be returned.        */
962 
963     if (U_FAILURE(*fatalErr)) {
964         return NULL;
965     }
966 
967     if(pHeader->dataHeader.magic1==0xda &&
968         pHeader->dataHeader.magic2==0x27 &&
969         (isAcceptable==NULL || isAcceptable(context, type, name, &pHeader->info))
970     ) {
971         rDataMem=UDataMemory_createNewInstance(fatalErr);
972         if (U_FAILURE(*fatalErr)) {
973             return NULL;
974         }
975         rDataMem->pHeader = pHeader;
976     } else {
977         /* the data is not acceptable, look further */
978         /* If we eventually find something good, this errorcode will be */
979         /*    cleared out.                                              */
980         *nonFatalErr=U_INVALID_FORMAT_ERROR;
981     }
982     return rDataMem;
983 }
984 
985 /**
986  * @return 0 if not loaded, 1 if loaded or err
987  */
doLoadFromIndividualFiles(const char * pkgName,const char * dataPath,const char * tocEntryPathSuffix,const char * path,const char * type,const char * name,UDataMemoryIsAcceptable * isAcceptable,void * context,UErrorCode * subErrorCode,UErrorCode * pErrorCode)988 static UDataMemory *doLoadFromIndividualFiles(const char *pkgName,
989         const char *dataPath, const char *tocEntryPathSuffix,
990             /* following arguments are the same as doOpenChoice itself */
991             const char *path, const char *type, const char *name,
992              UDataMemoryIsAcceptable *isAcceptable, void *context,
993              UErrorCode *subErrorCode,
994              UErrorCode *pErrorCode)
995 {
996     const char         *pathBuffer;
997     UDataMemory         dataMemory;
998     UDataMemory *pEntryData;
999 
1000     /* look in ind. files: package\nam.typ  ========================= */
1001     /* init path iterator for individual files */
1002     UDataPathIterator iter(dataPath, pkgName, path, tocEntryPathSuffix, FALSE, pErrorCode);
1003 
1004     while ((pathBuffer = iter.next(pErrorCode)) != NULL)
1005     {
1006 #ifdef UDATA_DEBUG
1007         fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer);
1008 #endif
1009         if (uprv_mapFile(&dataMemory, pathBuffer, pErrorCode))
1010         {
1011             pEntryData = checkDataItem(dataMemory.pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode);
1012             if (pEntryData != NULL) {
1013                 /* Data is good.
1014                 *  Hand off ownership of the backing memory to the user's UDataMemory.
1015                 *  and return it.   */
1016                 pEntryData->mapAddr = dataMemory.mapAddr;
1017                 pEntryData->map     = dataMemory.map;
1018 
1019 #ifdef UDATA_DEBUG
1020                 fprintf(stderr, "** Mapped file: %s\n", pathBuffer);
1021 #endif
1022                 return pEntryData;
1023             }
1024 
1025             /* the data is not acceptable, or some error occurred.  Either way, unmap the memory */
1026             udata_close(&dataMemory);
1027 
1028             /* If we had a nasty error, bail out completely.  */
1029             if (U_FAILURE(*pErrorCode)) {
1030                 return NULL;
1031             }
1032 
1033             /* Otherwise remember that we found data but didn't like it for some reason  */
1034             *subErrorCode=U_INVALID_FORMAT_ERROR;
1035         }
1036 #ifdef UDATA_DEBUG
1037         fprintf(stderr, "%s\n", UDataMemory_isLoaded(&dataMemory)?"LOADED":"not loaded");
1038 #endif
1039     }
1040     return NULL;
1041 }
1042 
1043 /**
1044  * @return 0 if not loaded, 1 if loaded or err
1045  */
doLoadFromCommonData(UBool isICUData,const char *,const char *,const char *,const char * tocEntryName,const char * path,const char * type,const char * name,UDataMemoryIsAcceptable * isAcceptable,void * context,UErrorCode * subErrorCode,UErrorCode * pErrorCode)1046 static UDataMemory *doLoadFromCommonData(UBool isICUData, const char * /*pkgName*/,
1047         const char * /*dataPath*/, const char * /*tocEntryPathSuffix*/, const char *tocEntryName,
1048             /* following arguments are the same as doOpenChoice itself */
1049             const char *path, const char *type, const char *name,
1050              UDataMemoryIsAcceptable *isAcceptable, void *context,
1051              UErrorCode *subErrorCode,
1052              UErrorCode *pErrorCode)
1053 {
1054     UDataMemory        *pEntryData;
1055     const DataHeader   *pHeader;
1056     UDataMemory        *pCommonData;
1057     int32_t            commonDataIndex;
1058     UBool              checkedExtendedICUData = FALSE;
1059     /* try to get common data.  The loop is for platforms such as the 390 that do
1060      *  not initially load the full set of ICU data.  If the lookup of an ICU data item
1061      *  fails, the full (but slower to load) set is loaded, the and the loop repeats,
1062      *  trying the lookup again.  Once the full set of ICU data is loaded, the loop wont
1063      *  repeat because the full set will be checked the first time through.
1064      *
1065      *  The loop also handles the fallback to a .dat file if the application linked
1066      *   to the stub data library rather than a real library.
1067      */
1068     for (commonDataIndex = isICUData ? 0 : -1;;) {
1069         pCommonData=openCommonData(path, commonDataIndex, subErrorCode); /** search for pkg **/
1070 
1071         if(U_SUCCESS(*subErrorCode) && pCommonData!=NULL) {
1072             int32_t length;
1073 
1074             /* look up the data piece in the common data */
1075             pHeader=pCommonData->vFuncs->Lookup(pCommonData, tocEntryName, &length, subErrorCode);
1076 #ifdef UDATA_DEBUG
1077             fprintf(stderr, "%s: pHeader=%p - %s\n", tocEntryName, (void*) pHeader, u_errorName(*subErrorCode));
1078 #endif
1079 
1080             if(pHeader!=NULL) {
1081                 pEntryData = checkDataItem(pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode);
1082 #ifdef UDATA_DEBUG
1083                 fprintf(stderr, "pEntryData=%p\n", (void*) pEntryData);
1084 #endif
1085                 if (U_FAILURE(*pErrorCode)) {
1086                     return NULL;
1087                 }
1088                 if (pEntryData != NULL) {
1089                     pEntryData->length = length;
1090                     return pEntryData;
1091                 }
1092             }
1093         }
1094         // If we failed due to being out-of-memory, then stop early and report the error.
1095         if (*subErrorCode == U_MEMORY_ALLOCATION_ERROR) {
1096             *pErrorCode = *subErrorCode;
1097             return NULL;
1098         }
1099         /* Data wasn't found.  If we were looking for an ICUData item and there is
1100          * more data available, load it and try again,
1101          * otherwise break out of this loop. */
1102         if (!isICUData) {
1103             return NULL;
1104         } else if (pCommonData != NULL) {
1105             ++commonDataIndex;  /* try the next data package */
1106         } else if ((!checkedExtendedICUData) && extendICUData(subErrorCode)) {
1107             checkedExtendedICUData = TRUE;
1108             /* try this data package slot again: it changed from NULL to non-NULL */
1109         } else {
1110             return NULL;
1111         }
1112     }
1113 }
1114 
1115 /*
1116  * Identify the Time Zone resources that are subject to special override data loading.
1117  */
isTimeZoneFile(const char * name,const char * type)1118 static UBool isTimeZoneFile(const char *name, const char *type) {
1119     return ((uprv_strcmp(type, "res") == 0) &&
1120             (uprv_strcmp(name, "zoneinfo64") == 0 ||
1121              uprv_strcmp(name, "timezoneTypes") == 0 ||
1122              uprv_strcmp(name, "windowsZones") == 0 ||
1123              uprv_strcmp(name, "metaZones") == 0));
1124 }
1125 
1126 /*
1127  *  A note on the ownership of Mapped Memory
1128  *
1129  *  For common format files, ownership resides with the UDataMemory object
1130  *    that lives in the cache of opened common data.  These UDataMemorys are private
1131  *    to the udata implementation, and are never seen directly by users.
1132  *
 *    The UDataMemory objects returned to users will have the address of some desired
 *    data within the mapped region, but they won't have the mapping info itself, and thus
 *    won't cause anything to be removed from memory when they are closed.
1136  *
1137  *  For individual data files, the UDataMemory returned to the user holds the
1138  *  information necessary to unmap the data on close.  If the user independently
1139  *  opens the same data file twice, two completely independent mappings will be made.
1140  *  (There is no cache of opened data items from individual files, only a cache of
1141  *   opened Common Data files, that is, files containing a collection of data items.)
1142  *
1143  *  For common data passed in from the user via udata_setAppData() or
1144  *  udata_setCommonData(), ownership remains with the user.
1145  *
1146  *  UDataMemory objects themselves, as opposed to the memory they describe,
1147  *  can be anywhere - heap, stack/local or global.
1148  *  They have a flag to indicate when they're heap allocated and thus
1149  *  must be deleted when closed.
1150  */
1151 
1152 
1153 /*----------------------------------------------------------------------------*
1154  *                                                                            *
1155  * main data loading functions                                                *
1156  *                                                                            *
1157  *----------------------------------------------------------------------------*/
1158 static UDataMemory *
doOpenChoice(const char * path,const char * type,const char * name,UDataMemoryIsAcceptable * isAcceptable,void * context,UErrorCode * pErrorCode)1159 doOpenChoice(const char *path, const char *type, const char *name,
1160              UDataMemoryIsAcceptable *isAcceptable, void *context,
1161              UErrorCode *pErrorCode)
1162 {
1163     UDataMemory         *retVal = NULL;
1164 
1165     const char         *dataPath;
1166 
1167     int32_t             tocEntrySuffixIndex;
1168     const char         *tocEntryPathSuffix;
1169     UErrorCode          subErrorCode=U_ZERO_ERROR;
1170     const char         *treeChar;
1171 
1172     UBool               isICUData = FALSE;
1173 
1174 
1175     FileTracer::traceOpen(path, type, name);
1176 
1177 
1178     /* Is this path ICU data? */
1179     if(path == NULL ||
1180        !strcmp(path, U_ICUDATA_ALIAS) ||  /* "ICUDATA" */
1181        !uprv_strncmp(path, U_ICUDATA_NAME U_TREE_SEPARATOR_STRING, /* "icudt26e-" */
1182                      uprv_strlen(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING)) ||
1183        !uprv_strncmp(path, U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING, /* "ICUDATA-" */
1184                      uprv_strlen(U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING))) {
1185       isICUData = TRUE;
1186     }
1187 
1188 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)  /* Windows:  try "foo\bar" and "foo/bar" */
1189     /* remap from alternate path char to the main one */
1190     CharString altSepPath;
1191     if(path) {
1192         if(uprv_strchr(path,U_FILE_ALT_SEP_CHAR) != NULL) {
1193             altSepPath.append(path, *pErrorCode);
1194             char *p;
1195             while ((p = uprv_strchr(altSepPath.data(), U_FILE_ALT_SEP_CHAR)) != NULL) {
1196                 *p = U_FILE_SEP_CHAR;
1197             }
1198 #if defined (UDATA_DEBUG)
1199             fprintf(stderr, "Changed path from [%s] to [%s]\n", path, altSepPath.s);
1200 #endif
1201             path = altSepPath.data();
1202         }
1203     }
1204 #endif
1205 
1206     CharString tocEntryName; /* entry name in tree format. ex:  'icudt28b/coll/ar.res' */
1207     CharString tocEntryPath; /* entry name in path format. ex:  'icudt28b\\coll\\ar.res' */
1208 
1209     CharString pkgName;
1210     CharString treeName;
1211 
1212     /* ======= Set up strings */
1213     if(path==NULL) {
1214         pkgName.append(U_ICUDATA_NAME, *pErrorCode);
1215     } else {
1216         const char *pkg;
1217         const char *first;
1218         pkg = uprv_strrchr(path, U_FILE_SEP_CHAR);
1219         first = uprv_strchr(path, U_FILE_SEP_CHAR);
1220         if(uprv_pathIsAbsolute(path) || (pkg != first)) { /* more than one slash in the path- not a tree name */
1221             /* see if this is an /absolute/path/to/package  path */
1222             if(pkg) {
1223                 pkgName.append(pkg+1, *pErrorCode);
1224             } else {
1225                 pkgName.append(path, *pErrorCode);
1226             }
1227         } else {
1228             treeChar = uprv_strchr(path, U_TREE_SEPARATOR);
1229             if(treeChar) {
1230                 treeName.append(treeChar+1, *pErrorCode); /* following '-' */
1231                 if(isICUData) {
1232                     pkgName.append(U_ICUDATA_NAME, *pErrorCode);
1233                 } else {
1234                     pkgName.append(path, (int32_t)(treeChar-path), *pErrorCode);
1235                     if (first == NULL) {
1236                         /*
1237                         This user data has no path, but there is a tree name.
1238                         Look up the correct path from the data cache later.
1239                         */
1240                         path = pkgName.data();
1241                     }
1242                 }
1243             } else {
1244                 if(isICUData) {
1245                     pkgName.append(U_ICUDATA_NAME, *pErrorCode);
1246                 } else {
1247                     pkgName.append(path, *pErrorCode);
1248                 }
1249             }
1250         }
1251     }
1252 
1253 #ifdef UDATA_DEBUG
1254     fprintf(stderr, " P=%s T=%s\n", pkgName.data(), treeName.data());
1255 #endif
1256 
1257     /* setting up the entry name and file name
1258      * Make up a full name by appending the type to the supplied
1259      *  name, assuming that a type was supplied.
1260      */
1261 
1262     /* prepend the package */
1263     tocEntryName.append(pkgName, *pErrorCode);
1264     tocEntryPath.append(pkgName, *pErrorCode);
1265     tocEntrySuffixIndex = tocEntryName.length();
1266 
1267     if(!treeName.isEmpty()) {
1268         tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode);
1269         tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode);
1270     }
1271 
1272     tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(name, *pErrorCode);
1273     tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(name, *pErrorCode);
1274     if(type!=NULL && *type!=0) {
1275         tocEntryName.append(".", *pErrorCode).append(type, *pErrorCode);
1276         tocEntryPath.append(".", *pErrorCode).append(type, *pErrorCode);
1277     }
1278     // The +1 is for the U_FILE_SEP_CHAR that is always appended above.
1279     tocEntryPathSuffix = tocEntryPath.data() + tocEntrySuffixIndex + 1; /* suffix starts here */
1280 
1281 #ifdef UDATA_DEBUG
1282     fprintf(stderr, " tocEntryName = %s\n", tocEntryName.data());
1283     fprintf(stderr, " tocEntryPath = %s\n", tocEntryName.data());
1284 #endif
1285 
1286 #if !defined(ICU_DATA_DIR_WINDOWS)
1287     if(path == NULL) {
1288         path = COMMON_DATA_NAME; /* "icudt26e" */
1289     }
1290 #else
1291     // When using the Windows system data, we expects only a single data file.
1292     path = COMMON_DATA_NAME; /* "icudt26e" */
1293 #endif
1294 
1295     /************************ Begin loop looking for ind. files ***************/
1296 #ifdef UDATA_DEBUG
1297     fprintf(stderr, "IND: inBasename = %s, pkg=%s\n", "(n/a)", packageNameFromPath(path));
1298 #endif
1299 
1300     /* End of dealing with a null basename */
1301     dataPath = u_getDataDirectory();
1302 
1303     /****    Time zone individual files override  */
1304     if (isICUData && isTimeZoneFile(name, type)) {
1305         const char *tzFilesDir = u_getTimeZoneFilesDirectory(pErrorCode);
1306         if (tzFilesDir[0] != 0) {
1307 #ifdef UDATA_DEBUG
1308             fprintf(stderr, "Trying Time Zone Files directory = %s\n", tzFilesDir);
1309 #endif
1310             retVal = doLoadFromIndividualFiles(/* pkgName.data() */ "", tzFilesDir, tocEntryPathSuffix,
1311                             /* path */ "", type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1312             if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
1313                 return retVal;
1314             }
1315         }
1316     }
1317 
1318     /****    COMMON PACKAGE  - only if packages are first. */
1319     if(gDataFileAccess == UDATA_PACKAGES_FIRST) {
1320 #ifdef UDATA_DEBUG
1321         fprintf(stderr, "Trying packages (UDATA_PACKAGES_FIRST)\n");
1322 #endif
1323         /* #2 */
1324         retVal = doLoadFromCommonData(isICUData,
1325                             pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(),
1326                             path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1327         if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
1328             return retVal;
1329         }
1330     }
1331 
1332     /****    INDIVIDUAL FILES  */
1333     if((gDataFileAccess==UDATA_PACKAGES_FIRST) ||
1334        (gDataFileAccess==UDATA_FILES_FIRST)) {
1335 #ifdef UDATA_DEBUG
1336         fprintf(stderr, "Trying individual files\n");
1337 #endif
1338         /* Check to make sure that there is a dataPath to iterate over */
1339         if ((dataPath && *dataPath) || !isICUData) {
1340             retVal = doLoadFromIndividualFiles(pkgName.data(), dataPath, tocEntryPathSuffix,
1341                             path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1342             if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
1343                 return retVal;
1344             }
1345         }
1346     }
1347 
1348     /****    COMMON PACKAGE  */
1349     if((gDataFileAccess==UDATA_ONLY_PACKAGES) ||
1350        (gDataFileAccess==UDATA_FILES_FIRST)) {
1351 #ifdef UDATA_DEBUG
1352         fprintf(stderr, "Trying packages (UDATA_ONLY_PACKAGES || UDATA_FILES_FIRST)\n");
1353 #endif
1354         retVal = doLoadFromCommonData(isICUData,
1355                             pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(),
1356                             path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1357         if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
1358             return retVal;
1359         }
1360     }
1361 
1362     /* Load from DLL.  If we haven't attempted package load, we also haven't had any chance to
1363         try a DLL (static or setCommonData/etc)  load.
1364          If we ever have a "UDATA_ONLY_FILES", add it to the or list here.  */
1365     if(gDataFileAccess==UDATA_NO_FILES) {
1366 #ifdef UDATA_DEBUG
1367         fprintf(stderr, "Trying common data (UDATA_NO_FILES)\n");
1368 #endif
1369         retVal = doLoadFromCommonData(isICUData,
1370                             pkgName.data(), "", tocEntryPathSuffix, tocEntryName.data(),
1371                             path, type, name, isAcceptable, context, &subErrorCode, pErrorCode);
1372         if((retVal != NULL) || U_FAILURE(*pErrorCode)) {
1373             return retVal;
1374         }
1375     }
1376 
1377     /* data not found */
1378     if(U_SUCCESS(*pErrorCode)) {
1379         if(U_SUCCESS(subErrorCode)) {
1380             /* file not found */
1381             *pErrorCode=U_FILE_ACCESS_ERROR;
1382         } else {
1383             /* entry point not found or rejected */
1384             *pErrorCode=subErrorCode;
1385         }
1386     }
1387     return retVal;
1388 }
1389 
1390 
1391 
1392 /* API ---------------------------------------------------------------------- */
1393 
1394 U_CAPI UDataMemory * U_EXPORT2
udata_open(const char * path,const char * type,const char * name,UErrorCode * pErrorCode)1395 udata_open(const char *path, const char *type, const char *name,
1396            UErrorCode *pErrorCode) {
1397 #ifdef UDATA_DEBUG
1398   fprintf(stderr, "udata_open(): Opening: %s : %s . %s\n", (path?path:"NULL"), name, type);
1399     fflush(stderr);
1400 #endif
1401 
1402     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1403         return NULL;
1404     } else if(name==NULL || *name==0) {
1405         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1406         return NULL;
1407     } else {
1408         return doOpenChoice(path, type, name, NULL, NULL, pErrorCode);
1409     }
1410 }
1411 
1412 
1413 
1414 U_CAPI UDataMemory * U_EXPORT2
udata_openChoice(const char * path,const char * type,const char * name,UDataMemoryIsAcceptable * isAcceptable,void * context,UErrorCode * pErrorCode)1415 udata_openChoice(const char *path, const char *type, const char *name,
1416                  UDataMemoryIsAcceptable *isAcceptable, void *context,
1417                  UErrorCode *pErrorCode) {
1418 #ifdef UDATA_DEBUG
1419   fprintf(stderr, "udata_openChoice(): Opening: %s : %s . %s\n", (path?path:"NULL"), name, type);
1420 #endif
1421 
1422     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1423         return NULL;
1424     } else if(name==NULL || *name==0 || isAcceptable==NULL) {
1425         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1426         return NULL;
1427     } else {
1428         return doOpenChoice(path, type, name, isAcceptable, context, pErrorCode);
1429     }
1430 }
1431 
1432 
1433 
1434 U_CAPI void U_EXPORT2
udata_getInfo(UDataMemory * pData,UDataInfo * pInfo)1435 udata_getInfo(UDataMemory *pData, UDataInfo *pInfo) {
1436     if(pInfo!=NULL) {
1437         if(pData!=NULL && pData->pHeader!=NULL) {
1438             const UDataInfo *info=&pData->pHeader->info;
1439             uint16_t dataInfoSize=udata_getInfoSize(info);
1440             if(pInfo->size>dataInfoSize) {
1441                 pInfo->size=dataInfoSize;
1442             }
1443             uprv_memcpy((uint16_t *)pInfo+1, (const uint16_t *)info+1, pInfo->size-2);
1444             if(info->isBigEndian!=U_IS_BIG_ENDIAN) {
1445                 /* opposite endianness */
1446                 uint16_t x=info->reservedWord;
1447                 pInfo->reservedWord=(uint16_t)((x<<8)|(x>>8));
1448             }
1449         } else {
1450             pInfo->size=0;
1451         }
1452     }
1453 }
1454 
1455 
udata_setFileAccess(UDataFileAccess access,UErrorCode *)1456 U_CAPI void U_EXPORT2 udata_setFileAccess(UDataFileAccess access, UErrorCode * /*status*/)
1457 {
1458     // Note: this function is documented as not thread safe.
1459     gDataFileAccess = access;
1460 }
1461