1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 *   Copyright (C) 1997-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 ******************************************************************************
10 *
11 *  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
12 *
13 *   Date        Name        Description
14 *   04/14/97    aliu        Creation.
15 *   04/24/97    aliu        Added getDefaultDataDirectory() and
16 *                            getDefaultLocaleID().
17 *   04/28/97    aliu        Rewritten to assume Unix and apply general methods
18 *                            for assumed case.  Non-UNIX platforms must be
19 *                            special-cased.  Rewrote numeric methods dealing
20 *                            with NaN and Infinity to be platform independent
21 *                             over all IEEE 754 platforms.
22 *   05/13/97    aliu        Restored sign of timezone
23 *                            (semantics are hours West of GMT)
24 *   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
25 *                             nextDouble..
26 *   07/22/98    stephen     Added remainder, max, min, trunc
27 *   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
28 *   08/24/98    stephen     Added longBitsFromDouble
29 *   09/08/98    stephen     Minor changes for Mac Port
30 *   03/02/99    stephen     Removed openFile().  Added AS400 support.
31 *                            Fixed EBCDIC tables
32 *   04/15/99    stephen     Converted to C.
33 *   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
34 *   08/04/99    jeffrey R.  Added OS/2 changes
35 *   11/15/99    helena      Integrated S/390 IEEE support.
36 *   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
37 *   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
38 *   01/03/08    Steven L.   Fake Time Support
39 ******************************************************************************
40 */
41 
42 // Defines _XOPEN_SOURCE for access to POSIX functions.
43 // Must be before any other #includes.
44 #include "uposixdefs.h"
45 
46 // First, the platform type. Need this for U_PLATFORM.
47 #include "unicode/platform.h"
48 
49 #if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__
50 /* tzset isn't defined in strict ANSI on MinGW. */
51 #undef __STRICT_ANSI__
52 #endif
53 
/*
 * Cygwin with GCC requires time.h to be included after the statement above that disables strict ANSI mode.
 */
57 #include <time.h>
58 
59 #if !U_PLATFORM_USES_ONLY_WIN32_API
60 #include <sys/time.h>
61 #endif
62 
63 /* include the rest of the ICU headers */
64 #include "unicode/putil.h"
65 #include "unicode/ustring.h"
66 #include "putilimp.h"
67 #include "uassert.h"
68 #include "umutex.h"
69 #include "cmemory.h"
70 #include "cstring.h"
71 #include "locmap.h"
72 #include "ucln_cmn.h"
73 #include "charstr.h"
74 
75 /* Include standard headers. */
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <math.h>
80 #include <locale.h>
81 #include <float.h>
82 
83 #ifndef U_COMMON_IMPLEMENTATION
84 #error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/howtouseicu
85 #endif
86 
87 
88 /* include system headers */
89 #if U_PLATFORM_USES_ONLY_WIN32_API
90     /*
91      * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
92      * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
93      * to use native APIs as much as possible?
94      */
95 #ifndef WIN32_LEAN_AND_MEAN
96 #   define WIN32_LEAN_AND_MEAN
97 #endif
98 #   define VC_EXTRALEAN
99 #   define NOUSER
100 #   define NOSERVICE
101 #   define NOIME
102 #   define NOMCX
103 #   include <windows.h>
104 #   include "unicode/uloc.h"
105 #   include "wintz.h"
106 #elif U_PLATFORM == U_PF_OS400
107 #   include <float.h>
108 #   include <qusec.h>       /* error code structure */
109 #   include <qusrjobi.h>
110 #   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
111 #   include <mih/testptr.h> /* For uprv_maximumPtr */
112 #elif U_PLATFORM == U_PF_OS390
113 #   include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
114 #elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
115 #   include <limits.h>
116 #   include <unistd.h>
117 #   if U_PLATFORM == U_PF_SOLARIS
118 #       ifndef _XPG4_2
119 #           define _XPG4_2
120 #       endif
121 #   elif U_PLATFORM == U_PF_ANDROID
122 #       include <sys/system_properties.h>
123 #       include <dlfcn.h>
124 #   endif
125 #elif U_PLATFORM == U_PF_QNX
126 #   include <sys/neutrino.h>
127 #endif
128 
129 #ifdef U_STRINGI_PATCHES
130 #ifdef USE_WINDOWS_LOCALE_API
131 #include <windows.h>
132 #include <winnls.h>
133 #endif
134 #endif /* U_STRINGI_PATCHES */
135 
/*
 * Only include langinfo.h if we have a way to get the codeset. If we later
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
 *
 */
141 
142 #if U_HAVE_NL_LANGINFO_CODESET
143 #include <langinfo.h>
144 #endif
145 
146 /**
147  * Simple things (presence of functions, etc) should just go in configure.in and be added to
148  * icucfg.h via autoheader.
149  */
150 #if U_PLATFORM_IMPLEMENTS_POSIX
151 #   if U_PLATFORM == U_PF_OS400
152 #    define HAVE_DLFCN_H 0
153 #    define HAVE_DLOPEN 0
154 #   else
155 #   ifndef HAVE_DLFCN_H
156 #    define HAVE_DLFCN_H 1
157 #   endif
158 #   ifndef HAVE_DLOPEN
159 #    define HAVE_DLOPEN 1
160 #   endif
161 #   endif
162 #   ifndef HAVE_GETTIMEOFDAY
163 #    define HAVE_GETTIMEOFDAY 1
164 #   endif
165 #else
166 #   define HAVE_DLFCN_H 0
167 #   define HAVE_DLOPEN 0
168 #   define HAVE_GETTIMEOFDAY 0
169 #endif
170 
171 U_NAMESPACE_USE
172 
173 /* Define the extension for data files, again... */
174 #define DATA_TYPE "dat"
175 
/* Leave this copyright notice here!
 * The array is initialized from U_COPYRIGHT_STRING. Nothing in the visible part
 * of this file references it, so it can look like dead code -- do not remove it. */
static const char copyright[] = U_COPYRIGHT_STRING;
178 
179 /* floating point implementations ------------------------------------------- */
180 
181 /* We return QNAN rather than SNAN*/
182 #define SIGN 0x80000000U
183 
184 /* Make it easy to define certain types of constants */
/*
 * Overlay of a 64-bit integer and a double, used to write double constants as
 * raw bit patterns.
 * i64 must remain the first member: a brace initializer fills the first member
 * of a union (C89 rule), and the constants below depend on that.
 */
typedef union {
    int64_t i64;
    double d64;
} BitPatternConversion;

/* 0x7FF8000000000000: all exponent bits set plus the top fraction bit (a quiet NaN). */
static const BitPatternConversion gNan = { INT64_C(0x7FF8000000000000) };

/* 0x7FF0000000000000: all exponent bits set, zero fraction (positive infinity). */
static const BitPatternConversion gInf = { INT64_C(0x7FF0000000000000) };
191 
192 /*---------------------------------------------------------------------------
193   Platform utilities
194   Our general strategy is to assume we're on a POSIX platform.  Platforms which
195   are non-POSIX must declare themselves so.  The default POSIX implementation
196   will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
197   functions).
198   ---------------------------------------------------------------------------*/
199 
200 #if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
201 #   undef U_POSIX_LOCALE
202 #else
203 #   define U_POSIX_LOCALE    1
204 #endif
205 
206 /*
207     WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
208     can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
209 */
210 #if !IEEE_754
/*
 * Returns a pointer to the n most significant bytes of *d.
 * On big-endian targets those are the first bytes of the object; otherwise they
 * are the last n bytes, i.e. the object's end address minus n.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char *)d;
#if U_IS_BIG_ENDIAN
    return firstByte;
#else
    return firstByte + sizeof(double) - n;
#endif
}
220 
/*
 * Returns a pointer to the n least significant bytes of *d.
 * On big-endian targets those are the last n bytes of the object; otherwise
 * they start at the object's first byte.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char *)d;
#if U_IS_BIG_ENDIAN
    return firstByte + sizeof(double) - n;
#else
    return firstByte;
#endif
}
230 #endif   /* !IEEE_754 */
231 
232 #if IEEE_754
233 static UBool
u_signBit(double d)234 u_signBit(double d) {
235     uint8_t hiByte;
236 #if U_IS_BIG_ENDIAN
237     hiByte = *(uint8_t *)&d;
238 #else
239     hiByte = *(((uint8_t *)&d) + sizeof(double) - 1);
240 #endif
241     return (hiByte & 0x80) != 0;
242 }
243 #endif
244 
245 
246 
247 #if defined (U_DEBUG_FAKETIME)
/* Override the clock to test things without having to move the system clock.
 * Assumes POSIX gettimeofday() will function
 *
 * All three variables are filled in by getUTCtime_fake() the first time it runs.
 */
UDate fakeClock_t0 = 0; /** Time to start the clock from (parsed from the U_FAKETIME_START environment variable) **/
UDate fakeClock_dt = 0; /** Offset (fake time - real time), added to every real reading **/
UBool fakeClock_set = FALSE; /** True if fake clock has spun up, i.e. the offset has been computed **/
254 
getUTCtime_real()255 static UDate getUTCtime_real() {
256     struct timeval posixTime;
257     gettimeofday(&posixTime, NULL);
258     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
259 }
260 
getUTCtime_fake()261 static UDate getUTCtime_fake() {
262     static UMutex fakeClockMutex;
263     umtx_lock(&fakeClockMutex);
264     if(!fakeClock_set) {
265         UDate real = getUTCtime_real();
266         const char *fake_start = getenv("U_FAKETIME_START");
267         if((fake_start!=NULL) && (fake_start[0]!=0)) {
268             sscanf(fake_start,"%lf",&fakeClock_t0);
269             fakeClock_dt = fakeClock_t0 - real;
270             fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
271                     "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
272                     fakeClock_t0, fake_start, fakeClock_dt, real);
273         } else {
274           fakeClock_dt = 0;
275             fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
276                     "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
277         }
278         fakeClock_set = TRUE;
279     }
280     umtx_unlock(&fakeClockMutex);
281 
282     return getUTCtime_real() + fakeClock_dt;
283 }
284 #endif
285 
286 #if U_PLATFORM_USES_ONLY_WIN32_API
/* Overlays a Win32 FILETIME with a 64-bit integer, so that a value written
 * through fileTime can be read back as a single number through int64. */
typedef union {
    int64_t int64;
    FILETIME fileTime;
} FileTimeConversion;   /* This is like a ULARGE_INTEGER */
291 
292 /* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
293 #define EPOCH_BIAS  INT64_C(116444736000000000)
294 #define HECTONANOSECOND_PER_MILLISECOND   10000
295 
296 #endif
297 
298 /*---------------------------------------------------------------------------
299   Universal Implementations
300   These are designed to work on all platforms.  Try these, and if they
301   don't work on your platform, then special case your platform with new
302   implementations.
303 ---------------------------------------------------------------------------*/
304 
305 U_CAPI UDate U_EXPORT2
uprv_getUTCtime()306 uprv_getUTCtime()
307 {
308 #if defined(U_DEBUG_FAKETIME)
309     return getUTCtime_fake(); /* Hook for overriding the clock */
310 #else
311     return uprv_getRawUTCtime();
312 #endif
313 }
314 
315 /* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
316 U_CAPI UDate U_EXPORT2
uprv_getRawUTCtime()317 uprv_getRawUTCtime()
318 {
319 #if U_PLATFORM_USES_ONLY_WIN32_API
320 
321     FileTimeConversion winTime;
322     GetSystemTimeAsFileTime(&winTime.fileTime);
323     return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
324 #else
325 
326 #if HAVE_GETTIMEOFDAY
327     struct timeval posixTime;
328     gettimeofday(&posixTime, NULL);
329     return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
330 #else
331     time_t epochtime;
332     time(&epochtime);
333     return (UDate)epochtime * U_MILLIS_PER_SECOND;
334 #endif
335 
336 #endif
337 }
338 
339 /*-----------------------------------------------------------------------------
340   IEEE 754
341   These methods detect and return NaN and infinity values for doubles
342   conforming to IEEE 754.  Platforms which support this standard include X86,
343   Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
344   If this doesn't work on your platform, you have non-IEEE floating-point, and
345   will need to code your own versions.  A naive implementation is to return 0.0
346   for getNaN and getInfinity, and false for isNaN and isInfinite.
347   ---------------------------------------------------------------------------*/
348 
349 U_CAPI UBool U_EXPORT2
uprv_isNaN(double number)350 uprv_isNaN(double number)
351 {
352 #if IEEE_754
353     BitPatternConversion convertedNumber;
354     convertedNumber.d64 = number;
355     /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */
356     return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64);
357 
358 #elif U_PLATFORM == U_PF_OS390
359     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
360                         sizeof(uint32_t));
361     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
362                         sizeof(uint32_t));
363 
364     return ((highBits & 0x7F080000L) == 0x7F080000L) &&
365       (lowBits == 0x00000000L);
366 
367 #else
368     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
369     /* you'll need to replace this default implementation with what's correct*/
370     /* for your platform.*/
371     return number != number;
372 #endif
373 }
374 
375 U_CAPI UBool U_EXPORT2
uprv_isInfinite(double number)376 uprv_isInfinite(double number)
377 {
378 #if IEEE_754
379     BitPatternConversion convertedNumber;
380     convertedNumber.d64 = number;
381     /* Infinity is exactly 0x7FF0000000000000U. */
382     return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64);
383 #elif U_PLATFORM == U_PF_OS390
384     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
385                         sizeof(uint32_t));
386     uint32_t lowBits  = *(uint32_t*)u_bottomNBytesOfDouble(&number,
387                         sizeof(uint32_t));
388 
389     return ((highBits  & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L);
390 
391 #else
392     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
393     /* value, you'll need to replace this default implementation with what's*/
394     /* correct for your platform.*/
395     return number == (2.0 * number);
396 #endif
397 }
398 
399 U_CAPI UBool U_EXPORT2
uprv_isPositiveInfinity(double number)400 uprv_isPositiveInfinity(double number)
401 {
402 #if IEEE_754 || U_PLATFORM == U_PF_OS390
403     return (UBool)(number > 0 && uprv_isInfinite(number));
404 #else
405     return uprv_isInfinite(number);
406 #endif
407 }
408 
409 U_CAPI UBool U_EXPORT2
uprv_isNegativeInfinity(double number)410 uprv_isNegativeInfinity(double number)
411 {
412 #if IEEE_754 || U_PLATFORM == U_PF_OS390
413     return (UBool)(number < 0 && uprv_isInfinite(number));
414 
415 #else
416     uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
417                         sizeof(uint32_t));
418     return((highBits & SIGN) && uprv_isInfinite(number));
419 
420 #endif
421 }
422 
423 U_CAPI double U_EXPORT2
uprv_getNaN()424 uprv_getNaN()
425 {
426 #if IEEE_754 || U_PLATFORM == U_PF_OS390
427     return gNan.d64;
428 #else
429     /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
430     /* you'll need to replace this default implementation with what's correct*/
431     /* for your platform.*/
432     return 0.0;
433 #endif
434 }
435 
436 U_CAPI double U_EXPORT2
uprv_getInfinity()437 uprv_getInfinity()
438 {
439 #if IEEE_754 || U_PLATFORM == U_PF_OS390
440     return gInf.d64;
441 #else
442     /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
443     /* value, you'll need to replace this default implementation with what's*/
444     /* correct for your platform.*/
445     return 0.0;
446 #endif
447 }
448 
449 U_CAPI double U_EXPORT2
uprv_floor(double x)450 uprv_floor(double x)
451 {
452     return floor(x);
453 }
454 
455 U_CAPI double U_EXPORT2
uprv_ceil(double x)456 uprv_ceil(double x)
457 {
458     return ceil(x);
459 }
460 
461 U_CAPI double U_EXPORT2
uprv_round(double x)462 uprv_round(double x)
463 {
464     return uprv_floor(x + 0.5);
465 }
466 
467 U_CAPI double U_EXPORT2
uprv_fabs(double x)468 uprv_fabs(double x)
469 {
470     return fabs(x);
471 }
472 
473 U_CAPI double U_EXPORT2
uprv_modf(double x,double * y)474 uprv_modf(double x, double* y)
475 {
476     return modf(x, y);
477 }
478 
479 U_CAPI double U_EXPORT2
uprv_fmod(double x,double y)480 uprv_fmod(double x, double y)
481 {
482     return fmod(x, y);
483 }
484 
485 U_CAPI double U_EXPORT2
uprv_pow(double x,double y)486 uprv_pow(double x, double y)
487 {
488     /* This is declared as "double pow(double x, double y)" */
489     return pow(x, y);
490 }
491 
492 U_CAPI double U_EXPORT2
uprv_pow10(int32_t x)493 uprv_pow10(int32_t x)
494 {
495     return pow(10.0, (double)x);
496 }
497 
498 U_CAPI double U_EXPORT2
uprv_fmax(double x,double y)499 uprv_fmax(double x, double y)
500 {
501 #if IEEE_754
502     /* first handle NaN*/
503     if(uprv_isNaN(x) || uprv_isNaN(y))
504         return uprv_getNaN();
505 
506     /* check for -0 and 0*/
507     if(x == 0.0 && y == 0.0 && u_signBit(x))
508         return y;
509 
510 #endif
511 
512     /* this should work for all flt point w/o NaN and Inf special cases */
513     return (x > y ? x : y);
514 }
515 
516 U_CAPI double U_EXPORT2
uprv_fmin(double x,double y)517 uprv_fmin(double x, double y)
518 {
519 #if IEEE_754
520     /* first handle NaN*/
521     if(uprv_isNaN(x) || uprv_isNaN(y))
522         return uprv_getNaN();
523 
524     /* check for -0 and 0*/
525     if(x == 0.0 && y == 0.0 && u_signBit(y))
526         return y;
527 
528 #endif
529 
530     /* this should work for all flt point w/o NaN and Inf special cases */
531     return (x > y ? y : x);
532 }
533 
534 U_CAPI UBool U_EXPORT2
uprv_add32_overflow(int32_t a,int32_t b,int32_t * res)535 uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) {
536     // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow.
537     // This function could be optimized by calling one of those primitives.
538     auto a64 = static_cast<int64_t>(a);
539     auto b64 = static_cast<int64_t>(b);
540     int64_t res64 = a64 + b64;
541     *res = static_cast<int32_t>(res64);
542     return res64 != *res;
543 }
544 
545 U_CAPI UBool U_EXPORT2
uprv_mul32_overflow(int32_t a,int32_t b,int32_t * res)546 uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) {
547     // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow.
548     // This function could be optimized by calling one of those primitives.
549     auto a64 = static_cast<int64_t>(a);
550     auto b64 = static_cast<int64_t>(b);
551     int64_t res64 = a64 * b64;
552     *res = static_cast<int32_t>(res64);
553     return res64 != *res;
554 }
555 
556 /**
557  * Truncates the given double.
558  * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
559  * This is different than calling floor() or ceil():
560  * floor(3.3) = 3, floor(-3.3) = -4
561  * ceil(3.3) = 4, ceil(-3.3) = -3
562  */
563 U_CAPI double U_EXPORT2
uprv_trunc(double d)564 uprv_trunc(double d)
565 {
566 #if IEEE_754
567     /* handle error cases*/
568     if(uprv_isNaN(d))
569         return uprv_getNaN();
570     if(uprv_isInfinite(d))
571         return uprv_getInfinity();
572 
573     if(u_signBit(d))    /* Signbit() picks up -0.0;  d<0 does not. */
574         return ceil(d);
575     else
576         return floor(d);
577 
578 #else
579     return d >= 0 ? floor(d) : ceil(d);
580 
581 #endif
582 }
583 
584 /**
585  * Return the largest positive number that can be represented by an integer
586  * type of arbitrary bit length.
587  */
588 U_CAPI double U_EXPORT2
uprv_maxMantissa(void)589 uprv_maxMantissa(void)
590 {
591     return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
592 }
593 
594 U_CAPI double U_EXPORT2
uprv_log(double d)595 uprv_log(double d)
596 {
597     return log(d);
598 }
599 
600 U_CAPI void * U_EXPORT2
uprv_maximumPtr(void * base)601 uprv_maximumPtr(void * base)
602 {
603 #if U_PLATFORM == U_PF_OS400
604     /*
605      * With the provided function we should never be out of range of a given segment
606      * (a traditional/typical segment that is).  Our segments have 5 bytes for the
607      * id and 3 bytes for the offset.  The key is that the casting takes care of
608      * only retrieving the offset portion minus x1000.  Hence, the smallest offset
609      * seen in a program is x001000 and when casted to an int would be 0.
610      * That's why we can only add 0xffefff.  Otherwise, we would exceed the segment.
611      *
612      * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
613      * non-TERASPACE.  If it is TERASPACE it is 2GB - 4k(header information).
614      * This function determines the activation based on the pointer that is passed in and
615      * calculates the appropriate maximum available size for
616      * each pointer type (TERASPACE and non-TERASPACE)
617      *
618      * Unlike other operating systems, the pointer model isn't determined at
619      * compile time on i5/OS.
620      */
621     if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
622         /* if it is a TERASPACE pointer the max is 2GB - 4k */
623         return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff)));
624     }
625     /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
626     return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff)));
627 
628 #else
629     return U_MAX_PTR(base);
630 #endif
631 }
632 
633 /*---------------------------------------------------------------------------
634   Platform-specific Implementations
635   Try these, and if they don't work on your platform, then special case your
636   platform with new implementations.
637   ---------------------------------------------------------------------------*/
638 
639 /* Generic time zone layer -------------------------------------------------- */
640 
641 /* Time zone utilities */
642 U_CAPI void U_EXPORT2
uprv_tzset()643 uprv_tzset()
644 {
645 #if defined(U_TZSET)
646     U_TZSET();
647 #else
648     /* no initialization*/
649 #endif
650 }
651 
652 U_CAPI int32_t U_EXPORT2
uprv_timezone()653 uprv_timezone()
654 {
655 #ifdef U_TIMEZONE
656     return U_TIMEZONE;
657 #else
658     time_t t, t1, t2;
659     struct tm tmrec;
660     int32_t tdiff = 0;
661 
662     time(&t);
663     uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) );
664 #if U_PLATFORM != U_PF_IPHONE
665     UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/
666 #endif
667     t1 = mktime(&tmrec);                 /* local time in seconds*/
668     uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) );
669     t2 = mktime(&tmrec);                 /* GMT (or UTC) in seconds*/
670     tdiff = t2 - t1;
671 
672 #if U_PLATFORM != U_PF_IPHONE
673     /* imitate NT behaviour, which returns same timezone offset to GMT for
674        winter and summer.
675        This does not work on all platforms. For instance, on glibc on Linux
676        and on Mac OS 10.5, tdiff calculated above remains the same
677        regardless of whether DST is in effect or not. iOS is another
678        platform where this does not work. Linux + glibc and Mac OS 10.5
679        have U_TIMEZONE defined so that this code is not reached.
680     */
681     if (dst_checked)
682         tdiff += 3600;
683 #endif
684     return tdiff;
685 #endif
686 }
687 
688 /* Note that U_TZNAME does *not* have to be tzname, but if it is,
689    some platforms need to have it declared here. */
690 
691 #if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED)
692 /* RS6000 and others reject char **tzname.  */
693 extern U_IMPORT char *U_TZNAME[];
694 #endif
695 
696 #if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
697 /* These platforms are likely to use Olson timezone IDs. */
698 /* common targets of the symbolic link at TZDEFAULT are:
699  * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
700  * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
701  * "/usr/share/lib/zoneinfo/<olsonID>" Solaris
702  * "../usr/share/lib/zoneinfo/<olsonID>" Solaris
703  * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
704  * To avoid checking lots of paths, just check that the target path
705  * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid.
706  */
707 
708 #define CHECK_LOCALTIME_LINK 1
709 #if U_PLATFORM_IS_DARWIN_BASED
710 #include <tzfile.h>
711 #define TZZONEINFO      (TZDIR "/")
712 #elif U_PLATFORM == U_PF_SOLARIS
713 #define TZDEFAULT       "/etc/localtime"
714 #define TZZONEINFO      "/usr/share/lib/zoneinfo/"
715 #define TZ_ENV_CHECK    "localtime"
716 #else
717 #define TZDEFAULT       "/etc/localtime"
718 #define TZZONEINFO      "/usr/share/zoneinfo/"
719 #endif
720 #define TZZONEINFOTAIL  "/zoneinfo/"
721 #if U_HAVE_DIRENT_H
722 #define TZFILE_SKIP     "posixrules" /* tz file to skip when searching. */
723 /* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
724    symlinked to /etc/localtime, which makes searchForTZFile return
725    'localtime' when it's the first match. */
726 #define TZFILE_SKIP2    "localtime"
727 #define SEARCH_TZFILE
728 #include <dirent.h>  /* Needed to search through system timezone files */
729 #endif
730 static char gTimeZoneBuffer[PATH_MAX];
731 static char *gTimeZoneBufferPtr = NULL;
732 #endif
733 
734 #if !U_PLATFORM_USES_ONLY_WIN32_API
735 #define isNonDigit(ch) (ch < '0' || '9' < ch)
isValidOlsonID(const char * id)736 static UBool isValidOlsonID(const char *id) {
737     int32_t idx = 0;
738 
739     /* Determine if this is something like Iceland (Olson ID)
740     or AST4ADT (non-Olson ID) */
741     while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
742         idx++;
743     }
744 
745     /* If we went through the whole string, then it might be okay.
746     The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
747     "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
748     The rest of the time it could be an Olson ID. George */
749     return (UBool)(id[idx] == 0
750         || uprv_strcmp(id, "PST8PDT") == 0
751         || uprv_strcmp(id, "MST7MDT") == 0
752         || uprv_strcmp(id, "CST6CDT") == 0
753         || uprv_strcmp(id, "EST5EDT") == 0);
754 }
755 
756 /* On some Unix-like OS, 'posix' subdirectory in
757    /usr/share/zoneinfo replicates the top-level contents. 'right'
758    subdirectory has the same set of files, but individual files
759    are different from those in the top-level directory or 'posix'
760    because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
761    has files for UTC.
762    When the first match for /etc/localtime is in either of them
763    (usually in posix because 'right' has different file contents),
764    or TZ environment variable points to one of them, createTimeZone
765    fails because, say, 'posix/America/New_York' is not an Olson
766    timezone id ('America/New_York' is). So, we have to skip
767    'posix/' and 'right/' at the beginning. */
skipZoneIDPrefix(const char ** id)768 static void skipZoneIDPrefix(const char** id) {
769     if (uprv_strncmp(*id, "posix/", 6) == 0
770         || uprv_strncmp(*id, "right/", 6) == 0)
771     {
772         *id += 6;
773     }
774 }
775 #endif
776 
777 #if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
778 
779 #define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
/* Values stored in OffsetZoneMapping.daylightType. */
enum {
    U_DAYLIGHT_NONE = 0,     /* no daylight saving time */
    U_DAYLIGHT_JUNE = 1,     /* daylight saving time in June */
    U_DAYLIGHT_DECEMBER = 2  /* daylight saving time in December */
};

/* One row of the table below: an offset, a daylight type and a pair of
   abbreviated zone names, together with the Olson ID they stand for. */
typedef struct OffsetZoneMapping {
    int32_t offsetSeconds;
    int32_t daylightType; /* one of the U_DAYLIGHT_* values */
    const char *stdID;
    const char *dstID;
    const char *olsonID;
} OffsetZoneMapping;

/*
This list tries to disambiguate a set of abbreviated timezone IDs and offsets
and maps it to an Olson ID.
Before adding anything to this list, take a look at
icu/source/tools/tzcode/tz.alias
Sometimes no daylight savings (U_DAYLIGHT_NONE) is important to define due to aliases.
This list can be tested with icu/source/test/compat/tzone.pl
More values could be added to daylightType to increase precision.
*/
static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
    {-45900, U_DAYLIGHT_DECEMBER, "CHAST", "CHADT", "Pacific/Chatham"},
    {-43200, U_DAYLIGHT_JUNE,     "PETT", "PETST", "Asia/Kamchatka"},
    {-43200, U_DAYLIGHT_DECEMBER, "NZST", "NZDT", "Pacific/Auckland"},
    {-43200, U_DAYLIGHT_JUNE,     "ANAT", "ANAST", "Asia/Anadyr"},
    {-39600, U_DAYLIGHT_JUNE,     "MAGT", "MAGST", "Asia/Magadan"},
    {-37800, U_DAYLIGHT_DECEMBER, "LHST", "LHST", "Australia/Lord_Howe"},
    {-36000, U_DAYLIGHT_DECEMBER, "EST", "EST", "Australia/Sydney"},
    {-36000, U_DAYLIGHT_JUNE,     "SAKT", "SAKST", "Asia/Sakhalin"},
    {-36000, U_DAYLIGHT_JUNE,     "VLAT", "VLAST", "Asia/Vladivostok"},
    {-34200, U_DAYLIGHT_DECEMBER, "CST", "CST", "Australia/South"},
    {-32400, U_DAYLIGHT_JUNE,     "YAKT", "YAKST", "Asia/Yakutsk"},
    {-32400, U_DAYLIGHT_JUNE,     "CHOT", "CHOST", "Asia/Choibalsan"},
    {-31500, U_DAYLIGHT_DECEMBER, "CWST", "CWST", "Australia/Eucla"},
    {-28800, U_DAYLIGHT_JUNE,     "IRKT", "IRKST", "Asia/Irkutsk"},
    {-28800, U_DAYLIGHT_JUNE,     "ULAT", "ULAST", "Asia/Ulaanbaatar"},
    {-28800, U_DAYLIGHT_DECEMBER, "WST", "WST", "Australia/West"},
    {-25200, U_DAYLIGHT_JUNE,     "HOVT", "HOVST", "Asia/Hovd"},
    {-25200, U_DAYLIGHT_JUNE,     "KRAT", "KRAST", "Asia/Krasnoyarsk"},
    {-21600, U_DAYLIGHT_JUNE,     "NOVT", "NOVST", "Asia/Novosibirsk"},
    {-21600, U_DAYLIGHT_JUNE,     "OMST", "OMSST", "Asia/Omsk"},
    {-18000, U_DAYLIGHT_JUNE,     "YEKT", "YEKST", "Asia/Yekaterinburg"},
    {-14400, U_DAYLIGHT_JUNE,     "SAMT", "SAMST", "Europe/Samara"},
    {-14400, U_DAYLIGHT_JUNE,     "AMT", "AMST", "Asia/Yerevan"},
    {-14400, U_DAYLIGHT_JUNE,     "AZT", "AZST", "Asia/Baku"},
    {-10800, U_DAYLIGHT_JUNE,     "AST", "ADT", "Asia/Baghdad"},
    {-10800, U_DAYLIGHT_JUNE,     "MSK", "MSD", "Europe/Moscow"},
    {-10800, U_DAYLIGHT_JUNE,     "VOLT", "VOLST", "Europe/Volgograd"},
    {-7200, U_DAYLIGHT_NONE,      "EET", "CEST", "Africa/Tripoli"},
    {-7200, U_DAYLIGHT_JUNE,      "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
    {-7200, U_DAYLIGHT_JUNE,      "IST", "IDT", "Asia/Jerusalem"},
    {-3600, U_DAYLIGHT_NONE,      "CET", "WEST", "Africa/Algiers"},
    {-3600, U_DAYLIGHT_DECEMBER,  "WAT", "WAST", "Africa/Windhoek"},
    {0, U_DAYLIGHT_JUNE,          "GMT", "IST", "Europe/Dublin"},
    {0, U_DAYLIGHT_JUNE,          "GMT", "BST", "Europe/London"},
    {0, U_DAYLIGHT_NONE,          "WET", "WEST", "Africa/Casablanca"},
    {0, U_DAYLIGHT_NONE,          "WET", "WET", "Africa/El_Aaiun"},
    {3600, U_DAYLIGHT_JUNE,       "AZOT", "AZOST", "Atlantic/Azores"},
    {3600, U_DAYLIGHT_JUNE,       "EGT", "EGST", "America/Scoresbysund"},
    {10800, U_DAYLIGHT_JUNE,      "PMST", "PMDT", "America/Miquelon"},
    {10800, U_DAYLIGHT_DECEMBER,  "UYT", "UYST", "America/Montevideo"},
    {10800, U_DAYLIGHT_JUNE,      "WGT", "WGST", "America/Godthab"},
    {10800, U_DAYLIGHT_DECEMBER,  "BRT", "BRST", "Brazil/East"},
    {12600, U_DAYLIGHT_JUNE,      "NST", "NDT", "America/St_Johns"},
    {14400, U_DAYLIGHT_JUNE,      "AST", "ADT", "Canada/Atlantic"},
    {14400, U_DAYLIGHT_DECEMBER,  "AMT", "AMST", "America/Cuiaba"},
    {14400, U_DAYLIGHT_DECEMBER,  "CLT", "CLST", "Chile/Continental"},
    {14400, U_DAYLIGHT_DECEMBER,  "FKT", "FKST", "Atlantic/Stanley"},
    {14400, U_DAYLIGHT_DECEMBER,  "PYT", "PYST", "America/Asuncion"},
    {18000, U_DAYLIGHT_JUNE,      "CST", "CDT", "America/Havana"},
    {18000, U_DAYLIGHT_JUNE,      "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
    {21600, U_DAYLIGHT_DECEMBER,  "EAST", "EASST", "Chile/EasterIsland"},
    {21600, U_DAYLIGHT_NONE,      "CST", "MDT", "Canada/Saskatchewan"},
    {21600, U_DAYLIGHT_NONE,      "CST", "CDT", "America/Guatemala"},
    {21600, U_DAYLIGHT_JUNE,      "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
    {25200, U_DAYLIGHT_JUNE,      "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
    {28800, U_DAYLIGHT_NONE,      "PST", "PST", "Pacific/Pitcairn"},
    {28800, U_DAYLIGHT_JUNE,      "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
    {32400, U_DAYLIGHT_JUNE,      "AKST", "AKDT", "US/Alaska"},
    {36000, U_DAYLIGHT_JUNE,      "HAST", "HADT", "US/Aleutian"}
};
860 
861 /*#define DEBUG_TZNAME*/
862 
remapShortTimeZone(const char * stdID,const char * dstID,int32_t daylightType,int32_t offset)863 static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
864 {
865     int32_t idx;
866 #ifdef DEBUG_TZNAME
867     fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
868 #endif
869     for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
870     {
871         if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
872             && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
873             && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
874             && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
875         {
876             return OFFSET_ZONE_MAPPINGS[idx].olsonID;
877         }
878     }
879     return NULL;
880 }
881 #endif
882 
883 #ifdef SEARCH_TZFILE
884 #define MAX_READ_SIZE 512
885 
886 typedef struct DefaultTZInfo {
887     char* defaultTZBuffer;
888     int64_t defaultTZFileSize;
889     FILE* defaultTZFilePtr;
890     UBool defaultTZstatus;
891     int32_t defaultTZPosition;
892 } DefaultTZInfo;
893 
894 /*
895  * This method compares the two files given to see if they are a match.
896  * It is currently use to compare two TZ files.
897  */
compareBinaryFiles(const char * defaultTZFileName,const char * TZFileName,DefaultTZInfo * tzInfo)898 static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
899     FILE* file;
900     int64_t sizeFile;
901     int64_t sizeFileLeft;
902     int32_t sizeFileRead;
903     int32_t sizeFileToRead;
904     char bufferFile[MAX_READ_SIZE];
905     UBool result = TRUE;
906 
907     if (tzInfo->defaultTZFilePtr == NULL) {
908         tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
909     }
910     file = fopen(TZFileName, "r");
911 
912     tzInfo->defaultTZPosition = 0; /* reset position to begin search */
913 
914     if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
915         /* First check that the file size are equal. */
916         if (tzInfo->defaultTZFileSize == 0) {
917             fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
918             tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
919         }
920         fseek(file, 0, SEEK_END);
921         sizeFile = ftell(file);
922         sizeFileLeft = sizeFile;
923 
924         if (sizeFile != tzInfo->defaultTZFileSize) {
925             result = FALSE;
926         } else {
927             /* Store the data from the files in seperate buffers and
928              * compare each byte to determine equality.
929              */
930             if (tzInfo->defaultTZBuffer == NULL) {
931                 rewind(tzInfo->defaultTZFilePtr);
932                 tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
933                 sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
934             }
935             rewind(file);
936             while(sizeFileLeft > 0) {
937                 uprv_memset(bufferFile, 0, MAX_READ_SIZE);
938                 sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
939 
940                 sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
941                 if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
942                     result = FALSE;
943                     break;
944                 }
945                 sizeFileLeft -= sizeFileRead;
946                 tzInfo->defaultTZPosition += sizeFileRead;
947             }
948         }
949     } else {
950         result = FALSE;
951     }
952 
953     if (file != NULL) {
954         fclose(file);
955     }
956 
957     return result;
958 }
959 
960 
961 /* dirent also lists two entries: "." and ".." that we can safely ignore. */
962 #define SKIP1 "."
963 #define SKIP2 ".."
964 static UBool U_CALLCONV putil_cleanup(void);
965 static CharString *gSearchTZFileResult = NULL;
966 
967 /*
968  * This method recursively traverses the directory given for a matching TZ file and returns the first match.
969  * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
970  */
searchForTZFile(const char * path,DefaultTZInfo * tzInfo)971 static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
972     DIR* dirp = NULL;
973     struct dirent* dirEntry = NULL;
974     char* result = NULL;
975     UErrorCode status = U_ZERO_ERROR;
976 
977     /* Save the current path */
978     CharString curpath(path, -1, status);
979     if (U_FAILURE(status)) {
980         goto cleanupAndReturn;
981     }
982 
983     dirp = opendir(path);
984     if (dirp == NULL) {
985         goto cleanupAndReturn;
986     }
987 
988     if (gSearchTZFileResult == NULL) {
989         gSearchTZFileResult = new CharString;
990         if (gSearchTZFileResult == NULL) {
991             goto cleanupAndReturn;
992         }
993         ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
994     }
995 
996     /* Check each entry in the directory. */
997     while((dirEntry = readdir(dirp)) != NULL) {
998         const char* dirName = dirEntry->d_name;
999         if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0
1000             && uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
1001             /* Create a newpath with the new entry to test each entry in the directory. */
1002             CharString newpath(curpath, status);
1003             newpath.append(dirName, -1, status);
1004             if (U_FAILURE(status)) {
1005                 break;
1006             }
1007 
1008             DIR* subDirp = NULL;
1009             if ((subDirp = opendir(newpath.data())) != NULL) {
1010                 /* If this new path is a directory, make a recursive call with the newpath. */
1011                 closedir(subDirp);
1012                 newpath.append('/', status);
1013                 if (U_FAILURE(status)) {
1014                     break;
1015                 }
1016                 result = searchForTZFile(newpath.data(), tzInfo);
1017                 /*
1018                  Have to get out here. Otherwise, we'd keep looking
1019                  and return the first match in the top-level directory
1020                  if there's a match in the top-level. If not, this function
1021                  would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
1022                  It worked without this in most cases because we have a fallback of calling
1023                  localtime_r to figure out the default timezone.
1024                 */
1025                 if (result != NULL)
1026                     break;
1027             } else {
1028                 if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) {
1029                     int32_t amountToSkip = sizeof(TZZONEINFO) - 1;
1030                     if (amountToSkip > newpath.length()) {
1031                         amountToSkip = newpath.length();
1032                     }
1033                     const char* zoneid = newpath.data() + amountToSkip;
1034                     skipZoneIDPrefix(&zoneid);
1035                     gSearchTZFileResult->clear();
1036                     gSearchTZFileResult->append(zoneid, -1, status);
1037                     if (U_FAILURE(status)) {
1038                         break;
1039                     }
1040                     result = gSearchTZFileResult->data();
1041                     /* Get out after the first one found. */
1042                     break;
1043                 }
1044             }
1045         }
1046     }
1047 
1048   cleanupAndReturn:
1049     if (dirp) {
1050         closedir(dirp);
1051     }
1052     return result;
1053 }
1054 #endif
1055 
1056 #if U_PLATFORM == U_PF_ANDROID
1057 typedef int(system_property_read_callback)(const prop_info* info,
1058                                            void (*callback)(void* cookie,
1059                                                             const char* name,
1060                                                             const char* value,
1061                                                             uint32_t serial),
1062                                            void* cookie);
1063 typedef int(system_property_get)(const char*, char*);
1064 
1065 static char gAndroidTimeZone[PROP_VALUE_MAX] = { '\0' };
1066 
u_property_read(void * cookie,const char * name,const char * value,uint32_t serial)1067 static void u_property_read(void* cookie, const char* name, const char* value,
1068                             uint32_t serial) {
1069     uprv_strcpy((char* )cookie, value);
1070 }
1071 #endif
1072 
1073 U_CAPI void U_EXPORT2
uprv_tzname_clear_cache(void)1074 uprv_tzname_clear_cache(void)
1075 {
1076 #if U_PLATFORM == U_PF_ANDROID
1077     /* Android's timezone is stored in system property. */
1078     gAndroidTimeZone[0] = '\0';
1079     void* libc = dlopen("libc.so", RTLD_NOLOAD);
1080     if (libc) {
1081         /* Android API 26+ has new API to get system property and old API
1082          * (__system_property_get) is deprecated */
1083         system_property_read_callback* property_read_callback =
1084             (system_property_read_callback*)dlsym(
1085                 libc, "__system_property_read_callback");
1086         if (property_read_callback) {
1087             const prop_info* info =
1088                 __system_property_find("persist.sys.timezone");
1089             if (info) {
1090                 property_read_callback(info, &u_property_read, gAndroidTimeZone);
1091             }
1092         } else {
1093             system_property_get* property_get =
1094                 (system_property_get*)dlsym(libc, "__system_property_get");
1095             if (property_get) {
1096                 property_get("persist.sys.timezone", gAndroidTimeZone);
1097             }
1098         }
1099         dlclose(libc);
1100     }
1101 #endif
1102 
1103 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1104     gTimeZoneBufferPtr = NULL;
1105 #endif
1106 }
1107 
1108 U_CAPI const char* U_EXPORT2
uprv_tzname(int n)1109 uprv_tzname(int n)
1110 {
1111     (void)n; // Avoid unreferenced parameter warning.
1112     const char *tzid = NULL;
1113 #if U_PLATFORM_USES_ONLY_WIN32_API
1114     tzid = uprv_detectWindowsTimeZone();
1115 
1116     if (tzid != NULL) {
1117         return tzid;
1118     }
1119 
1120 #ifndef U_TZNAME
1121     // The return value is free'd in timezone.cpp on Windows because
1122     // the other code path returns a pointer to a heap location.
1123     // If we don't have a name already, then tzname wouldn't be any
1124     // better, so just fall back.
1125     return uprv_strdup("");
1126 #endif // !U_TZNAME
1127 
1128 #else
1129 
1130 /*#if U_PLATFORM_IS_DARWIN_BASED
1131     int ret;
1132 
1133     tzid = getenv("TZFILE");
1134     if (tzid != NULL) {
1135         return tzid;
1136     }
1137 #endif*/
1138 
1139 /* This code can be temporarily disabled to test tzname resolution later on. */
1140 #ifndef DEBUG_TZNAME
1141 #if U_PLATFORM == U_PF_ANDROID
1142     tzid = gAndroidTimeZone;
1143 #else
1144     tzid = getenv("TZ");
1145 #endif
1146     if (tzid != NULL && isValidOlsonID(tzid)
1147 #if U_PLATFORM == U_PF_SOLARIS
1148     /* Don't misinterpret TZ "localtime" on Solaris as a time zone name. */
1149         && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
1150 #endif
1151     ) {
1152         /* The colon forces tzset() to treat the remainder as zoneinfo path */
1153         if (tzid[0] == ':') {
1154             tzid++;
1155         }
1156         /* This might be a good Olson ID. */
1157         skipZoneIDPrefix(&tzid);
1158         return tzid;
1159     }
1160     /* else U_TZNAME will give a better result. */
1161 #endif
1162 
1163 #if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
1164     /* Caller must handle threading issues */
1165     if (gTimeZoneBufferPtr == NULL) {
1166         /*
1167         This is a trick to look at the name of the link to get the Olson ID
1168         because the tzfile contents is underspecified.
1169         This isn't guaranteed to work because it may not be a symlink.
1170         */
1171         int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
1172         if (0 < ret) {
1173             int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL);
1174             gTimeZoneBuffer[ret] = 0;
1175             char *  tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
1176 
1177             if (tzZoneInfoTailPtr != NULL
1178                 && isValidOlsonID(tzZoneInfoTailPtr + tzZoneInfoTailLen))
1179             {
1180                 return (gTimeZoneBufferPtr = tzZoneInfoTailPtr + tzZoneInfoTailLen);
1181             }
1182         } else {
1183 #if defined(SEARCH_TZFILE)
1184             DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
1185             if (tzInfo != NULL) {
1186                 tzInfo->defaultTZBuffer = NULL;
1187                 tzInfo->defaultTZFileSize = 0;
1188                 tzInfo->defaultTZFilePtr = NULL;
1189                 tzInfo->defaultTZstatus = FALSE;
1190                 tzInfo->defaultTZPosition = 0;
1191 
1192                 gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);
1193 
1194                 /* Free previously allocated memory */
1195                 if (tzInfo->defaultTZBuffer != NULL) {
1196                     uprv_free(tzInfo->defaultTZBuffer);
1197                 }
1198                 if (tzInfo->defaultTZFilePtr != NULL) {
1199                     fclose(tzInfo->defaultTZFilePtr);
1200                 }
1201                 uprv_free(tzInfo);
1202             }
1203 
1204             if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
1205                 return gTimeZoneBufferPtr;
1206             }
1207 #endif
1208         }
1209     }
1210     else {
1211         return gTimeZoneBufferPtr;
1212     }
1213 #endif
1214 #endif
1215 
1216 #ifdef U_TZNAME
1217 #if U_PLATFORM_USES_ONLY_WIN32_API
1218     /* The return value is free'd in timezone.cpp on Windows because
1219      * the other code path returns a pointer to a heap location. */
1220     return uprv_strdup(U_TZNAME[n]);
1221 #else
1222     /*
1223     U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
1224     So we remap the abbreviation to an olson ID.
1225 
1226     Since Windows exposes a little more timezone information,
1227     we normally don't use this code on Windows because
1228     uprv_detectWindowsTimeZone should have already given the correct answer.
1229     */
1230     {
1231         struct tm juneSol, decemberSol;
1232         int daylightType;
1233         static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
1234         static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
1235 
1236         /* This probing will tell us when daylight savings occurs.  */
1237         localtime_r(&juneSolstice, &juneSol);
1238         localtime_r(&decemberSolstice, &decemberSol);
1239         if(decemberSol.tm_isdst > 0) {
1240           daylightType = U_DAYLIGHT_DECEMBER;
1241         } else if(juneSol.tm_isdst > 0) {
1242           daylightType = U_DAYLIGHT_JUNE;
1243         } else {
1244           daylightType = U_DAYLIGHT_NONE;
1245         }
1246         tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
1247         if (tzid != NULL) {
1248             return tzid;
1249         }
1250     }
1251     return U_TZNAME[n];
1252 #endif
1253 #else
1254     return "";
1255 #endif
1256 }
1257 
1258 /* Get and set the ICU data directory --------------------------------------- */
1259 
1260 static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER;
1261 static char *gDataDirectory = NULL;
1262 
1263 UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER;
1264 static CharString *gTimeZoneFilesDirectory = NULL;
1265 
1266 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1267  static const char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */
1268  static bool gCorrectedPOSIXLocaleHeapAllocated = false;
1269 #endif
1270 
putil_cleanup(void)1271 static UBool U_CALLCONV putil_cleanup(void)
1272 {
1273     if (gDataDirectory && *gDataDirectory) {
1274         uprv_free(gDataDirectory);
1275     }
1276     gDataDirectory = NULL;
1277     gDataDirInitOnce.reset();
1278 
1279     delete gTimeZoneFilesDirectory;
1280     gTimeZoneFilesDirectory = NULL;
1281     gTimeZoneFilesInitOnce.reset();
1282 
1283 #ifdef SEARCH_TZFILE
1284     delete gSearchTZFileResult;
1285     gSearchTZFileResult = NULL;
1286 #endif
1287 
1288 #if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1289     if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) {
1290         uprv_free(const_cast<char *>(gCorrectedPOSIXLocale));
1291         gCorrectedPOSIXLocale = NULL;
1292         gCorrectedPOSIXLocaleHeapAllocated = false;
1293     }
1294 #endif
1295     return TRUE;
1296 }
1297 
1298 /*
1299  * Set the data directory.
1300  *    Make a copy of the passed string, and set the global data dir to point to it.
1301  */
1302 U_CAPI void U_EXPORT2
u_setDataDirectory(const char * directory)1303 u_setDataDirectory(const char *directory) {
1304     char *newDataDir;
1305     int32_t length;
1306 
1307     if(directory==NULL || *directory==0) {
1308         /* A small optimization to prevent the malloc and copy when the
1309         shared library is used, and this is a way to make sure that NULL
1310         is never returned.
1311         */
1312         newDataDir = (char *)"";
1313     }
1314     else {
1315         length=(int32_t)uprv_strlen(directory);
1316         newDataDir = (char *)uprv_malloc(length + 2);
1317         /* Exit out if newDataDir could not be created. */
1318         if (newDataDir == NULL) {
1319             return;
1320         }
1321         uprv_strcpy(newDataDir, directory);
1322 
1323 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1324         {
1325             char *p;
1326             while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != NULL) {
1327                 *p = U_FILE_SEP_CHAR;
1328             }
1329         }
1330 #endif
1331     }
1332 
1333     if (gDataDirectory && *gDataDirectory) {
1334         uprv_free(gDataDirectory);
1335     }
1336     gDataDirectory = newDataDir;
1337     ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1338 }
1339 
1340 U_CAPI UBool U_EXPORT2
uprv_pathIsAbsolute(const char * path)1341 uprv_pathIsAbsolute(const char *path)
1342 {
1343   if(!path || !*path) {
1344     return FALSE;
1345   }
1346 
1347   if(*path == U_FILE_SEP_CHAR) {
1348     return TRUE;
1349   }
1350 
1351 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1352   if(*path == U_FILE_ALT_SEP_CHAR) {
1353     return TRUE;
1354   }
1355 #endif
1356 
1357 #if U_PLATFORM_USES_ONLY_WIN32_API
1358   if( (((path[0] >= 'A') && (path[0] <= 'Z')) ||
1359        ((path[0] >= 'a') && (path[0] <= 'z'))) &&
1360       path[1] == ':' ) {
1361     return TRUE;
1362   }
1363 #endif
1364 
1365   return FALSE;
1366 }
1367 
1368 /* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1369    (needed for some Darwin ICU build environments) */
1370 #if U_PLATFORM_IS_DARWIN_BASED && defined(TARGET_OS_SIMULATOR) && TARGET_OS_SIMULATOR
1371 # if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1372 #  define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1373 # endif
1374 #endif
1375 
1376 #if defined(ICU_DATA_DIR_WINDOWS)
1377 // Helper function to get the ICU Data Directory under the Windows directory location.
getIcuDataDirectoryUnderWindowsDirectory(char * directoryBuffer,UINT bufferLength)1378 static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength)
1379 {
1380     wchar_t windowsPath[MAX_PATH];
1381     char windowsPathUtf8[MAX_PATH];
1382 
1383     UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath));
1384     if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) {
1385         // Convert UTF-16 to a UTF-8 string.
1386         UErrorCode status = U_ZERO_ERROR;
1387         int32_t windowsPathUtf8Len = 0;
1388         u_strToUTF8(windowsPathUtf8, static_cast<int32_t>(UPRV_LENGTHOF(windowsPathUtf8)),
1389             &windowsPathUtf8Len, reinterpret_cast<const UChar*>(windowsPath), -1, &status);
1390 
1391         if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) &&
1392             (windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) {
1393             // Ensure it always has a separator, so we can append the ICU data path.
1394             if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) {
1395                 windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR;
1396                 windowsPathUtf8[windowsPathUtf8Len] = '\0';
1397             }
1398             // Check if the concatenated string will fit.
1399             if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) {
1400                 uprv_strcpy(directoryBuffer, windowsPathUtf8);
1401                 uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS);
1402                 return TRUE;
1403             }
1404         }
1405     }
1406 
1407     return FALSE;
1408 }
1409 #endif
1410 
dataDirectoryInitFn()1411 static void U_CALLCONV dataDirectoryInitFn() {
1412     /* If we already have the directory, then return immediately. Will happen if user called
1413      * u_setDataDirectory().
1414      */
1415     if (gDataDirectory) {
1416         return;
1417     }
1418 
1419     const char *path = NULL;
1420 #if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1421     char datadir_path_buffer[PATH_MAX];
1422 #endif
1423 
1424     /*
1425     When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
1426     override ICU's data with the ICU_DATA environment variable. This prevents
1427     problems where multiple custom copies of ICU's specific version of data
1428     are installed on a system. Either the application must define the data
1429     directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
1430     ICU, set the data with udata_setCommonData or trust that all of the
1431     required data is contained in ICU's data library that contains
1432     the entry point defined by U_ICUDATA_ENTRY_POINT.
1433 
1434     There may also be some platforms where environment variables
1435     are not allowed.
1436     */
1437 #   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
1438     /* First try to get the environment variable */
1439 #     if U_PLATFORM_HAS_WINUWP_API == 0  // Windows UWP does not support getenv
1440         path=getenv("ICU_DATA");
1441 #     endif
1442 #   endif
1443 
1444     /* ICU_DATA_DIR may be set as a compile option.
1445      * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
1446      * and is used only when data is built in archive mode eliminating the need
1447      * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
1448      * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
1449      * set their own path.
1450      */
1451 #if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
1452     if(path==NULL || *path==0) {
1453 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1454         const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
1455 # endif
1456 # ifdef ICU_DATA_DIR
1457         path=ICU_DATA_DIR;
1458 # else
1459         path=U_ICU_DATA_DEFAULT_DIR;
1460 # endif
1461 # if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1462         if (prefix != NULL) {
1463             snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
1464             path=datadir_path_buffer;
1465         }
1466 # endif
1467     }
1468 #endif
1469 
1470 #if defined(ICU_DATA_DIR_WINDOWS)
1471     char datadir_path_buffer[MAX_PATH];
1472     if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
1473         path = datadir_path_buffer;
1474     }
1475 #endif
1476 
1477     if(path==NULL) {
1478         /* It looks really bad, set it to something. */
1479         path = "";
1480     }
1481 
1482     u_setDataDirectory(path);
1483     return;
1484 }
1485 
1486 U_CAPI const char * U_EXPORT2
u_getDataDirectory(void)1487 u_getDataDirectory(void) {
1488     umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
1489     return gDataDirectory;
1490 }
1491 
setTimeZoneFilesDir(const char * path,UErrorCode & status)1492 static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
1493     if (U_FAILURE(status)) {
1494         return;
1495     }
1496     gTimeZoneFilesDirectory->clear();
1497     gTimeZoneFilesDirectory->append(path, status);
1498 #if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1499     char *p = gTimeZoneFilesDirectory->data();
1500     while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != NULL) {
1501         *p = U_FILE_SEP_CHAR;
1502     }
1503 #endif
1504 }
1505 
1506 #define TO_STRING(x) TO_STRING_2(x)
1507 #define TO_STRING_2(x) #x
1508 
TimeZoneDataDirInitFn(UErrorCode & status)1509 static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
1510     U_ASSERT(gTimeZoneFilesDirectory == NULL);
1511     ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1512     gTimeZoneFilesDirectory = new CharString();
1513     if (gTimeZoneFilesDirectory == NULL) {
1514         status = U_MEMORY_ALLOCATION_ERROR;
1515         return;
1516     }
1517 
1518     const char *dir = "";
1519 
1520 #if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
1521     char timezonefilesdir_path_buffer[PATH_MAX];
1522     const char *prefix = getenv(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR);
1523 #endif
1524 
1525 #if U_PLATFORM_HAS_WINUWP_API == 1
1526 // The UWP version does not support the environment variable setting.
1527 
1528 # if defined(ICU_DATA_DIR_WINDOWS)
1529     // When using the Windows system data, we can possibly pick up time zone data from the Windows directory.
1530     char datadir_path_buffer[MAX_PATH];
1531     if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
1532         dir = datadir_path_buffer;
1533     }
1534 # endif
1535 
1536 #else
1537     dir = getenv("ICU_TIMEZONE_FILES_DIR");
1538 #endif // U_PLATFORM_HAS_WINUWP_API
1539 
1540 #if defined(U_TIMEZONE_FILES_DIR)
1541     if (dir == NULL) {
1542         // Build time configuration setting.
1543         dir = TO_STRING(U_TIMEZONE_FILES_DIR);
1544     }
1545 #endif
1546 
1547     if (dir == NULL) {
1548         dir = "";
1549     }
1550 
1551 #if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
1552     if (prefix != NULL) {
1553         snprintf(timezonefilesdir_path_buffer, PATH_MAX, "%s%s", prefix, dir);
1554         dir = timezonefilesdir_path_buffer;
1555     }
1556 #endif
1557 
1558     setTimeZoneFilesDir(dir, status);
1559 }
1560 
1561 
1562 U_CAPI const char * U_EXPORT2
u_getTimeZoneFilesDirectory(UErrorCode * status)1563 u_getTimeZoneFilesDirectory(UErrorCode *status) {
1564     umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1565     return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : "";
1566 }
1567 
1568 U_CAPI void U_EXPORT2
u_setTimeZoneFilesDirectory(const char * path,UErrorCode * status)1569 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
1570     umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
1571     setTimeZoneFilesDir(path, *status);
1572 
1573     // Note: this function does some extra churn, first setting based on the
1574     //       environment, then immediately replacing with the value passed in.
1575     //       The logic is simpler that way, and performance shouldn't be an issue.
1576 }
1577 
1578 
1579 #if U_POSIX_LOCALE
1580 /* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1581  * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1582  * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
1583  */
uprv_getPOSIXIDForCategory(int category)1584 static const char *uprv_getPOSIXIDForCategory(int category)
1585 {
1586     const char* posixID = NULL;
1587     if (category == LC_MESSAGES || category == LC_CTYPE) {
1588         /*
1589         * On Solaris two different calls to setlocale can result in
1590         * different values. Only get this value once.
1591         *
1592         * We must check this first because an application can set this.
1593         *
1594         * LC_ALL can't be used because it's platform dependent. The LANG
1595         * environment variable seems to affect LC_CTYPE variable by default.
1596         * Here is what setlocale(LC_ALL, NULL) can return.
1597         * HPUX can return 'C C C C C C C'
1598         * Solaris can return /en_US/C/C/C/C/C on the second try.
1599         * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1600         *
1601         * The default codepage detection also needs to use LC_CTYPE.
1602         *
1603         * Do not call setlocale(LC_*, "")! Using an empty string instead
1604         * of NULL, will modify the libc behavior.
1605         */
1606         posixID = setlocale(category, NULL);
1607         if ((posixID == 0)
1608             || (uprv_strcmp("C", posixID) == 0)
1609             || (uprv_strcmp("POSIX", posixID) == 0))
1610         {
1611             /* Maybe we got some garbage.  Try something more reasonable */
1612             posixID = getenv("LC_ALL");
1613             /* Solaris speaks POSIX -  See IEEE Std 1003.1-2008
1614              * This is needed to properly handle empty env. variables
1615              */
1616 #if U_PLATFORM == U_PF_SOLARIS
1617             if ((posixID == 0) || (posixID[0] == '\0')) {
1618                 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1619                 if ((posixID == 0) || (posixID[0] == '\0')) {
1620 #else
1621             if (posixID == 0) {
1622                 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1623                 if (posixID == 0) {
1624 #endif
1625                     posixID = getenv("LANG");
1626                 }
1627             }
1628         }
1629     }
1630     if ((posixID==0)
1631         || (uprv_strcmp("C", posixID) == 0)
1632         || (uprv_strcmp("POSIX", posixID) == 0))
1633     {
1634         /* Nothing worked.  Give it a nice POSIX default value. */
1635         posixID = "en_US_POSIX";
1636         // Note: this test will not catch 'C.UTF-8',
1637         // that will be handled in uprv_getDefaultLocaleID().
1638         // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage()
1639         // caller which expects to see "en_US_POSIX" in many branches.
1640     }
1641     return posixID;
1642 }
1643 
1644 /* Return just the POSIX id for the default locale, whatever happens to be in
1645  * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1646  */
1647 static const char *uprv_getPOSIXIDForDefaultLocale(void)
1648 {
1649     static const char* posixID = NULL;
1650     if (posixID == 0) {
1651         posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
1652     }
1653     return posixID;
1654 }
1655 
1656 #if !U_CHARSET_IS_UTF8
1657 /* Return just the POSIX id for the default codepage, whatever happens to be in
1658  * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1659  */
1660 static const char *uprv_getPOSIXIDForDefaultCodepage(void)
1661 {
1662     static const char* posixID = NULL;
1663     if (posixID == 0) {
1664         posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
1665     }
1666     return posixID;
1667 }
1668 #endif
1669 #endif
1670 
1671 /* NOTE: The caller should handle thread safety */
1672 U_CAPI const char* U_EXPORT2
1673 uprv_getDefaultLocaleID()
1674 {
1675 #if U_POSIX_LOCALE
1676 /*
1677   Note that:  (a '!' means the ID is improper somehow)
1678      LC_ALL  ---->     default_loc          codepage
1679 --------------------------------------------------------
1680      ab.CD             ab                   CD
1681      ab@CD             ab__CD               -
1682      ab@CD.EF          ab__CD               EF
1683 
1684      ab_CD.EF@GH       ab_CD_GH             EF
1685 
1686 Some 'improper' ways to do the same as above:
1687   !  ab_CD@GH.EF       ab_CD_GH             EF
1688   !  ab_CD.EF@GH.IJ    ab_CD_GH             EF
1689   !  ab_CD@ZZ.EF@GH.IJ ab_CD_GH             EF
1690 
1691      _CD@GH            _CD_GH               -
1692      _CD.EF@GH         _CD_GH               EF
1693 
1694 The variant cannot have dots in it.
1695 The 'rightmost' variant (@xxx) wins.
1696 The leftmost codepage (.xxx) wins.
1697 */
1698     const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1699 
1700     /* Format: (no spaces)
1701     ll [ _CC ] [ . MM ] [ @ VV]
1702 
1703       l = lang, C = ctry, M = charmap, V = variant
1704     */
1705 
1706     if (gCorrectedPOSIXLocale != nullptr) {
1707         return gCorrectedPOSIXLocale;
1708     }
1709 
1710     // Copy the ID into owned memory.
1711     // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination
1712     char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1));
1713     if (correctedPOSIXLocale == nullptr) {
1714         return nullptr;
1715     }
1716     uprv_strcpy(correctedPOSIXLocale, posixID);
1717 
1718     char *limit;
1719     if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) {
1720         *limit = 0;
1721     }
1722     if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
1723         *limit = 0;
1724     }
1725 
1726     if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant
1727         || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) {
1728       // Raw input was C.* or POSIX.*, Give it a nice POSIX default value.
1729       // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory())
1730       uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX");
1731     }
1732 
1733     /* Note that we scan the *uncorrected* ID. */
1734     const char *p;
1735     if ((p = uprv_strrchr(posixID, '@')) != nullptr) {
1736         p++;
1737 
1738         /* Take care of any special cases here.. */
1739         if (!uprv_strcmp(p, "nynorsk")) {
1740             p = "NY";
1741             /* Don't worry about no__NY. In practice, it won't appear. */
1742         }
1743 
1744         if (uprv_strchr(correctedPOSIXLocale,'_') == nullptr) {
1745             uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */
1746         }
1747         else {
1748             uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1749         }
1750 
1751         const char *q;
1752         if ((q = uprv_strchr(p, '.')) != nullptr) {
1753             /* How big will the resulting string be? */
1754             int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1755             uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset
1756             correctedPOSIXLocale[len] = 0;
1757         }
1758         else {
1759             /* Anything following the @ sign */
1760             uprv_strcat(correctedPOSIXLocale, p);
1761         }
1762 
1763         /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1764          * How about 'russian' -> 'ru'?
1765          * Many of the other locales using ISO codes will be handled by the
1766          * canonicalization functions in uloc_getDefault.
1767          */
1768     }
1769 
1770     if (gCorrectedPOSIXLocale == nullptr) {
1771         gCorrectedPOSIXLocale = correctedPOSIXLocale;
1772         gCorrectedPOSIXLocaleHeapAllocated = true;
1773         ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1774         correctedPOSIXLocale = nullptr;
1775     }
1776     posixID = gCorrectedPOSIXLocale;
1777 
1778     if (correctedPOSIXLocale != nullptr) {  /* Was already set - clean up. */
1779         uprv_free(correctedPOSIXLocale);
1780     }
1781 
1782     return posixID;
1783 
1784 #elif U_PLATFORM_USES_ONLY_WIN32_API
1785 #define POSIX_LOCALE_CAPACITY 64
1786     UErrorCode status = U_ZERO_ERROR;
1787     char *correctedPOSIXLocale = nullptr;
1788 
1789     // If we have already figured this out just use the cached value
1790     if (gCorrectedPOSIXLocale != nullptr) {
1791         return gCorrectedPOSIXLocale;
1792     }
1793 
1794     // No cached value, need to determine the current value
1795     static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
1796     int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH);
1797 
1798     // Now we should have a Windows locale name that needs converted to the POSIX style.
1799     if (length > 0) // If length is 0, then the GetLocaleInfoEx failed.
1800     {
1801         // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
1802         char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
1803 
1804         int32_t i;
1805         for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++)
1806         {
1807             if (windowsLocale[i] == '_')
1808             {
1809                 modifiedWindowsLocale[i] = '-';
1810             }
1811             else
1812             {
1813                 modifiedWindowsLocale[i] = static_cast<char>(windowsLocale[i]);
1814             }
1815 
1816             if (modifiedWindowsLocale[i] == '\0')
1817             {
1818                 break;
1819             }
1820         }
1821 
1822         if (i >= UPRV_LENGTHOF(modifiedWindowsLocale))
1823         {
1824             // Ran out of room, can't really happen, maybe we'll be lucky about a matching
1825             // locale when tags are dropped
1826             modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0';
1827         }
1828 
1829         // Now normalize the resulting name
1830         correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
1831         /* TODO: Should we just exit on memory allocation failure? */
1832         if (correctedPOSIXLocale)
1833         {
1834             int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
1835             if (U_SUCCESS(status))
1836             {
1837                 *(correctedPOSIXLocale + posixLen) = 0;
1838                 gCorrectedPOSIXLocale = correctedPOSIXLocale;
1839                 gCorrectedPOSIXLocaleHeapAllocated = true;
1840                 ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1841             }
1842             else
1843             {
1844                 uprv_free(correctedPOSIXLocale);
1845             }
1846         }
1847     }
1848 
1849     // If unable to find a locale we can agree upon, use en-US by default
1850     if (gCorrectedPOSIXLocale == nullptr) {
1851         gCorrectedPOSIXLocale = "en_US";
1852     }
1853     return gCorrectedPOSIXLocale;
1854 
1855 #elif U_PLATFORM == U_PF_OS400
1856     /* locales are process scoped and are by definition thread safe */
1857     static char correctedLocale[64];
1858     const  char *localeID = getenv("LC_ALL");
1859            char *p;
1860 
1861     if (localeID == NULL)
1862         localeID = getenv("LANG");
1863     if (localeID == NULL)
1864         localeID = setlocale(LC_ALL, NULL);
1865     /* Make sure we have something... */
1866     if (localeID == NULL)
1867         return "en_US_POSIX";
1868 
1869     /* Extract the locale name from the path. */
1870     if((p = uprv_strrchr(localeID, '/')) != NULL)
1871     {
1872         /* Increment p to start of locale name. */
1873         p++;
1874         localeID = p;
1875     }
1876 
1877     /* Copy to work location. */
1878     uprv_strcpy(correctedLocale, localeID);
1879 
1880     /* Strip off the '.locale' extension. */
1881     if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1882         *p = 0;
1883     }
1884 
1885     /* Upper case the locale name. */
1886     T_CString_toUpperCase(correctedLocale);
1887 
1888     /* See if we are using the POSIX locale.  Any of the
1889     * following are equivalent and use the same QLGPGCMA
1890     * (POSIX) locale.
1891     * QLGPGCMA2 means UCS2
1892     * QLGPGCMA_4 means UTF-32
1893     * QLGPGCMA_8 means UTF-8
1894     */
1895     if ((uprv_strcmp("C", correctedLocale) == 0) ||
1896         (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1897         (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1898     {
1899         uprv_strcpy(correctedLocale, "en_US_POSIX");
1900     }
1901     else
1902     {
1903         int16_t LocaleLen;
1904 
1905         /* Lower case the lang portion. */
1906         for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1907         {
1908             *p = uprv_tolower(*p);
1909         }
1910 
1911         /* Adjust for Euro.  After '_E' add 'URO'. */
1912         LocaleLen = uprv_strlen(correctedLocale);
1913         if (correctedLocale[LocaleLen - 2] == '_' &&
1914             correctedLocale[LocaleLen - 1] == 'E')
1915         {
1916             uprv_strcat(correctedLocale, "URO");
1917         }
1918 
1919         /* If using Lotus-based locale then convert to
1920          * equivalent non Lotus.
1921          */
1922         else if (correctedLocale[LocaleLen - 2] == '_' &&
1923             correctedLocale[LocaleLen - 1] == 'L')
1924         {
1925             correctedLocale[LocaleLen - 2] = 0;
1926         }
1927 
1928         /* There are separate simplified and traditional
1929          * locales called zh_HK_S and zh_HK_T.
1930          */
1931         else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1932         {
1933             uprv_strcpy(correctedLocale, "zh_HK");
1934         }
1935 
1936         /* A special zh_CN_GBK locale...
1937         */
1938         else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1939         {
1940             uprv_strcpy(correctedLocale, "zh_CN");
1941         }
1942 
1943     }
1944 
1945     return correctedLocale;
1946 #endif
1947 
1948 }
1949 
1950 #if !U_CHARSET_IS_UTF8
1951 #if U_POSIX_LOCALE
1952 /*
1953 Due to various platform differences, one platform may specify a charset,
1954 when they really mean a different charset. Remap the names so that they are
1955 compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1956 here. Before adding anything to this function, please consider adding unique
1957 names to the ICU alias table in the data directory.
1958 */
1959 static const char*
1960 remapPlatformDependentCodepage(const char *locale, const char *name) {
1961     if (locale != NULL && *locale == 0) {
1962         /* Make sure that an empty locale is handled the same way. */
1963         locale = NULL;
1964     }
1965     if (name == NULL) {
1966         return NULL;
1967     }
1968 #if U_PLATFORM == U_PF_AIX
1969     if (uprv_strcmp(name, "IBM-943") == 0) {
1970         /* Use the ASCII compatible ibm-943 */
1971         name = "Shift-JIS";
1972     }
1973     else if (uprv_strcmp(name, "IBM-1252") == 0) {
1974         /* Use the windows-1252 that contains the Euro */
1975         name = "IBM-5348";
1976     }
1977 #elif U_PLATFORM == U_PF_SOLARIS
1978     if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
1979         /* Solaris underspecifies the "EUC" name. */
1980         if (uprv_strcmp(locale, "zh_CN") == 0) {
1981             name = "EUC-CN";
1982         }
1983         else if (uprv_strcmp(locale, "zh_TW") == 0) {
1984             name = "EUC-TW";
1985         }
1986         else if (uprv_strcmp(locale, "ko_KR") == 0) {
1987             name = "EUC-KR";
1988         }
1989     }
1990     else if (uprv_strcmp(name, "eucJP") == 0) {
1991         /*
1992         ibm-954 is the best match.
1993         ibm-33722 is the default for eucJP (similar to Windows).
1994         */
1995         name = "eucjis";
1996     }
1997     else if (uprv_strcmp(name, "646") == 0) {
1998         /*
1999          * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
2000          * ISO-8859-1 instead of US-ASCII(646).
2001          */
2002         name = "ISO-8859-1";
2003     }
2004 #elif U_PLATFORM_IS_DARWIN_BASED
2005     if (locale == NULL && *name == 0) {
2006         /*
2007         No locale was specified, and an empty name was passed in.
2008         This usually indicates that nl_langinfo didn't return valid information.
2009         Mac OS X uses UTF-8 by default (especially the locale data and console).
2010         */
2011         name = "UTF-8";
2012     }
2013     else if (uprv_strcmp(name, "CP949") == 0) {
2014         /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2015         name = "EUC-KR";
2016     }
2017     else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
2018         /*
2019          * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2020          */
2021         name = "UTF-8";
2022     }
2023 #elif U_PLATFORM == U_PF_BSD
2024     if (uprv_strcmp(name, "CP949") == 0) {
2025         /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2026         name = "EUC-KR";
2027     }
2028 #elif U_PLATFORM == U_PF_HPUX
2029     if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
2030         /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
2031         /* zh_TW.big5 is not the same charset as zh_HK.big5! */
2032         name = "hkbig5";
2033     }
2034     else if (uprv_strcmp(name, "eucJP") == 0) {
2035         /*
2036         ibm-1350 is the best match, but unavailable.
2037         ibm-954 is mostly a superset of ibm-1350.
2038         ibm-33722 is the default for eucJP (similar to Windows).
2039         */
2040         name = "eucjis";
2041     }
2042 #elif U_PLATFORM == U_PF_LINUX
2043     if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
2044         /* Linux underspecifies the "EUC" name. */
2045         if (uprv_strcmp(locale, "korean") == 0) {
2046             name = "EUC-KR";
2047         }
2048         else if (uprv_strcmp(locale, "japanese") == 0) {
2049             /* See comment below about eucJP */
2050             name = "eucjis";
2051         }
2052     }
2053     else if (uprv_strcmp(name, "eucjp") == 0) {
2054         /*
2055         ibm-1350 is the best match, but unavailable.
2056         ibm-954 is mostly a superset of ibm-1350.
2057         ibm-33722 is the default for eucJP (similar to Windows).
2058         */
2059         name = "eucjis";
2060     }
2061     else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
2062             (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
2063         /*
2064          * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2065          */
2066         name = "UTF-8";
2067     }
2068     /*
2069      * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
2070      * it by falling back to 'US-ASCII' when NULL is returned from this
2071      * function. So, we don't have to worry about it here.
2072      */
2073 #endif
2074     /* return NULL when "" is passed in */
2075     if (*name == 0) {
2076         name = NULL;
2077     }
2078     return name;
2079 }
2080 
2081 static const char*
2082 getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
2083 {
2084     char localeBuf[100];
2085     const char *name = NULL;
2086     char *variant = NULL;
2087 
2088     if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
2089         size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
2090 #ifdef U_STRINGI_PATCHES
2091         uprv_strncpy(localeBuf, localeName, localeCapacity-1);
2092         name = uprv_strncpy(buffer, name+1, buffCapacity-1);
2093 #else /* !U_STRINGI_PATCHES */
2094         uprv_strncpy(localeBuf, localeName, localeCapacity);
2095         name = uprv_strncpy(buffer, name+1, buffCapacity);
2096 #endif /* U_STRINGI_PATCHES */
2097         localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
2098         buffer[buffCapacity-1] = 0; /* ensure NULL termination */
2099         if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) {
2100             *variant = 0;
2101         }
2102         name = remapPlatformDependentCodepage(localeBuf, name);
2103     }
2104     return name;
2105 }
2106 #endif
2107 
/*
 * Work out the name of the platform's default codepage.
 *
 * Exactly one of the platform sections below is compiled in. The returned
 * pointer refers either to a function-local static buffer or to a string
 * literal, so the caller must not free it. Callers are expected to go
 * through uprv_getDefaultCodepage(), which caches the result.
 */
static const char*
int_getDefaultCodepage()
{
#if U_PLATFORM == U_PF_OS400
    uint32_t ccsid = 37; /* Default to ibm-37 */
    static char codepage[64];
    Qwc_JOBI0400_t jobinfo;
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */

    /* Ask the system for the job information in JOBI0400 format. */
    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
        "*                         ", "                ", &error);

    if (error.Bytes_Available == 0) {
        /* 0xFFFF is skipped for both CCSID fields: prefer the job CCSID, then its default. */
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
        }
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
        }
        /* else use the default */
    }
    sprintf(codepage,"ibm-%d", ccsid);
    return codepage;

#elif U_PLATFORM == U_PF_OS390
    static char codepage[64];

    /* Leave room for the option string that is appended to the codeset name. */
    strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
    strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
    codepage[63] = 0; /* NULL terminate */

    return codepage;

#elif U_PLATFORM_USES_ONLY_WIN32_API
    static char codepage[64];
    DWORD codepageNumber = 0;

#if U_PLATFORM_HAS_WINUWP_API == 1
    // UWP doesn't have a direct API to get the default ACP as Microsoft would rather
    // have folks use Unicode than a "system" code page, however this is the same
    // codepage as the system default locale codepage.  (FWIW, the system locale is
    // ONLY used for codepage, it should never be used for anything else)
    GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
        (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR));
#else
    // Win32 apps can call GetACP
    codepageNumber = GetACP();
#endif
    // Special case for UTF-8
    if (codepageNumber == 65001)
    {
        return "UTF-8";
    }
    // Windows codepages can look like windows-1252, so format the found number
    // the numbers are eclectic, however all valid system code pages, besides UTF-8
    // are between 3 and 19999
    if (codepageNumber > 0 && codepageNumber < 20000)
    {
        sprintf(codepage, "windows-%ld", codepageNumber);
        return codepage;
    }
    // If the codepage number call failed then return UTF-8
    return "UTF-8";

#elif U_POSIX_LOCALE
    static char codesetName[100];
    const char *localeName = NULL;
    const char *name = NULL;

    localeName = uprv_getPOSIXIDForDefaultCodepage();
    uprv_memset(codesetName, 0, sizeof(codesetName));
    /* On Solaris nl_langinfo returns C locale values unless setlocale
     * was called earlier.
     */
#if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
    /* When available, check nl_langinfo first because it usually gives more
       useful names. It depends on LC_CTYPE.
       nl_langinfo may use the same buffer as setlocale. */
    {
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
#if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
        /*
         * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
         * instead of ASCII.
         */
        if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
            codeset = remapPlatformDependentCodepage(localeName, codeset);
        } else
#endif
        {
            /* Remap without a locale, so only locale-independent remappings apply. */
            codeset = remapPlatformDependentCodepage(NULL, codeset);
        }

        if (codeset != NULL) {
            /* Copy into the static buffer, since nl_langinfo's storage may be reused. */
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
            codesetName[sizeof(codesetName)-1] = 0;
            return codesetName;
        }
    }
#endif

    /* Use setlocale in a nice way, and then check some environment variables.
       Maybe the application used setlocale already.
    */
    uprv_memset(codesetName, 0, sizeof(codesetName));
    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
    if (name) {
        /* if we can find the codeset name from setlocale, return that. */
        return name;
    }

    if (*codesetName == 0)
    {
        /* Everything failed. Return US ASCII (ISO 646). */
        (void)uprv_strcpy(codesetName, "US-ASCII");
    }
    return codesetName;
#else
    /* No platform-specific way to find out: assume US-ASCII. */
    return "US-ASCII";
#endif
}
2229 
2230 
2231 U_CAPI const char*  U_EXPORT2
2232 uprv_getDefaultCodepage()
2233 {
2234     static char const  *name = NULL;
2235     umtx_lock(NULL);
2236     if (name == NULL) {
2237         name = int_getDefaultCodepage();
2238     }
2239     umtx_unlock(NULL);
2240     return name;
2241 }
2242 #endif  /* !U_CHARSET_IS_UTF8 */
2243 
2244 
2245 /* end of platform-specific implementation -------------- */
2246 
2247 /* version handling --------------------------------------------------------- */
2248 
2249 U_CAPI void U_EXPORT2
2250 u_versionFromString(UVersionInfo versionArray, const char *versionString) {
2251     char *end;
2252     uint16_t part=0;
2253 
2254     if(versionArray==NULL) {
2255         return;
2256     }
2257 
2258     if(versionString!=NULL) {
2259         for(;;) {
2260             versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10);
2261             if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) {
2262                 break;
2263             }
2264             versionString=end+1;
2265         }
2266     }
2267 
2268     while(part<U_MAX_VERSION_LENGTH) {
2269         versionArray[part++]=0;
2270     }
2271 }
2272 
2273 U_CAPI void U_EXPORT2
2274 u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
2275     if(versionArray!=NULL && versionString!=NULL) {
2276         char versionChars[U_MAX_VERSION_STRING_LENGTH+1];
2277         int32_t len = u_strlen(versionString);
2278         if(len>U_MAX_VERSION_STRING_LENGTH) {
2279             len = U_MAX_VERSION_STRING_LENGTH;
2280         }
2281         u_UCharsToChars(versionString, versionChars, len);
2282         versionChars[len]=0;
2283         u_versionFromString(versionArray, versionChars);
2284     }
2285 }
2286 
2287 U_CAPI void U_EXPORT2
2288 u_versionToString(const UVersionInfo versionArray, char *versionString) {
2289     uint16_t count, part;
2290     uint8_t field;
2291 
2292     if(versionString==NULL) {
2293         return;
2294     }
2295 
2296     if(versionArray==NULL) {
2297         versionString[0]=0;
2298         return;
2299     }
2300 
2301     /* count how many fields need to be written */
2302     for(count=4; count>0 && versionArray[count-1]==0; --count) {
2303     }
2304 
2305     if(count <= 1) {
2306         count = 2;
2307     }
2308 
2309     /* write the first part */
2310     /* write the decimal field value */
2311     field=versionArray[0];
2312     if(field>=100) {
2313         *versionString++=(char)('0'+field/100);
2314         field%=100;
2315     }
2316     if(field>=10) {
2317         *versionString++=(char)('0'+field/10);
2318         field%=10;
2319     }
2320     *versionString++=(char)('0'+field);
2321 
2322     /* write the following parts */
2323     for(part=1; part<count; ++part) {
2324         /* write a dot first */
2325         *versionString++=U_VERSION_DELIMITER;
2326 
2327         /* write the decimal field value */
2328         field=versionArray[part];
2329         if(field>=100) {
2330             *versionString++=(char)('0'+field/100);
2331             field%=100;
2332         }
2333         if(field>=10) {
2334             *versionString++=(char)('0'+field/10);
2335             field%=10;
2336         }
2337         *versionString++=(char)('0'+field);
2338     }
2339 
2340     /* NUL-terminate */
2341     *versionString=0;
2342 }
2343 
/*
 * Fill versionArray with the version of this ICU build, obtained by parsing
 * the U_ICU_VERSION string with u_versionFromString().
 */
U_CAPI void U_EXPORT2
u_getVersion(UVersionInfo versionArray) {
    (void)copyright;   // Suppress unused variable warning from clang.
    u_versionFromString(versionArray, U_ICU_VERSION);
}
2349 
2350 /**
2351  * icucfg.h dependent code
2352  */
2353 
2354 #if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
2355 
2356 #if HAVE_DLFCN_H
2357 #ifdef __MVS__
2358 #ifndef __SUSV3
2359 #define __SUSV3 1
2360 #endif
2361 #endif
2362 #include <dlfcn.h>
2363 #endif /* HAVE_DLFCN_H */
2364 
2365 U_CAPI void * U_EXPORT2
2366 uprv_dl_open(const char *libName, UErrorCode *status) {
2367   void *ret = NULL;
2368   if(U_FAILURE(*status)) return ret;
2369   ret =  dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
2370   if(ret==NULL) {
2371 #ifdef U_TRACE_DYLOAD
2372     printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
2373 #endif
2374     *status = U_MISSING_RESOURCE_ERROR;
2375   }
2376   return ret;
2377 }
2378 
2379 U_CAPI void U_EXPORT2
2380 uprv_dl_close(void *lib, UErrorCode *status) {
2381   if(U_FAILURE(*status)) return;
2382   dlclose(lib);
2383 }
2384 
2385 U_CAPI UVoidFunction* U_EXPORT2
2386 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2387   union {
2388       UVoidFunction *fp;
2389       void *vp;
2390   } uret;
2391   uret.fp = NULL;
2392   if(U_FAILURE(*status)) return uret.fp;
2393   uret.vp = dlsym(lib, sym);
2394   if(uret.vp == NULL) {
2395 #ifdef U_TRACE_DYLOAD
2396     printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
2397 #endif
2398     *status = U_MISSING_RESOURCE_ERROR;
2399   }
2400   return uret.fp;
2401 }
2402 
2403 #elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API
2404 
2405 /* Windows API implementation. */
// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation.
2407 
2408 U_CAPI void * U_EXPORT2
2409 uprv_dl_open(const char *libName, UErrorCode *status) {
2410   HMODULE lib = NULL;
2411 
2412   if(U_FAILURE(*status)) return NULL;
2413 
2414   lib = LoadLibraryA(libName);
2415 
2416   if(lib==NULL) {
2417     *status = U_MISSING_RESOURCE_ERROR;
2418   }
2419 
2420   return (void*)lib;
2421 }
2422 
2423 U_CAPI void U_EXPORT2
2424 uprv_dl_close(void *lib, UErrorCode *status) {
2425   HMODULE handle = (HMODULE)lib;
2426   if(U_FAILURE(*status)) return;
2427 
2428   FreeLibrary(handle);
2429 
2430   return;
2431 }
2432 
2433 U_CAPI UVoidFunction* U_EXPORT2
2434 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2435   HMODULE handle = (HMODULE)lib;
2436   UVoidFunction* addr = NULL;
2437 
2438   if(U_FAILURE(*status) || lib==NULL) return NULL;
2439 
2440   addr = (UVoidFunction*)GetProcAddress(handle, sym);
2441 
2442   if(addr==NULL) {
2443     DWORD lastError = GetLastError();
2444     if(lastError == ERROR_PROC_NOT_FOUND) {
2445       *status = U_MISSING_RESOURCE_ERROR;
2446     } else {
2447       *status = U_UNSUPPORTED_ERROR; /* other unknown error. */
2448     }
2449   }
2450 
2451   return addr;
2452 }
2453 
2454 #else
2455 
2456 /* No dynamic loading, null (nonexistent) implementation. */
2457 
2458 U_CAPI void * U_EXPORT2
2459 uprv_dl_open(const char *libName, UErrorCode *status) {
2460     (void)libName;
2461     if(U_FAILURE(*status)) return NULL;
2462     *status = U_UNSUPPORTED_ERROR;
2463     return NULL;
2464 }
2465 
2466 U_CAPI void U_EXPORT2
2467 uprv_dl_close(void *lib, UErrorCode *status) {
2468     (void)lib;
2469     if(U_FAILURE(*status)) return;
2470     *status = U_UNSUPPORTED_ERROR;
2471     return;
2472 }
2473 
2474 U_CAPI UVoidFunction* U_EXPORT2
2475 uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
2476   (void)lib;
2477   (void)sym;
2478   if(U_SUCCESS(*status)) {
2479     *status = U_UNSUPPORTED_ERROR;
2480   }
2481   return (UVoidFunction*)NULL;
2482 }
2483 
2484 #endif
2485 
2486 /*
2487  * Hey, Emacs, please set the following:
2488  *
2489  * Local Variables:
2490  * indent-tabs-mode: nil
2491  * End:
2492  *
2493  */
2494