1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 1997-2014, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  loclikely.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2010feb25
14 *   created by: Markus W. Scherer
15 *
16 *   Code for likely and minimized locale subtags, separated out from other .cpp files
17 *   that then do not depend on resource bundle code and likely-subtags data.
18 */
19 
20 #include "unicode/utypes.h"
21 #include "unicode/locid.h"
22 #include "unicode/putil.h"
23 #include "unicode/uloc.h"
24 #include "unicode/ures.h"
25 #include "unicode/uscript.h"
26 #include "cmemory.h"
27 #include "cstring.h"
28 #include "ulocimp.h"
29 #include "ustr_imp.h"
30 
31 /**
32  * This function looks for the localeID in the likelySubtags resource.
33  *
34  * @param localeID The tag to find.
35  * @param buffer A buffer to hold the matching entry
36  * @param bufferLength The length of the output buffer
37  * @return A pointer to "buffer" if found, or a null pointer if not.
38  */
39 static const char*  U_CALLCONV
findLikelySubtags(const char * localeID,char * buffer,int32_t bufferLength,UErrorCode * err)40 findLikelySubtags(const char* localeID,
41                   char* buffer,
42                   int32_t bufferLength,
43                   UErrorCode* err) {
44     const char* result = NULL;
45 
46     if (!U_FAILURE(*err)) {
47         int32_t resLen = 0;
48         const UChar* s = NULL;
49         UErrorCode tmpErr = U_ZERO_ERROR;
50         UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
51         if (U_SUCCESS(tmpErr)) {
52             s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
53 
54             if (U_FAILURE(tmpErr)) {
55                 /*
56                  * If a resource is missing, it's not really an error, it's
57                  * just that we don't have any data for that particular locale ID.
58                  */
59                 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
60                     *err = tmpErr;
61                 }
62             }
63             else if (resLen >= bufferLength) {
64                 /* The buffer should never overflow. */
65                 *err = U_INTERNAL_PROGRAM_ERROR;
66             }
67             else {
68                 u_UCharsToChars(s, buffer, resLen + 1);
69                 result = buffer;
70             }
71 
72             ures_close(subtags);
73         } else {
74             *err = tmpErr;
75         }
76     }
77 
78     return result;
79 }
80 
81 /**
82  * Append a tag to a buffer, adding the separator if necessary.  The buffer
83  * must be large enough to contain the resulting tag plus any separator
84  * necessary. The tag must not be a zero-length string.
85  *
86  * @param tag The tag to add.
87  * @param tagLength The length of the tag.
88  * @param buffer The output buffer.
89  * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
90  **/
91 static void U_CALLCONV
appendTag(const char * tag,int32_t tagLength,char * buffer,int32_t * bufferLength)92 appendTag(
93     const char* tag,
94     int32_t tagLength,
95     char* buffer,
96     int32_t* bufferLength) {
97 
98     if (*bufferLength > 0) {
99         buffer[*bufferLength] = '_';
100         ++(*bufferLength);
101     }
102 
103     uprv_memmove(
104         &buffer[*bufferLength],
105         tag,
106         tagLength);
107 
108     *bufferLength += tagLength;
109 }
110 
/**
 * Canonical subtag values that stand for an unknown language, an unknown
 * script and an unknown region, respectively.
 **/
static const char *const unknownLanguage = "und";  /* unknown language */
static const char *const unknownScript   = "Zzzz"; /* unknown script   */
static const char *const unknownRegion   = "ZZ";   /* unknown region   */
117 
118 /**
119  * Create a tag string from the supplied parameters.  The lang, script and region
120  * parameters may be NULL pointers. If they are, their corresponding length parameters
121  * must be less than or equal to 0.
122  *
123  * If any of the language, script or region parameters are empty, and the alternateTags
124  * parameter is not NULL, it will be parsed for potential language, script and region tags
125  * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
126  * it contains no language tag, the default tag for the unknown language is used.
127  *
128  * If the length of the new string exceeds the capacity of the output buffer,
129  * the function copies as many bytes to the output buffer as it can, and returns
130  * the error U_BUFFER_OVERFLOW_ERROR.
131  *
132  * If an illegal argument is provided, the function returns the error
133  * U_ILLEGAL_ARGUMENT_ERROR.
134  *
135  * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
136  * the tag string fits in the output buffer, but the null terminator doesn't.
137  *
138  * @param lang The language tag to use.
139  * @param langLength The length of the language tag.
140  * @param script The script tag to use.
141  * @param scriptLength The length of the script tag.
142  * @param region The region tag to use.
143  * @param regionLength The length of the region tag.
144  * @param trailing Any trailing data to append to the new tag.
145  * @param trailingLength The length of the trailing data.
146  * @param alternateTags A string containing any alternate tags.
147  * @param tag The output buffer.
148  * @param tagCapacity The capacity of the output buffer.
149  * @param err A pointer to a UErrorCode for error reporting.
150  * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
151  **/
152 static int32_t U_CALLCONV
createTagStringWithAlternates(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * trailing,int32_t trailingLength,const char * alternateTags,char * tag,int32_t tagCapacity,UErrorCode * err)153 createTagStringWithAlternates(
154     const char* lang,
155     int32_t langLength,
156     const char* script,
157     int32_t scriptLength,
158     const char* region,
159     int32_t regionLength,
160     const char* trailing,
161     int32_t trailingLength,
162     const char* alternateTags,
163     char* tag,
164     int32_t tagCapacity,
165     UErrorCode* err) {
166 
167     if (U_FAILURE(*err)) {
168         goto error;
169     }
170     else if (tag == NULL ||
171              tagCapacity <= 0 ||
172              langLength >= ULOC_LANG_CAPACITY ||
173              scriptLength >= ULOC_SCRIPT_CAPACITY ||
174              regionLength >= ULOC_COUNTRY_CAPACITY) {
175         goto error;
176     }
177     else {
178         /**
179          * ULOC_FULLNAME_CAPACITY will provide enough capacity
180          * that we can build a string that contains the language,
181          * script and region code without worrying about overrunning
182          * the user-supplied buffer.
183          **/
184         char tagBuffer[ULOC_FULLNAME_CAPACITY];
185         int32_t tagLength = 0;
186         int32_t capacityRemaining = tagCapacity;
187         UBool regionAppended = FALSE;
188 
189         if (langLength > 0) {
190             appendTag(
191                 lang,
192                 langLength,
193                 tagBuffer,
194                 &tagLength);
195         }
196         else if (alternateTags == NULL) {
197             /*
198              * Append the value for an unknown language, if
199              * we found no language.
200              */
201             appendTag(
202                 unknownLanguage,
203                 (int32_t)uprv_strlen(unknownLanguage),
204                 tagBuffer,
205                 &tagLength);
206         }
207         else {
208             /*
209              * Parse the alternateTags string for the language.
210              */
211             char alternateLang[ULOC_LANG_CAPACITY];
212             int32_t alternateLangLength = sizeof(alternateLang);
213 
214             alternateLangLength =
215                 uloc_getLanguage(
216                     alternateTags,
217                     alternateLang,
218                     alternateLangLength,
219                     err);
220             if(U_FAILURE(*err) ||
221                 alternateLangLength >= ULOC_LANG_CAPACITY) {
222                 goto error;
223             }
224             else if (alternateLangLength == 0) {
225                 /*
226                  * Append the value for an unknown language, if
227                  * we found no language.
228                  */
229                 appendTag(
230                     unknownLanguage,
231                     (int32_t)uprv_strlen(unknownLanguage),
232                     tagBuffer,
233                     &tagLength);
234             }
235             else {
236                 appendTag(
237                     alternateLang,
238                     alternateLangLength,
239                     tagBuffer,
240                     &tagLength);
241             }
242         }
243 
244         if (scriptLength > 0) {
245             appendTag(
246                 script,
247                 scriptLength,
248                 tagBuffer,
249                 &tagLength);
250         }
251         else if (alternateTags != NULL) {
252             /*
253              * Parse the alternateTags string for the script.
254              */
255             char alternateScript[ULOC_SCRIPT_CAPACITY];
256 
257             const int32_t alternateScriptLength =
258                 uloc_getScript(
259                     alternateTags,
260                     alternateScript,
261                     sizeof(alternateScript),
262                     err);
263 
264             if (U_FAILURE(*err) ||
265                 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
266                 goto error;
267             }
268             else if (alternateScriptLength > 0) {
269                 appendTag(
270                     alternateScript,
271                     alternateScriptLength,
272                     tagBuffer,
273                     &tagLength);
274             }
275         }
276 
277         if (regionLength > 0) {
278             appendTag(
279                 region,
280                 regionLength,
281                 tagBuffer,
282                 &tagLength);
283 
284             regionAppended = TRUE;
285         }
286         else if (alternateTags != NULL) {
287             /*
288              * Parse the alternateTags string for the region.
289              */
290             char alternateRegion[ULOC_COUNTRY_CAPACITY];
291 
292             const int32_t alternateRegionLength =
293                 uloc_getCountry(
294                     alternateTags,
295                     alternateRegion,
296                     sizeof(alternateRegion),
297                     err);
298             if (U_FAILURE(*err) ||
299                 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
300                 goto error;
301             }
302             else if (alternateRegionLength > 0) {
303                 appendTag(
304                     alternateRegion,
305                     alternateRegionLength,
306                     tagBuffer,
307                     &tagLength);
308 
309                 regionAppended = TRUE;
310             }
311         }
312 
313         {
314             const int32_t toCopy =
315                 tagLength >= tagCapacity ? tagCapacity : tagLength;
316 
317             /**
318              * Copy the partial tag from our internal buffer to the supplied
319              * target.
320              **/
321             uprv_memcpy(
322                 tag,
323                 tagBuffer,
324                 toCopy);
325 
326             capacityRemaining -= toCopy;
327         }
328 
329         if (trailingLength > 0) {
330             if (*trailing != '@' && capacityRemaining > 0) {
331                 tag[tagLength++] = '_';
332                 --capacityRemaining;
333                 if (capacityRemaining > 0 && !regionAppended) {
334                     /* extra separator is required */
335                     tag[tagLength++] = '_';
336                     --capacityRemaining;
337                 }
338             }
339 
340             if (capacityRemaining > 0) {
341                 /*
342                  * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
343                  * don't know if the user-supplied buffers overlap.
344                  */
345                 const int32_t toCopy =
346                     trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
347 
348                 uprv_memmove(
349                     &tag[tagLength],
350                     trailing,
351                     toCopy);
352             }
353         }
354 
355         tagLength += trailingLength;
356 
357         return u_terminateChars(
358                     tag,
359                     tagCapacity,
360                     tagLength,
361                     err);
362     }
363 
364 error:
365 
366     /**
367      * An overflow indicates the locale ID passed in
368      * is ill-formed.  If we got here, and there was
369      * no previous error, it's an implicit overflow.
370      **/
371     if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
372         U_SUCCESS(*err)) {
373         *err = U_ILLEGAL_ARGUMENT_ERROR;
374     }
375 
376     return -1;
377 }
378 
379 /**
380  * Create a tag string from the supplied parameters.  The lang, script and region
381  * parameters may be NULL pointers. If they are, their corresponding length parameters
382  * must be less than or equal to 0.  If the lang parameter is an empty string, the
383  * default value for an unknown language is written to the output buffer.
384  *
385  * If the length of the new string exceeds the capacity of the output buffer,
386  * the function copies as many bytes to the output buffer as it can, and returns
387  * the error U_BUFFER_OVERFLOW_ERROR.
388  *
389  * If an illegal argument is provided, the function returns the error
390  * U_ILLEGAL_ARGUMENT_ERROR.
391  *
392  * @param lang The language tag to use.
393  * @param langLength The length of the language tag.
394  * @param script The script tag to use.
395  * @param scriptLength The length of the script tag.
396  * @param region The region tag to use.
397  * @param regionLength The length of the region tag.
398  * @param trailing Any trailing data to append to the new tag.
399  * @param trailingLength The length of the trailing data.
400  * @param tag The output buffer.
401  * @param tagCapacity The capacity of the output buffer.
402  * @param err A pointer to a UErrorCode for error reporting.
403  * @return The length of the tag string, which may be greater than tagCapacity.
404  **/
405 static int32_t U_CALLCONV
createTagString(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * trailing,int32_t trailingLength,char * tag,int32_t tagCapacity,UErrorCode * err)406 createTagString(
407     const char* lang,
408     int32_t langLength,
409     const char* script,
410     int32_t scriptLength,
411     const char* region,
412     int32_t regionLength,
413     const char* trailing,
414     int32_t trailingLength,
415     char* tag,
416     int32_t tagCapacity,
417     UErrorCode* err)
418 {
419     return createTagStringWithAlternates(
420                 lang,
421                 langLength,
422                 script,
423                 scriptLength,
424                 region,
425                 regionLength,
426                 trailing,
427                 trailingLength,
428                 NULL,
429                 tag,
430                 tagCapacity,
431                 err);
432 }
433 
434 /**
435  * Parse the language, script, and region subtags from a tag string, and copy the
436  * results into the corresponding output parameters. The buffers are null-terminated,
437  * unless overflow occurs.
438  *
439  * The langLength, scriptLength, and regionLength parameters are input/output
440  * parameters, and must contain the capacity of their corresponding buffers on
441  * input.  On output, they will contain the actual length of the buffers, not
442  * including the null terminator.
443  *
444  * If the length of any of the output subtags exceeds the capacity of the corresponding
445  * buffer, the function copies as many bytes to the output buffer as it can, and returns
446  * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
447  * occurs.
448  *
449  * If an illegal argument is provided, the function returns the error
450  * U_ILLEGAL_ARGUMENT_ERROR.
451  *
452  * @param localeID The locale ID to parse.
453  * @param lang The language tag buffer.
454  * @param langLength The length of the language tag.
455  * @param script The script tag buffer.
456  * @param scriptLength The length of the script tag.
457  * @param region The region tag buffer.
458  * @param regionLength The length of the region tag.
459  * @param err A pointer to a UErrorCode for error reporting.
460  * @return The number of chars of the localeID parameter consumed.
461  **/
462 static int32_t U_CALLCONV
parseTagString(const char * localeID,char * lang,int32_t * langLength,char * script,int32_t * scriptLength,char * region,int32_t * regionLength,UErrorCode * err)463 parseTagString(
464     const char* localeID,
465     char* lang,
466     int32_t* langLength,
467     char* script,
468     int32_t* scriptLength,
469     char* region,
470     int32_t* regionLength,
471     UErrorCode* err)
472 {
473     const char* position = localeID;
474     int32_t subtagLength = 0;
475 
476     if(U_FAILURE(*err) ||
477        localeID == NULL ||
478        lang == NULL ||
479        langLength == NULL ||
480        script == NULL ||
481        scriptLength == NULL ||
482        region == NULL ||
483        regionLength == NULL) {
484         goto error;
485     }
486 
487     subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
488     u_terminateChars(lang, *langLength, subtagLength, err);
489 
490     /*
491      * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
492      * to be an error, because it indicates the user-supplied tag is
493      * not well-formed.
494      */
495     if(U_FAILURE(*err)) {
496         goto error;
497     }
498 
499     *langLength = subtagLength;
500 
501     /*
502      * If no language was present, use the value of unknownLanguage
503      * instead.  Otherwise, move past any separator.
504      */
505     if (*langLength == 0) {
506         uprv_strcpy(
507             lang,
508             unknownLanguage);
509         *langLength = (int32_t)uprv_strlen(lang);
510     }
511     else if (_isIDSeparator(*position)) {
512         ++position;
513     }
514 
515     subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
516     u_terminateChars(script, *scriptLength, subtagLength, err);
517 
518     if(U_FAILURE(*err)) {
519         goto error;
520     }
521 
522     *scriptLength = subtagLength;
523 
524     if (*scriptLength > 0) {
525         if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
526             /**
527              * If the script part is the "unknown" script, then don't return it.
528              **/
529             *scriptLength = 0;
530         }
531 
532         /*
533          * Move past any separator.
534          */
535         if (_isIDSeparator(*position)) {
536             ++position;
537         }
538     }
539 
540     subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
541     u_terminateChars(region, *regionLength, subtagLength, err);
542 
543     if(U_FAILURE(*err)) {
544         goto error;
545     }
546 
547     *regionLength = subtagLength;
548 
549     if (*regionLength > 0) {
550         if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
551             /**
552              * If the region part is the "unknown" region, then don't return it.
553              **/
554             *regionLength = 0;
555         }
556     } else if (*position != 0 && *position != '@') {
557         /* back up over consumed trailing separator */
558         --position;
559     }
560 
561 exit:
562 
563     return (int32_t)(position - localeID);
564 
565 error:
566 
567     /**
568      * If we get here, we have no explicit error, it's the result of an
569      * illegal argument.
570      **/
571     if (!U_FAILURE(*err)) {
572         *err = U_ILLEGAL_ARGUMENT_ERROR;
573     }
574 
575     goto exit;
576 }
577 
578 static int32_t U_CALLCONV
createLikelySubtagsString(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * variants,int32_t variantsLength,char * tag,int32_t tagCapacity,UErrorCode * err)579 createLikelySubtagsString(
580     const char* lang,
581     int32_t langLength,
582     const char* script,
583     int32_t scriptLength,
584     const char* region,
585     int32_t regionLength,
586     const char* variants,
587     int32_t variantsLength,
588     char* tag,
589     int32_t tagCapacity,
590     UErrorCode* err)
591 {
592     /**
593      * ULOC_FULLNAME_CAPACITY will provide enough capacity
594      * that we can build a string that contains the language,
595      * script and region code without worrying about overrunning
596      * the user-supplied buffer.
597      **/
598     char tagBuffer[ULOC_FULLNAME_CAPACITY];
599     char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
600 
601     if(U_FAILURE(*err)) {
602         goto error;
603     }
604 
605     /**
606      * Try the language with the script and region first.
607      **/
608     if (scriptLength > 0 && regionLength > 0) {
609 
610         const char* likelySubtags = NULL;
611 
612         createTagString(
613             lang,
614             langLength,
615             script,
616             scriptLength,
617             region,
618             regionLength,
619             NULL,
620             0,
621             tagBuffer,
622             sizeof(tagBuffer),
623             err);
624         if(U_FAILURE(*err)) {
625             goto error;
626         }
627 
628         likelySubtags =
629             findLikelySubtags(
630                 tagBuffer,
631                 likelySubtagsBuffer,
632                 sizeof(likelySubtagsBuffer),
633                 err);
634         if(U_FAILURE(*err)) {
635             goto error;
636         }
637 
638         if (likelySubtags != NULL) {
639             /* Always use the language tag from the
640                maximal string, since it may be more
641                specific than the one provided. */
642             return createTagStringWithAlternates(
643                         NULL,
644                         0,
645                         NULL,
646                         0,
647                         NULL,
648                         0,
649                         variants,
650                         variantsLength,
651                         likelySubtags,
652                         tag,
653                         tagCapacity,
654                         err);
655         }
656     }
657 
658     /**
659      * Try the language with just the script.
660      **/
661     if (scriptLength > 0) {
662 
663         const char* likelySubtags = NULL;
664 
665         createTagString(
666             lang,
667             langLength,
668             script,
669             scriptLength,
670             NULL,
671             0,
672             NULL,
673             0,
674             tagBuffer,
675             sizeof(tagBuffer),
676             err);
677         if(U_FAILURE(*err)) {
678             goto error;
679         }
680 
681         likelySubtags =
682             findLikelySubtags(
683                 tagBuffer,
684                 likelySubtagsBuffer,
685                 sizeof(likelySubtagsBuffer),
686                 err);
687         if(U_FAILURE(*err)) {
688             goto error;
689         }
690 
691         if (likelySubtags != NULL) {
692             /* Always use the language tag from the
693                maximal string, since it may be more
694                specific than the one provided. */
695             return createTagStringWithAlternates(
696                         NULL,
697                         0,
698                         NULL,
699                         0,
700                         region,
701                         regionLength,
702                         variants,
703                         variantsLength,
704                         likelySubtags,
705                         tag,
706                         tagCapacity,
707                         err);
708         }
709     }
710 
711     /**
712      * Try the language with just the region.
713      **/
714     if (regionLength > 0) {
715 
716         const char* likelySubtags = NULL;
717 
718         createTagString(
719             lang,
720             langLength,
721             NULL,
722             0,
723             region,
724             regionLength,
725             NULL,
726             0,
727             tagBuffer,
728             sizeof(tagBuffer),
729             err);
730         if(U_FAILURE(*err)) {
731             goto error;
732         }
733 
734         likelySubtags =
735             findLikelySubtags(
736                 tagBuffer,
737                 likelySubtagsBuffer,
738                 sizeof(likelySubtagsBuffer),
739                 err);
740         if(U_FAILURE(*err)) {
741             goto error;
742         }
743 
744         if (likelySubtags != NULL) {
745             /* Always use the language tag from the
746                maximal string, since it may be more
747                specific than the one provided. */
748             return createTagStringWithAlternates(
749                         NULL,
750                         0,
751                         script,
752                         scriptLength,
753                         NULL,
754                         0,
755                         variants,
756                         variantsLength,
757                         likelySubtags,
758                         tag,
759                         tagCapacity,
760                         err);
761         }
762     }
763 
764     /**
765      * Finally, try just the language.
766      **/
767     {
768         const char* likelySubtags = NULL;
769 
770         createTagString(
771             lang,
772             langLength,
773             NULL,
774             0,
775             NULL,
776             0,
777             NULL,
778             0,
779             tagBuffer,
780             sizeof(tagBuffer),
781             err);
782         if(U_FAILURE(*err)) {
783             goto error;
784         }
785 
786         likelySubtags =
787             findLikelySubtags(
788                 tagBuffer,
789                 likelySubtagsBuffer,
790                 sizeof(likelySubtagsBuffer),
791                 err);
792         if(U_FAILURE(*err)) {
793             goto error;
794         }
795 
796         if (likelySubtags != NULL) {
797             /* Always use the language tag from the
798                maximal string, since it may be more
799                specific than the one provided. */
800             return createTagStringWithAlternates(
801                         NULL,
802                         0,
803                         script,
804                         scriptLength,
805                         region,
806                         regionLength,
807                         variants,
808                         variantsLength,
809                         likelySubtags,
810                         tag,
811                         tagCapacity,
812                         err);
813         }
814     }
815 
816     return u_terminateChars(
817                 tag,
818                 tagCapacity,
819                 0,
820                 err);
821 
822 error:
823 
824     if (!U_FAILURE(*err)) {
825         *err = U_ILLEGAL_ARGUMENT_ERROR;
826     }
827 
828     return -1;
829 }
830 
/**
 * Scan the trailing part of a locale ID and jump to the enclosing function's
 * "error" label when it contains an over-long subtag.
 *
 * The scan walks trailing[0 .. trailingLength-1].  A '-' or '_' starts a new
 * subtag, and an '@' ends the scan (whatever follows it is not examined).
 * Any other char extends the current subtag; the jump is taken when a char
 * is met while the current subtag already holds more than 8 chars, i.e. on
 * the 10th char of a subtag.
 *
 * NOTE(review): the threshold lets nine-char subtags through; confirm
 * whether a limit of eight was intended before tightening it.
 *
 * The macro expands to a brace-enclosed block and requires a label named
 * "error" in the function that uses it.  Both arguments may be arbitrary
 * expressions; each is evaluated more than once.
 *
 * @param trailing The chars to scan.
 * @param trailingLength The number of chars to scan.
 **/
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    {   int32_t ctvsRunLength_ = 0; \
        int32_t ctvsIndex_; \
        for (ctvsIndex_ = 0; ctvsIndex_ < (trailingLength); ctvsIndex_++) { \
            const char ctvsChar_ = (trailing)[ctvsIndex_]; \
            if (ctvsChar_ == '-' || ctvsChar_ == '_') { \
                ctvsRunLength_ = 0; \
            } else if (ctvsChar_ == '@') { \
                break; \
            } else if (ctvsRunLength_ > 8) { \
                goto error; \
            } else { \
                ctvsRunLength_++; \
            } \
        } \
    }
849 
850 static int32_t
_uloc_addLikelySubtags(const char * localeID,char * maximizedLocaleID,int32_t maximizedLocaleIDCapacity,UErrorCode * err)851 _uloc_addLikelySubtags(const char*    localeID,
852          char* maximizedLocaleID,
853          int32_t maximizedLocaleIDCapacity,
854          UErrorCode* err)
855 {
856     char lang[ULOC_LANG_CAPACITY];
857     int32_t langLength = sizeof(lang);
858     char script[ULOC_SCRIPT_CAPACITY];
859     int32_t scriptLength = sizeof(script);
860     char region[ULOC_COUNTRY_CAPACITY];
861     int32_t regionLength = sizeof(region);
862     const char* trailing = "";
863     int32_t trailingLength = 0;
864     int32_t trailingIndex = 0;
865     int32_t resultLength = 0;
866 
867     if(U_FAILURE(*err)) {
868         goto error;
869     }
870     else if (localeID == NULL ||
871              maximizedLocaleID == NULL ||
872              maximizedLocaleIDCapacity <= 0) {
873         goto error;
874     }
875 
876     trailingIndex = parseTagString(
877         localeID,
878         lang,
879         &langLength,
880         script,
881         &scriptLength,
882         region,
883         &regionLength,
884         err);
885     if(U_FAILURE(*err)) {
886         /* Overflow indicates an illegal argument error */
887         if (*err == U_BUFFER_OVERFLOW_ERROR) {
888             *err = U_ILLEGAL_ARGUMENT_ERROR;
889         }
890 
891         goto error;
892     }
893 
894     /* Find the length of the trailing portion. */
895     while (_isIDSeparator(localeID[trailingIndex])) {
896         trailingIndex++;
897     }
898     trailing = &localeID[trailingIndex];
899     trailingLength = (int32_t)uprv_strlen(trailing);
900 
901     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
902 
903     resultLength =
904         createLikelySubtagsString(
905             lang,
906             langLength,
907             script,
908             scriptLength,
909             region,
910             regionLength,
911             trailing,
912             trailingLength,
913             maximizedLocaleID,
914             maximizedLocaleIDCapacity,
915             err);
916 
917     if (resultLength == 0) {
918         const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
919 
920         /*
921          * If we get here, we need to return localeID.
922          */
923         uprv_memcpy(
924             maximizedLocaleID,
925             localeID,
926             localIDLength <= maximizedLocaleIDCapacity ?
927                 localIDLength : maximizedLocaleIDCapacity);
928 
929         resultLength =
930             u_terminateChars(
931                 maximizedLocaleID,
932                 maximizedLocaleIDCapacity,
933                 localIDLength,
934                 err);
935     }
936 
937     return resultLength;
938 
939 error:
940 
941     if (!U_FAILURE(*err)) {
942         *err = U_ILLEGAL_ARGUMENT_ERROR;
943     }
944 
945     return -1;
946 }
947 
948 static int32_t
_uloc_minimizeSubtags(const char * localeID,char * minimizedLocaleID,int32_t minimizedLocaleIDCapacity,UErrorCode * err)949 _uloc_minimizeSubtags(const char*    localeID,
950          char* minimizedLocaleID,
951          int32_t minimizedLocaleIDCapacity,
952          UErrorCode* err)
953 {
954     /**
955      * ULOC_FULLNAME_CAPACITY will provide enough capacity
956      * that we can build a string that contains the language,
957      * script and region code without worrying about overrunning
958      * the user-supplied buffer.
959      **/
960     char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
961     int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
962 
963     char lang[ULOC_LANG_CAPACITY];
964     int32_t langLength = sizeof(lang);
965     char script[ULOC_SCRIPT_CAPACITY];
966     int32_t scriptLength = sizeof(script);
967     char region[ULOC_COUNTRY_CAPACITY];
968     int32_t regionLength = sizeof(region);
969     const char* trailing = "";
970     int32_t trailingLength = 0;
971     int32_t trailingIndex = 0;
972 
973     if(U_FAILURE(*err)) {
974         goto error;
975     }
976     else if (localeID == NULL ||
977              minimizedLocaleID == NULL ||
978              minimizedLocaleIDCapacity <= 0) {
979         goto error;
980     }
981 
982     trailingIndex =
983         parseTagString(
984             localeID,
985             lang,
986             &langLength,
987             script,
988             &scriptLength,
989             region,
990             &regionLength,
991             err);
992     if(U_FAILURE(*err)) {
993 
994         /* Overflow indicates an illegal argument error */
995         if (*err == U_BUFFER_OVERFLOW_ERROR) {
996             *err = U_ILLEGAL_ARGUMENT_ERROR;
997         }
998 
999         goto error;
1000     }
1001 
1002     /* Find the spot where the variants or the keywords begin, if any. */
1003     while (_isIDSeparator(localeID[trailingIndex])) {
1004         trailingIndex++;
1005     }
1006     trailing = &localeID[trailingIndex];
1007     trailingLength = (int32_t)uprv_strlen(trailing);
1008 
1009     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
1010 
1011     createTagString(
1012         lang,
1013         langLength,
1014         script,
1015         scriptLength,
1016         region,
1017         regionLength,
1018         NULL,
1019         0,
1020         maximizedTagBuffer,
1021         maximizedTagBufferLength,
1022         err);
1023     if(U_FAILURE(*err)) {
1024         goto error;
1025     }
1026 
1027     /**
1028      * First, we need to first get the maximization
1029      * from AddLikelySubtags.
1030      **/
1031     maximizedTagBufferLength =
1032         uloc_addLikelySubtags(
1033             maximizedTagBuffer,
1034             maximizedTagBuffer,
1035             maximizedTagBufferLength,
1036             err);
1037 
1038     if(U_FAILURE(*err)) {
1039         goto error;
1040     }
1041 
1042     /**
1043      * Start first with just the language.
1044      **/
1045     {
1046         char tagBuffer[ULOC_FULLNAME_CAPACITY];
1047 
1048         const int32_t tagBufferLength =
1049             createLikelySubtagsString(
1050                 lang,
1051                 langLength,
1052                 NULL,
1053                 0,
1054                 NULL,
1055                 0,
1056                 NULL,
1057                 0,
1058                 tagBuffer,
1059                 sizeof(tagBuffer),
1060                 err);
1061 
1062         if(U_FAILURE(*err)) {
1063             goto error;
1064         }
1065         else if (uprv_strnicmp(
1066                     maximizedTagBuffer,
1067                     tagBuffer,
1068                     tagBufferLength) == 0) {
1069 
1070             return createTagString(
1071                         lang,
1072                         langLength,
1073                         NULL,
1074                         0,
1075                         NULL,
1076                         0,
1077                         trailing,
1078                         trailingLength,
1079                         minimizedLocaleID,
1080                         minimizedLocaleIDCapacity,
1081                         err);
1082         }
1083     }
1084 
1085     /**
1086      * Next, try the language and region.
1087      **/
1088     if (regionLength > 0) {
1089 
1090         char tagBuffer[ULOC_FULLNAME_CAPACITY];
1091 
1092         const int32_t tagBufferLength =
1093             createLikelySubtagsString(
1094                 lang,
1095                 langLength,
1096                 NULL,
1097                 0,
1098                 region,
1099                 regionLength,
1100                 NULL,
1101                 0,
1102                 tagBuffer,
1103                 sizeof(tagBuffer),
1104                 err);
1105 
1106         if(U_FAILURE(*err)) {
1107             goto error;
1108         }
1109         else if (uprv_strnicmp(
1110                     maximizedTagBuffer,
1111                     tagBuffer,
1112                     tagBufferLength) == 0) {
1113 
1114             return createTagString(
1115                         lang,
1116                         langLength,
1117                         NULL,
1118                         0,
1119                         region,
1120                         regionLength,
1121                         trailing,
1122                         trailingLength,
1123                         minimizedLocaleID,
1124                         minimizedLocaleIDCapacity,
1125                         err);
1126         }
1127     }
1128 
1129     /**
1130      * Finally, try the language and script.  This is our last chance,
1131      * since trying with all three subtags would only yield the
1132      * maximal version that we already have.
1133      **/
1134     if (scriptLength > 0 && regionLength > 0) {
1135         char tagBuffer[ULOC_FULLNAME_CAPACITY];
1136 
1137         const int32_t tagBufferLength =
1138             createLikelySubtagsString(
1139                 lang,
1140                 langLength,
1141                 script,
1142                 scriptLength,
1143                 NULL,
1144                 0,
1145                 NULL,
1146                 0,
1147                 tagBuffer,
1148                 sizeof(tagBuffer),
1149                 err);
1150 
1151         if(U_FAILURE(*err)) {
1152             goto error;
1153         }
1154         else if (uprv_strnicmp(
1155                     maximizedTagBuffer,
1156                     tagBuffer,
1157                     tagBufferLength) == 0) {
1158 
1159             return createTagString(
1160                         lang,
1161                         langLength,
1162                         script,
1163                         scriptLength,
1164                         NULL,
1165                         0,
1166                         trailing,
1167                         trailingLength,
1168                         minimizedLocaleID,
1169                         minimizedLocaleIDCapacity,
1170                         err);
1171         }
1172     }
1173 
1174     {
1175         /**
1176          * If we got here, return the locale ID parameter.
1177          **/
1178         const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1179 
1180         uprv_memcpy(
1181             minimizedLocaleID,
1182             localeID,
1183             localeIDLength <= minimizedLocaleIDCapacity ?
1184                 localeIDLength : minimizedLocaleIDCapacity);
1185 
1186         return u_terminateChars(
1187                     minimizedLocaleID,
1188                     minimizedLocaleIDCapacity,
1189                     localeIDLength,
1190                     err);
1191     }
1192 
1193 error:
1194 
1195     if (!U_FAILURE(*err)) {
1196         *err = U_ILLEGAL_ARGUMENT_ERROR;
1197     }
1198 
1199     return -1;
1200 
1201 
1202 }
1203 
1204 static UBool
do_canonicalize(const char * localeID,char * buffer,int32_t bufferCapacity,UErrorCode * err)1205 do_canonicalize(const char*    localeID,
1206          char* buffer,
1207          int32_t bufferCapacity,
1208          UErrorCode* err)
1209 {
1210     uloc_canonicalize(
1211         localeID,
1212         buffer,
1213         bufferCapacity,
1214         err);
1215 
1216     if (*err == U_STRING_NOT_TERMINATED_WARNING ||
1217         *err == U_BUFFER_OVERFLOW_ERROR) {
1218         *err = U_ILLEGAL_ARGUMENT_ERROR;
1219 
1220         return FALSE;
1221     }
1222     else if (U_FAILURE(*err)) {
1223 
1224         return FALSE;
1225     }
1226     else {
1227         return TRUE;
1228     }
1229 }
1230 
1231 U_CAPI int32_t U_EXPORT2
uloc_addLikelySubtags(const char * localeID,char * maximizedLocaleID,int32_t maximizedLocaleIDCapacity,UErrorCode * err)1232 uloc_addLikelySubtags(const char*    localeID,
1233          char* maximizedLocaleID,
1234          int32_t maximizedLocaleIDCapacity,
1235          UErrorCode* err)
1236 {
1237     char localeBuffer[ULOC_FULLNAME_CAPACITY];
1238 
1239     if (!do_canonicalize(
1240         localeID,
1241         localeBuffer,
1242         sizeof(localeBuffer),
1243         err)) {
1244         return -1;
1245     }
1246     else {
1247         return _uloc_addLikelySubtags(
1248                     localeBuffer,
1249                     maximizedLocaleID,
1250                     maximizedLocaleIDCapacity,
1251                     err);
1252     }
1253 }
1254 
1255 U_CAPI int32_t U_EXPORT2
uloc_minimizeSubtags(const char * localeID,char * minimizedLocaleID,int32_t minimizedLocaleIDCapacity,UErrorCode * err)1256 uloc_minimizeSubtags(const char*    localeID,
1257          char* minimizedLocaleID,
1258          int32_t minimizedLocaleIDCapacity,
1259          UErrorCode* err)
1260 {
1261     char localeBuffer[ULOC_FULLNAME_CAPACITY];
1262 
1263     if (!do_canonicalize(
1264         localeID,
1265         localeBuffer,
1266         sizeof(localeBuffer),
1267         err)) {
1268         return -1;
1269     }
1270     else {
1271         return _uloc_minimizeSubtags(
1272                     localeBuffer,
1273                     minimizedLocaleID,
1274                     minimizedLocaleIDCapacity,
1275                     err);
1276     }
1277 }
1278 
// Pairs of (language subtag, + or -) for finding out fast if common languages
// are LTR (minus) or RTL (plus).
// Each subtag is immediately followed by its direction marker, so a lookup
// finds the subtag and inspects the character right after it.
// Declared as a const array rather than a pointer to a literal: the table
// itself is immutable and no separate, re-assignable pointer object exists.
static const char LANG_DIR_STRING[] =
        "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
1283 
1284 // Implemented here because this calls uloc_addLikelySubtags().
1285 U_CAPI UBool U_EXPORT2
uloc_isRightToLeft(const char * locale)1286 uloc_isRightToLeft(const char *locale) {
1287     UErrorCode errorCode = U_ZERO_ERROR;
1288     char script[8];
1289     int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
1290     if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1291             scriptLength == 0) {
1292         // Fastpath: We know the likely scripts and their writing direction
1293         // for some common languages.
1294         errorCode = U_ZERO_ERROR;
1295         char lang[8];
1296         int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
1297         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1298                 langLength == 0) {
1299             return FALSE;
1300         }
1301         const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
1302         if (langPtr != NULL) {
1303             switch (langPtr[langLength]) {
1304             case '-': return FALSE;
1305             case '+': return TRUE;
1306             default: break;  // partial match of a longer code
1307             }
1308         }
1309         // Otherwise, find the likely script.
1310         errorCode = U_ZERO_ERROR;
1311         char likely[ULOC_FULLNAME_CAPACITY];
1312         (void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode);
1313         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1314             return FALSE;
1315         }
1316         scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode);
1317         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1318                 scriptLength == 0) {
1319             return FALSE;
1320         }
1321     }
1322     UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
1323     return uscript_isRightToLeft(scriptCode);
1324 }
1325 
1326 U_NAMESPACE_BEGIN
1327 
1328 UBool
isRightToLeft() const1329 Locale::isRightToLeft() const {
1330     return uloc_isRightToLeft(getBaseName());
1331 }
1332 
1333 U_NAMESPACE_END
1334