1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 1998-2015, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *
11 * File parse.cpp
12 *
13 * Modification History:
14 *
15 *   Date          Name          Description
16 *   05/26/99     stephen       Creation.
17 *   02/25/00     weiv          Overhaul to write udata
18 *   5/10/01      Ram           removed ustdio dependency
19 *   06/10/2001  Dominic Ludlam <dom@recoil.org> Rewritten
20 *******************************************************************************
21 */
22 
23 // Safer use of UnicodeString.
24 #ifndef UNISTR_FROM_CHAR_EXPLICIT
25 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
26 #endif
27 
28 // Less important, but still a good idea.
29 #ifndef UNISTR_FROM_STRING_EXPLICIT
30 #   define UNISTR_FROM_STRING_EXPLICIT explicit
31 #endif
32 
33 #include <assert.h>
34 #include "parse.h"
35 #include "errmsg.h"
36 #include "uhash.h"
37 #include "cmemory.h"
38 #include "cstring.h"
39 #include "uinvchar.h"
40 #include "read.h"
41 #include "ustr.h"
42 #include "reslist.h"
43 #include "rbt_pars.h"
44 #include "genrb.h"
45 #include "unicode/stringpiece.h"
46 #include "unicode/unistr.h"
47 #include "unicode/ustring.h"
48 #include "unicode/uscript.h"
49 #include "unicode/utf16.h"
50 #include "unicode/putil.h"
51 #include "charstr.h"
52 #include "collationbuilder.h"
53 #include "collationdata.h"
54 #include "collationdatareader.h"
55 #include "collationdatawriter.h"
56 #include "collationfastlatinbuilder.h"
57 #include "collationinfo.h"
58 #include "collationroot.h"
59 #include "collationruleparser.h"
60 #include "collationtailoring.h"
61 #include <stdio.h>
62 
63 /* Number of tokens to read ahead of the current stream position */
64 #define MAX_LOOKAHEAD   3
65 
66 #define CR               0x000D
67 #define LF               0x000A
68 #define SPACE            0x0020
69 #define TAB              0x0009
70 #define ESCAPE           0x005C
71 #define HASH             0x0023
72 #define QUOTE            0x0027
73 #define ZERO             0x0030
74 #define STARTCOMMAND     0x005B
75 #define ENDCOMMAND       0x005D
76 #define OPENSQBRACKET    0x005B
77 #define CLOSESQBRACKET   0x005D
78 
79 using icu::CharString;
80 using icu::LocalMemory;
81 using icu::LocalPointer;
82 using icu::LocalUCHARBUFPointer;
83 using icu::StringPiece;
84 using icu::UnicodeString;
85 
86 struct Lookahead
87 {
88      enum   ETokenType type;
89      struct UString    value;
90      struct UString    comment;
91      uint32_t          line;
92 };
93 
94 /* keep in sync with token defines in read.h */
95 const char *tokenNames[TOK_TOKEN_COUNT] =
96 {
97      "string",             /* A string token, such as "MonthNames" */
98      "'{'",                 /* An opening brace character */
99      "'}'",                 /* A closing brace character */
100      "','",                 /* A comma */
101      "':'",                 /* A colon */
102 
103      "<end of file>",     /* End of the file has been reached successfully */
104      "<end of line>"
105 };
106 
107 /* Just to store "TRUE" */
108 //static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
109 
110 typedef struct {
111     struct Lookahead  lookahead[MAX_LOOKAHEAD + 1];
112     uint32_t          lookaheadPosition;
113     UCHARBUF         *buffer;
114     struct SRBRoot *bundle;
115     const char     *inputdir;
116     uint32_t        inputdirLength;
117     const char     *outputdir;
118     uint32_t        outputdirLength;
119     const char     *filename;
120     UBool           makeBinaryCollation;
121     UBool           omitCollationRules;
122 } ParseState;
123 
124 typedef struct SResource *
125 ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
126 
127 static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
128 
129 /* The nature of the lookahead buffer:
130    There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer.  This provides
131    MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
132    When getToken is called, the current pointer is moved to the next slot and the
133    old slot is filled with the next token from the reader by calling getNextToken.
134    The token values are stored in the slot, which means that token values don't
135    survive a call to getToken, ie.
136 
137    UString *value;
138 
139    getToken(&value, NULL, status);
140    getToken(NULL,   NULL, status);       bad - value is now a different string
141 */
142 static void
143 initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
144 {
145     static uint32_t initTypeStrings = 0;
146     uint32_t i;
147 
148     if (!initTypeStrings)
149     {
150         initTypeStrings = 1;
151     }
152 
153     state->lookaheadPosition   = 0;
154     state->buffer              = buf;
155 
156     resetLineNumber();
157 
158     for (i = 0; i < MAX_LOOKAHEAD; i++)
159     {
160         state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
161         if (U_FAILURE(*status))
162         {
163             return;
164         }
165     }
166 
167     *status = U_ZERO_ERROR;
168 }
169 
170 static void
cleanupLookahead(ParseState * state)171 cleanupLookahead(ParseState* state)
172 {
173     uint32_t i;
174     for (i = 0; i <= MAX_LOOKAHEAD; i++)
175     {
176         ustr_deinit(&state->lookahead[i].value);
177         ustr_deinit(&state->lookahead[i].comment);
178     }
179 
180 }
181 
182 static enum ETokenType
getToken(ParseState * state,struct UString ** tokenValue,struct UString * comment,uint32_t * linenumber,UErrorCode * status)183 getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
184 {
185     enum ETokenType result;
186     uint32_t          i;
187 
188     result = state->lookahead[state->lookaheadPosition].type;
189 
190     if (tokenValue != NULL)
191     {
192         *tokenValue = &state->lookahead[state->lookaheadPosition].value;
193     }
194 
195     if (linenumber != NULL)
196     {
197         *linenumber = state->lookahead[state->lookaheadPosition].line;
198     }
199 
200     if (comment != NULL)
201     {
202         ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
203     }
204 
205     i = (state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1);
206     state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
207     ustr_setlen(&state->lookahead[i].comment, 0, status);
208     ustr_setlen(&state->lookahead[i].value, 0, status);
209     state->lookahead[i].type = getNextToken(state->buffer, &state->lookahead[i].value, &state->lookahead[i].line, &state->lookahead[i].comment, status);
210 
211     /* printf("getToken, returning %s\n", tokenNames[result]); */
212 
213     return result;
214 }
215 
216 static enum ETokenType
peekToken(ParseState * state,uint32_t lookaheadCount,struct UString ** tokenValue,uint32_t * linenumber,struct UString * comment,UErrorCode * status)217 peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
218 {
219     uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
220 
221     if (U_FAILURE(*status))
222     {
223         return TOK_ERROR;
224     }
225 
226     if (lookaheadCount >= MAX_LOOKAHEAD)
227     {
228         *status = U_INTERNAL_PROGRAM_ERROR;
229         return TOK_ERROR;
230     }
231 
232     if (tokenValue != NULL)
233     {
234         *tokenValue = &state->lookahead[i].value;
235     }
236 
237     if (linenumber != NULL)
238     {
239         *linenumber = state->lookahead[i].line;
240     }
241 
242     if(comment != NULL){
243         ustr_cpy(comment, &(state->lookahead[state->lookaheadPosition].comment), status);
244     }
245 
246     return state->lookahead[i].type;
247 }
248 
249 static void
expect(ParseState * state,enum ETokenType expectedToken,struct UString ** tokenValue,struct UString * comment,uint32_t * linenumber,UErrorCode * status)250 expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
251 {
252     uint32_t        line;
253 
254     enum ETokenType token = getToken(state, tokenValue, comment, &line, status);
255 
256     if (linenumber != NULL)
257     {
258         *linenumber = line;
259     }
260 
261     if (U_FAILURE(*status))
262     {
263         return;
264     }
265 
266     if (token != expectedToken)
267     {
268         *status = U_INVALID_FORMAT_ERROR;
269         error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]);
270     }
271     else
272     {
273         *status = U_ZERO_ERROR;
274     }
275 }
276 
getInvariantString(ParseState * state,uint32_t * line,struct UString * comment,int32_t & stringLength,UErrorCode * status)277 static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment,
278                                 int32_t &stringLength, UErrorCode *status)
279 {
280     struct UString *tokenValue;
281     char           *result;
282 
283     expect(state, TOK_STRING, &tokenValue, comment, line, status);
284 
285     if (U_FAILURE(*status))
286     {
287         return NULL;
288     }
289 
290     if(!uprv_isInvariantUString(tokenValue->fChars, tokenValue->fLength)) {
291         *status = U_INVALID_FORMAT_ERROR;
292         error(*line, "invariant characters required for table keys, binary data, etc.");
293         return NULL;
294     }
295 
296     result = static_cast<char *>(uprv_malloc(tokenValue->fLength+1));
297 
298     if (result == NULL)
299     {
300         *status = U_MEMORY_ALLOCATION_ERROR;
301         return NULL;
302     }
303 
304     u_UCharsToChars(tokenValue->fChars, result, tokenValue->fLength+1);
305     stringLength = tokenValue->fLength;
306     return result;
307 }
308 
309 static struct SResource *
parseUCARules(ParseState * state,char * tag,uint32_t startline,const struct UString *,UErrorCode * status)310 parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
311 {
312     struct SResource *result = NULL;
313     struct UString   *tokenValue;
314     FileStream       *file          = NULL;
315     char              filename[256] = { '\0' };
316     char              cs[128]       = { '\0' };
317     uint32_t          line;
318     UBool quoted = FALSE;
319     UCHARBUF *ucbuf=NULL;
320     UChar32   c     = 0;
321     const char* cp  = NULL;
322     UChar *pTarget     = NULL;
323     UChar *target      = NULL;
324     UChar *targetLimit = NULL;
325     int32_t size = 0;
326 
327     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
328 
329     if(isVerbose()){
330         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
331     }
332 
333     if (U_FAILURE(*status))
334     {
335         return NULL;
336     }
337     /* make the filename including the directory */
338     if (state->inputdir != NULL)
339     {
340         uprv_strcat(filename, state->inputdir);
341 
342         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
343         {
344             uprv_strcat(filename, U_FILE_SEP_STRING);
345         }
346     }
347 
348     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
349 
350     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
351 
352     if (U_FAILURE(*status))
353     {
354         return NULL;
355     }
356     uprv_strcat(filename, cs);
357 
358     if(state->omitCollationRules) {
359         return res_none();
360     }
361 
362     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
363 
364     if (U_FAILURE(*status)) {
365         error(line, "An error occurred while opening the input file %s\n", filename);
366         return NULL;
367     }
368 
369     /* We allocate more space than actually required
370     * since the actual size needed for storing UChars
371     * is not known in UTF-8 byte stream
372     */
373     size        = ucbuf_size(ucbuf) + 1;
374     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size);
375     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
376     target      = pTarget;
377     targetLimit = pTarget+size;
378 
379     /* read the rules into the buffer */
380     while (target < targetLimit)
381     {
382         c = ucbuf_getc(ucbuf, status);
383         if(c == QUOTE) {
384             quoted = (UBool)!quoted;
385         }
386         /* weiv (06/26/2002): adding the following:
387          * - preserving spaces in commands [...]
388          * - # comments until the end of line
389          */
390         if (c == STARTCOMMAND && !quoted)
391         {
392             /* preserve commands
393              * closing bracket will be handled by the
394              * append at the end of the loop
395              */
396             while(c != ENDCOMMAND) {
397                 U_APPEND_CHAR32_ONLY(c, target);
398                 c = ucbuf_getc(ucbuf, status);
399             }
400         }
401         else if (c == HASH && !quoted) {
402             /* skip comments */
403             while(c != CR && c != LF) {
404                 c = ucbuf_getc(ucbuf, status);
405             }
406             continue;
407         }
408         else if (c == ESCAPE)
409         {
410             c = unescape(ucbuf, status);
411 
412             if (c == (UChar32)U_ERR)
413             {
414                 uprv_free(pTarget);
415                 T_FileStream_close(file);
416                 return NULL;
417             }
418         }
419         else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
420         {
421             /* ignore spaces carriage returns
422             * and line feed unless in the form \uXXXX
423             */
424             continue;
425         }
426 
427         /* Append UChar * after dissembling if c > 0xffff*/
428         if (c != (UChar32)U_EOF)
429         {
430             U_APPEND_CHAR32_ONLY(c, target);
431         }
432         else
433         {
434             break;
435         }
436     }
437 
438     /* terminate the string */
439     if(target < targetLimit){
440         *target = 0x0000;
441     }
442 
443     result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status);
444 
445 
446     ucbuf_close(ucbuf);
447     uprv_free(pTarget);
448     T_FileStream_close(file);
449 
450     return result;
451 }
452 
453 static struct SResource *
parseTransliterator(ParseState * state,char * tag,uint32_t startline,const struct UString *,UErrorCode * status)454 parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
455 {
456     struct SResource *result = NULL;
457     struct UString   *tokenValue;
458     FileStream       *file          = NULL;
459     char              filename[256] = { '\0' };
460     char              cs[128]       = { '\0' };
461     uint32_t          line;
462     UCHARBUF *ucbuf=NULL;
463     const char* cp  = NULL;
464     UChar *pTarget     = NULL;
465     const UChar *pSource     = NULL;
466     int32_t size = 0;
467 
468     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
469 
470     if(isVerbose()){
471         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
472     }
473 
474     if (U_FAILURE(*status))
475     {
476         return NULL;
477     }
478     /* make the filename including the directory */
479     if (state->inputdir != NULL)
480     {
481         uprv_strcat(filename, state->inputdir);
482 
483         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
484         {
485             uprv_strcat(filename, U_FILE_SEP_STRING);
486         }
487     }
488 
489     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
490 
491     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
492 
493     if (U_FAILURE(*status))
494     {
495         return NULL;
496     }
497     uprv_strcat(filename, cs);
498 
499 
500     ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status);
501 
502     if (U_FAILURE(*status)) {
503         error(line, "An error occurred while opening the input file %s\n", filename);
504         return NULL;
505     }
506 
507     /* We allocate more space than actually required
508     * since the actual size needed for storing UChars
509     * is not known in UTF-8 byte stream
510     */
511     pSource = ucbuf_getBuffer(ucbuf, &size, status);
512     pTarget     = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
513     uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
514 
515 #if !UCONFIG_NO_TRANSLITERATION
516     size = utrans_stripRules(pSource, size, pTarget, status);
517 #else
518     size = 0;
519     fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
520 #endif
521     result = string_open(state->bundle, tag, pTarget, size, NULL, status);
522 
523     ucbuf_close(ucbuf);
524     uprv_free(pTarget);
525     T_FileStream_close(file);
526 
527     return result;
528 }
529 static ArrayResource* dependencyArray = NULL;
530 
531 static struct SResource *
parseDependency(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)532 parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
533 {
534     struct SResource *result = NULL;
535     struct SResource *elem = NULL;
536     struct UString   *tokenValue;
537     uint32_t          line;
538     char              filename[256] = { '\0' };
539     char              cs[128]       = { '\0' };
540 
541     expect(state, TOK_STRING, &tokenValue, NULL, &line, status);
542 
543     if(isVerbose()){
544         printf(" %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
545     }
546 
547     if (U_FAILURE(*status))
548     {
549         return NULL;
550     }
551     /* make the filename including the directory */
552     if (state->outputdir != NULL)
553     {
554         uprv_strcat(filename, state->outputdir);
555 
556         if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
557         {
558             uprv_strcat(filename, U_FILE_SEP_STRING);
559         }
560     }
561 
562     u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
563 
564     if (U_FAILURE(*status))
565     {
566         return NULL;
567     }
568     uprv_strcat(filename, cs);
569     if(!T_FileStream_file_exists(filename)){
570         if(isStrict()){
571             error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
572         }else{
573             warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
574         }
575     }
576     if(dependencyArray==NULL){
577         dependencyArray = array_open(state->bundle, "%%DEPENDENCY", NULL, status);
578     }
579     if(tag!=NULL){
580         result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
581     }
582     elem = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status);
583 
584     dependencyArray->add(elem);
585 
586     if (U_FAILURE(*status))
587     {
588         return NULL;
589     }
590     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
591     return result;
592 }
593 static struct SResource *
parseString(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)594 parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
595 {
596     struct UString   *tokenValue;
597     struct SResource *result = NULL;
598 
599 /*    if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0)
600     {
601         return parseUCARules(tag, startline, status);
602     }*/
603     if(isVerbose()){
604         printf(" string %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
605     }
606     expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
607 
608     if (U_SUCCESS(*status))
609     {
610         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
611         doesn't survive expect either) */
612 
613         result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
614         if(U_SUCCESS(*status) && result) {
615             expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
616 
617             if (U_FAILURE(*status))
618             {
619                 res_close(result);
620                 return NULL;
621             }
622         }
623     }
624 
625     return result;
626 }
627 
628 static struct SResource *
parseAlias(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)629 parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
630 {
631     struct UString   *tokenValue;
632     struct SResource *result  = NULL;
633 
634     expect(state, TOK_STRING, &tokenValue, NULL, NULL, status);
635 
636     if(isVerbose()){
637         printf(" alias %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
638     }
639 
640     if (U_SUCCESS(*status))
641     {
642         /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
643         doesn't survive expect either) */
644 
645         result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
646 
647         expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
648 
649         if (U_FAILURE(*status))
650         {
651             res_close(result);
652             return NULL;
653         }
654     }
655 
656     return result;
657 }
658 
659 #if !UCONFIG_NO_COLLATION
660 
661 namespace {
662 
resLookup(struct SResource * res,const char * key)663 static struct SResource* resLookup(struct SResource* res, const char* key){
664     if (res == res_none() || !res->isTable()) {
665         return NULL;
666     }
667 
668     TableResource *list = static_cast<TableResource *>(res);
669     SResource *current = list->fFirst;
670     while (current != NULL) {
671         if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
672             return current;
673         }
674         current = current->fNext;
675     }
676     return NULL;
677 }
678 
679 class GenrbImporter : public icu::CollationRuleParser::Importer {
680 public:
GenrbImporter(const char * in,const char * out)681     GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {}
682     virtual ~GenrbImporter();
683     virtual void getRules(
684             const char *localeID, const char *collationType,
685             UnicodeString &rules,
686             const char *&errorReason, UErrorCode &errorCode) override;
687 
688 private:
689     const char *inputDir;
690     const char *outputDir;
691 };
692 
~GenrbImporter()693 GenrbImporter::~GenrbImporter() {}
694 
695 void
getRules(const char * localeID,const char * collationType,UnicodeString & rules,const char * &,UErrorCode & errorCode)696 GenrbImporter::getRules(
697         const char *localeID, const char *collationType,
698         UnicodeString &rules,
699         const char *& /*errorReason*/, UErrorCode &errorCode) {
700     CharString filename(localeID, errorCode);
701     for(int32_t i = 0; i < filename.length(); i++){
702         if(filename[i] == '-'){
703             filename.data()[i] = '_';
704         }
705     }
706     filename.append(".txt", errorCode);
707     if (U_FAILURE(errorCode)) {
708         return;
709     }
710     CharString inputDirBuf;
711     CharString openFileName;
712     if(inputDir == NULL) {
713         const char *filenameBegin = uprv_strrchr(filename.data(), U_FILE_SEP_CHAR);
714         if (filenameBegin != NULL) {
715             /*
716              * When a filename ../../../data/root.txt is specified,
717              * we presume that the input directory is ../../../data
718              * This is very important when the resource file includes
719              * another file, like UCARules.txt or thaidict.brk.
720              */
721             StringPiece dir = filename.toStringPiece();
722             const char *filenameLimit = filename.data() + filename.length();
723             dir.remove_suffix((int32_t)(filenameLimit - filenameBegin));
724             inputDirBuf.append(dir, errorCode);
725             inputDir = inputDirBuf.data();
726         }
727     }else{
728         int32_t dirlen  = (int32_t)uprv_strlen(inputDir);
729 
730         if((filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')) {
731             /*
732              * append the input dir to openFileName if the first char in
733              * filename is not file separator char and the last char input directory is  not '.'.
734              * This is to support :
735              * genrb -s. /home/icu/data
736              * genrb -s. icu/data
737              * The user cannot mix notations like
738              * genrb -s. /icu/data --- the absolute path specified. -s redundant
739              * user should use
740              * genrb -s. icu/data  --- start from CWD and look in icu/data dir
741              */
742             openFileName.append(inputDir, dirlen, errorCode);
743             if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
744                 openFileName.append(U_FILE_SEP_CHAR, errorCode);
745             }
746         }
747     }
748     openFileName.append(filename, errorCode);
749     if(U_FAILURE(errorCode)) {
750         return;
751     }
752     // printf("GenrbImporter::getRules(%s, %s) reads %s\n", localeID, collationType, openFileName.data());
753     const char* cp = "";
754     LocalUCHARBUFPointer ucbuf(
755             ucbuf_open(openFileName.data(), &cp, getShowWarning(), TRUE, &errorCode));
756     if(errorCode == U_FILE_ACCESS_ERROR) {
757         fprintf(stderr, "couldn't open file %s\n", openFileName.data());
758         return;
759     }
760     if (ucbuf.isNull() || U_FAILURE(errorCode)) {
761         fprintf(stderr, "An error occurred processing file %s. Error: %s\n", openFileName.data(), u_errorName(errorCode));
762         return;
763     }
764 
765     /* Parse the data into an SRBRoot */
766     LocalPointer<SRBRoot> data(
767             parse(ucbuf.getAlias(), inputDir, outputDir, filename.data(), FALSE, FALSE, &errorCode));
768     if (U_FAILURE(errorCode)) {
769         return;
770     }
771 
772     struct SResource *root = data->fRoot;
773     struct SResource *collations = resLookup(root, "collations");
774     if (collations != NULL) {
775       struct SResource *collation = resLookup(collations, collationType);
776       if (collation != NULL) {
777         struct SResource *sequence = resLookup(collation, "Sequence");
778         if (sequence != NULL && sequence->isString()) {
779           // No string pointer aliasing so that we need not hold onto the resource bundle.
780           StringResource *sr = static_cast<StringResource *>(sequence);
781           rules = sr->fString;
782         }
783       }
784     }
785 }
786 
787 // Quick-and-dirty escaping function.
788 // Assumes that we are on an ASCII-based platform.
789 static void
escape(const UChar * s,char * buffer)790 escape(const UChar *s, char *buffer) {
791     int32_t length = u_strlen(s);
792     int32_t i = 0;
793     for (;;) {
794         UChar32 c;
795         U16_NEXT(s, i, length, c);
796         if (c == 0) {
797             *buffer = 0;
798             return;
799         } else if (0x20 <= c && c <= 0x7e) {
800             // printable ASCII
801             *buffer++ = (char)c;  // assumes ASCII-based platform
802         } else {
803             buffer += sprintf(buffer, "\\u%04X", (int)c);
804         }
805     }
806 }
807 
808 }  // namespace
809 
810 #endif  // !UCONFIG_NO_COLLATION
811 
812 static TableResource *
addCollation(ParseState * state,TableResource * result,const char * collationType,uint32_t startline,UErrorCode * status)813 addCollation(ParseState* state, TableResource  *result, const char *collationType,
814              uint32_t startline, UErrorCode *status)
815 {
816     // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
817     struct SResource  *member = NULL;
818     struct UString    *tokenValue;
819     struct UString     comment;
820     enum   ETokenType  token;
821     char               subtag[1024];
822     UnicodeString      rules;
823     UBool              haveRules = FALSE;
824     UVersionInfo       version;
825     uint32_t           line;
826 
827     /* '{' . (name resource)* '}' */
828     version[0]=0; version[1]=0; version[2]=0; version[3]=0;
829 
830     for (;;)
831     {
832         ustr_init(&comment);
833         token = getToken(state, &tokenValue, &comment, &line, status);
834 
835         if (token == TOK_CLOSE_BRACE)
836         {
837             break;
838         }
839 
840         if (token != TOK_STRING)
841         {
842             res_close(result);
843             *status = U_INVALID_FORMAT_ERROR;
844 
845             if (token == TOK_EOF)
846             {
847                 error(startline, "unterminated table");
848             }
849             else
850             {
851                 error(line, "Unexpected token %s", tokenNames[token]);
852             }
853 
854             return NULL;
855         }
856 
857         u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
858 
859         if (U_FAILURE(*status))
860         {
861             res_close(result);
862             return NULL;
863         }
864 
865         member = parseResource(state, subtag, NULL, status);
866 
867         if (U_FAILURE(*status))
868         {
869             res_close(result);
870             return NULL;
871         }
872         if (result == NULL)
873         {
874             // Ignore the parsed resources, continue parsing.
875         }
876         else if (uprv_strcmp(subtag, "Version") == 0 && member->isString())
877         {
878             StringResource *sr = static_cast<StringResource *>(member);
879             char     ver[40];
880             int32_t length = sr->length();
881 
882             if (length >= UPRV_LENGTHOF(ver))
883             {
884                 length = UPRV_LENGTHOF(ver) - 1;
885             }
886 
887             sr->fString.extract(0, length, ver, UPRV_LENGTHOF(ver), US_INV);
888             u_versionFromString(version, ver);
889 
890             result->add(member, line, *status);
891             member = NULL;
892         }
893         else if(uprv_strcmp(subtag, "%%CollationBin")==0)
894         {
895             /* discard duplicate %%CollationBin if any*/
896         }
897         else if (uprv_strcmp(subtag, "Sequence") == 0 && member->isString())
898         {
899             StringResource *sr = static_cast<StringResource *>(member);
900             rules = sr->fString;
901             haveRules = TRUE;
902             // Defer building the collator until we have seen
903             // all sub-elements of the collation table, including the Version.
904             /* in order to achieve smaller data files, we can direct genrb */
905             /* to omit collation rules */
906             if(!state->omitCollationRules) {
907                 result->add(member, line, *status);
908                 member = NULL;
909             }
910         }
911         else  // Just copy non-special items.
912         {
913             result->add(member, line, *status);
914             member = NULL;
915         }
916         res_close(member);  // TODO: use LocalPointer
917         if (U_FAILURE(*status))
918         {
919             res_close(result);
920             return NULL;
921         }
922     }
923 
924     if (!haveRules) { return result; }
925 
926 #if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
927     warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
928     (void)collationType;
929 #else
930     // CLDR ticket #3949, ICU ticket #8082:
931     // Do not build collation binary data for for-import-only "private" collation rule strings.
932     if (uprv_strncmp(collationType, "private-", 8) == 0) {
933         if(isVerbose()) {
934             printf("Not building %s~%s collation binary\n", state->filename, collationType);
935         }
936         return result;
937     }
938 
939     if(!state->makeBinaryCollation) {
940         if(isVerbose()) {
941             printf("Not building %s~%s collation binary\n", state->filename, collationType);
942         }
943         return result;
944     }
945     UErrorCode intStatus = U_ZERO_ERROR;
946     UParseError parseError;
947     uprv_memset(&parseError, 0, sizeof(parseError));
948     GenrbImporter importer(state->inputdir, state->outputdir);
949     const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus);
950     if(U_FAILURE(intStatus)) {
951         error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus));
952         res_close(result);
953         return NULL;  // TODO: use LocalUResourceBundlePointer for result
954     }
955     icu::CollationBuilder builder(base, intStatus);
956     if(uprv_strncmp(collationType, "search", 6) == 0) {
957         builder.disableFastLatin();  // build fast-Latin table unless search collator
958     }
959     LocalPointer<icu::CollationTailoring> t(
960             builder.parseAndBuild(rules, version, &importer, &parseError, intStatus));
961     if(U_FAILURE(intStatus)) {
962         const char *reason = builder.getErrorReason();
963         if(reason == NULL) { reason = ""; }
964         error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s  %s",
965                 state->filename, collationType,
966                 (long)parseError.offset, u_errorName(intStatus), reason);
967         if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) {
968             // Print pre- and post-context.
969             char preBuffer[100], postBuffer[100];
970             escape(parseError.preContext, preBuffer);
971             escape(parseError.postContext, postBuffer);
972             error(line, "  error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer);
973         }
974         if(isStrict() || t.isNull()) {
975             *status = intStatus;
976             res_close(result);
977             return NULL;
978         }
979     }
980     icu::LocalMemory<uint8_t> buffer;
981     int32_t capacity = 100000;
982     uint8_t *dest = buffer.allocateInsteadAndCopy(capacity);
983     if(dest == NULL) {
984         fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
985                 (long)capacity);
986         *status = U_MEMORY_ALLOCATION_ERROR;
987         res_close(result);
988         return NULL;
989     }
990     int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1];
991     int32_t totalSize = icu::CollationDataWriter::writeTailoring(
992             *t, *t->settings, indexes, dest, capacity, intStatus);
993     if(intStatus == U_BUFFER_OVERFLOW_ERROR) {
994         intStatus = U_ZERO_ERROR;
995         capacity = totalSize;
996         dest = buffer.allocateInsteadAndCopy(capacity);
997         if(dest == NULL) {
998             fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
999                     (long)capacity);
1000             *status = U_MEMORY_ALLOCATION_ERROR;
1001             res_close(result);
1002             return NULL;
1003         }
1004         totalSize = icu::CollationDataWriter::writeTailoring(
1005                 *t, *t->settings, indexes, dest, capacity, intStatus);
1006     }
1007     if(U_FAILURE(intStatus)) {
1008         fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n",
1009                 u_errorName(intStatus));
1010         res_close(result);
1011         return NULL;
1012     }
1013     if(isVerbose()) {
1014         printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
1015         icu::CollationInfo::printSizes(totalSize, indexes);
1016         if(t->settings->hasReordering()) {
1017             printf("%s~%s collation reordering ranges:\n", state->filename, collationType);
1018             icu::CollationInfo::printReorderRanges(
1019                     *t->data, t->settings->reorderCodes, t->settings->reorderCodesLength);
1020         }
1021 #if 0  // debugging output
1022     } else {
1023         printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
1024         icu::CollationInfo::printSizes(totalSize, indexes);
1025 #endif
1026     }
1027     struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status);
1028     result->add(collationBin, line, *status);
1029     if (U_FAILURE(*status)) {
1030         res_close(result);
1031         return NULL;
1032     }
1033 #endif
1034     return result;
1035 }
1036 
1037 static UBool
keepCollationType(const char *)1038 keepCollationType(const char * /*type*/) {
1039     return TRUE;
1040 }
1041 
1042 static struct SResource *
parseCollationElements(ParseState * state,char * tag,uint32_t startline,UBool newCollation,UErrorCode * status)1043 parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
1044 {
1045     TableResource  *result = NULL;
1046     struct SResource  *member = NULL;
1047     struct UString    *tokenValue;
1048     struct UString     comment;
1049     enum   ETokenType  token;
1050     char               subtag[1024], typeKeyword[1024];
1051     uint32_t           line;
1052 
1053     result = table_open(state->bundle, tag, NULL, status);
1054 
1055     if (result == NULL || U_FAILURE(*status))
1056     {
1057         return NULL;
1058     }
1059     if(isVerbose()){
1060         printf(" collation elements %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1061     }
1062     if(!newCollation) {
1063         return addCollation(state, result, "(no type)", startline, status);
1064     }
1065     else {
1066         for(;;) {
1067             ustr_init(&comment);
1068             token = getToken(state, &tokenValue, &comment, &line, status);
1069 
1070             if (token == TOK_CLOSE_BRACE)
1071             {
1072                 return result;
1073             }
1074 
1075             if (token != TOK_STRING)
1076             {
1077                 res_close(result);
1078                 *status = U_INVALID_FORMAT_ERROR;
1079 
1080                 if (token == TOK_EOF)
1081                 {
1082                     error(startline, "unterminated table");
1083                 }
1084                 else
1085                 {
1086                     error(line, "Unexpected token %s", tokenNames[token]);
1087                 }
1088 
1089                 return NULL;
1090             }
1091 
1092             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1093 
1094             if (U_FAILURE(*status))
1095             {
1096                 res_close(result);
1097                 return NULL;
1098             }
1099 
1100             if (uprv_strcmp(subtag, "default") == 0)
1101             {
1102                 member = parseResource(state, subtag, NULL, status);
1103 
1104                 if (U_FAILURE(*status))
1105                 {
1106                     res_close(result);
1107                     return NULL;
1108                 }
1109 
1110                 result->add(member, line, *status);
1111             }
1112             else
1113             {
1114                 token = peekToken(state, 0, &tokenValue, &line, &comment, status);
1115                 /* this probably needs to be refactored or recursively use the parser */
1116                 /* first we assume that our collation table won't have the explicit type */
1117                 /* then, we cannot handle aliases */
1118                 if(token == TOK_OPEN_BRACE) {
1119                     token = getToken(state, &tokenValue, &comment, &line, status);
1120                     TableResource *collationRes;
1121                     if (keepCollationType(subtag)) {
1122                         collationRes = table_open(state->bundle, subtag, NULL, status);
1123                     } else {
1124                         collationRes = NULL;
1125                     }
1126                     // need to parse the collation data regardless
1127                     collationRes = addCollation(state, collationRes, subtag, startline, status);
1128                     if (collationRes != NULL) {
1129                         result->add(collationRes, startline, *status);
1130                     }
1131                 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
1132                     /* we could have a table too */
1133                     token = peekToken(state, 1, &tokenValue, &line, &comment, status);
1134                     u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
1135                     if(uprv_strcmp(typeKeyword, "alias") == 0) {
1136                         member = parseResource(state, subtag, NULL, status);
1137                         if (U_FAILURE(*status))
1138                         {
1139                             res_close(result);
1140                             return NULL;
1141                         }
1142 
1143                         result->add(member, line, *status);
1144                     } else {
1145                         res_close(result);
1146                         *status = U_INVALID_FORMAT_ERROR;
1147                         return NULL;
1148                     }
1149                 } else {
1150                     res_close(result);
1151                     *status = U_INVALID_FORMAT_ERROR;
1152                     return NULL;
1153                 }
1154             }
1155 
1156             /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/
1157 
1158             /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/
1159 
1160             if (U_FAILURE(*status))
1161             {
1162                 res_close(result);
1163                 return NULL;
1164             }
1165         }
1166     }
1167 }
1168 
1169 /* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
1170    if this weren't special-cased, wouldn't be set until the entire file had been processed. */
1171 static struct SResource *
realParseTable(ParseState * state,TableResource * table,char * tag,uint32_t startline,UErrorCode * status)1172 realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t startline, UErrorCode *status)
1173 {
1174     struct SResource  *member = NULL;
1175     struct UString    *tokenValue=NULL;
1176     struct UString    comment;
1177     enum   ETokenType token;
1178     char              subtag[1024];
1179     uint32_t          line;
1180     UBool             readToken = FALSE;
1181 
1182     /* '{' . (name resource)* '}' */
1183 
1184     if(isVerbose()){
1185         printf(" parsing table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
1186     }
1187     for (;;)
1188     {
1189         ustr_init(&comment);
1190         token = getToken(state, &tokenValue, &comment, &line, status);
1191 
1192         if (token == TOK_CLOSE_BRACE)
1193         {
1194             if (!readToken && isVerbose()) {
1195                 warning(startline, "Encountered empty table");
1196             }
1197             return table;
1198         }
1199 
1200         if (token != TOK_STRING)
1201         {
1202             *status = U_INVALID_FORMAT_ERROR;
1203 
1204             if (token == TOK_EOF)
1205             {
1206                 error(startline, "unterminated table");
1207             }
1208             else
1209             {
1210                 error(line, "unexpected token %s", tokenNames[token]);
1211             }
1212 
1213             return NULL;
1214         }
1215 
1216         if(uprv_isInvariantUString(tokenValue->fChars, -1)) {
1217             u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
1218         } else {
1219             *status = U_INVALID_FORMAT_ERROR;
1220             error(line, "invariant characters required for table keys");
1221             return NULL;
1222         }
1223 
1224         if (U_FAILURE(*status))
1225         {
1226             error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
1227             return NULL;
1228         }
1229 
1230         member = parseResource(state, subtag, &comment, status);
1231 
1232         if (member == NULL || U_FAILURE(*status))
1233         {
1234             error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
1235             return NULL;
1236         }
1237 
1238         table->add(member, line, *status);
1239 
1240         if (U_FAILURE(*status))
1241         {
1242             error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
1243             return NULL;
1244         }
1245         readToken = TRUE;
1246         ustr_deinit(&comment);
1247    }
1248 
1249     /* not reached */
1250     /* A compiler warning will appear if all paths don't contain a return statement. */
1251 /*     *status = U_INTERNAL_PROGRAM_ERROR;
1252      return NULL;*/
1253 }
1254 
1255 static struct SResource *
parseTable(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1256 parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1257 {
1258     if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0)
1259     {
1260         return parseCollationElements(state, tag, startline, FALSE, status);
1261     }
1262     if (tag != NULL && uprv_strcmp(tag, "collations") == 0)
1263     {
1264         return parseCollationElements(state, tag, startline, TRUE, status);
1265     }
1266     if(isVerbose()){
1267         printf(" table %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1268     }
1269 
1270     TableResource *result = table_open(state->bundle, tag, comment, status);
1271 
1272     if (result == NULL || U_FAILURE(*status))
1273     {
1274         return NULL;
1275     }
1276     return realParseTable(state, result, tag, startline,  status);
1277 }
1278 
1279 static struct SResource *
parseArray(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1280 parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1281 {
1282     struct SResource  *member = NULL;
1283     struct UString    *tokenValue;
1284     struct UString    memberComments;
1285     enum   ETokenType token;
1286     UBool             readToken = FALSE;
1287 
1288     ArrayResource  *result = array_open(state->bundle, tag, comment, status);
1289 
1290     if (result == NULL || U_FAILURE(*status))
1291     {
1292         return NULL;
1293     }
1294     if(isVerbose()){
1295         printf(" array %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1296     }
1297 
1298     ustr_init(&memberComments);
1299 
1300     /* '{' . resource [','] '}' */
1301     for (;;)
1302     {
1303         /* reset length */
1304         ustr_setlen(&memberComments, 0, status);
1305 
1306         /* check for end of array, but don't consume next token unless it really is the end */
1307         token = peekToken(state, 0, &tokenValue, NULL, &memberComments, status);
1308 
1309 
1310         if (token == TOK_CLOSE_BRACE)
1311         {
1312             getToken(state, NULL, NULL, NULL, status);
1313             if (!readToken) {
1314                 warning(startline, "Encountered empty array");
1315             }
1316             break;
1317         }
1318 
1319         if (token == TOK_EOF)
1320         {
1321             res_close(result);
1322             *status = U_INVALID_FORMAT_ERROR;
1323             error(startline, "unterminated array");
1324             return NULL;
1325         }
1326 
1327         /* string arrays are a special case */
1328         if (token == TOK_STRING)
1329         {
1330             getToken(state, &tokenValue, &memberComments, NULL, status);
1331             member = string_open(state->bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
1332         }
1333         else
1334         {
1335             member = parseResource(state, NULL, &memberComments, status);
1336         }
1337 
1338         if (member == NULL || U_FAILURE(*status))
1339         {
1340             res_close(result);
1341             return NULL;
1342         }
1343 
1344         result->add(member);
1345 
1346         /* eat optional comma if present */
1347         token = peekToken(state, 0, NULL, NULL, NULL, status);
1348 
1349         if (token == TOK_COMMA)
1350         {
1351             getToken(state, NULL, NULL, NULL, status);
1352         }
1353 
1354         if (U_FAILURE(*status))
1355         {
1356             res_close(result);
1357             return NULL;
1358         }
1359         readToken = TRUE;
1360     }
1361 
1362     ustr_deinit(&memberComments);
1363     return result;
1364 }
1365 
1366 static struct SResource *
parseIntVector(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1367 parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1368 {
1369     enum   ETokenType  token;
1370     char              *string;
1371     int32_t            value;
1372     UBool              readToken = FALSE;
1373     char              *stopstring;
1374     struct UString     memberComments;
1375 
1376     IntVectorResource *result = intvector_open(state->bundle, tag, comment, status);
1377 
1378     if (result == NULL || U_FAILURE(*status))
1379     {
1380         return NULL;
1381     }
1382 
1383     if(isVerbose()){
1384         printf(" vector %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1385     }
1386     ustr_init(&memberComments);
1387     /* '{' . string [','] '}' */
1388     for (;;)
1389     {
1390         ustr_setlen(&memberComments, 0, status);
1391 
1392         /* check for end of array, but don't consume next token unless it really is the end */
1393         token = peekToken(state, 0, NULL, NULL,&memberComments, status);
1394 
1395         if (token == TOK_CLOSE_BRACE)
1396         {
1397             /* it's the end, consume the close brace */
1398             getToken(state, NULL, NULL, NULL, status);
1399             if (!readToken) {
1400                 warning(startline, "Encountered empty int vector");
1401             }
1402             ustr_deinit(&memberComments);
1403             return result;
1404         }
1405 
1406         int32_t stringLength;
1407         string = getInvariantString(state, NULL, NULL, stringLength, status);
1408 
1409         if (U_FAILURE(*status))
1410         {
1411             res_close(result);
1412             return NULL;
1413         }
1414 
1415         /* For handling illegal char in the Intvector */
1416         value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
1417         int32_t len = (int32_t)(stopstring-string);
1418 
1419         if(len==stringLength)
1420         {
1421             result->add(value, *status);
1422             uprv_free(string);
1423             token = peekToken(state, 0, NULL, NULL, NULL, status);
1424         }
1425         else
1426         {
1427             uprv_free(string);
1428             *status=U_INVALID_CHAR_FOUND;
1429         }
1430 
1431         if (U_FAILURE(*status))
1432         {
1433             res_close(result);
1434             return NULL;
1435         }
1436 
1437         /* the comma is optional (even though it is required to prevent the reader from concatenating
1438         consecutive entries) so that a missing comma on the last entry isn't an error */
1439         if (token == TOK_COMMA)
1440         {
1441             getToken(state, NULL, NULL, NULL, status);
1442         }
1443         readToken = TRUE;
1444     }
1445 
1446     /* not reached */
1447     /* A compiler warning will appear if all paths don't contain a return statement. */
1448 /*    intvector_close(result, status);
1449     *status = U_INTERNAL_PROGRAM_ERROR;
1450     return NULL;*/
1451 }
1452 
1453 static struct SResource *
parseBinary(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1454 parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1455 {
1456     uint32_t line;
1457     int32_t stringLength;
1458     LocalMemory<char> string(getInvariantString(state, &line, NULL, stringLength, status));
1459     if (string.isNull() || U_FAILURE(*status))
1460     {
1461         return NULL;
1462     }
1463 
1464     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1465     if (U_FAILURE(*status))
1466     {
1467         return NULL;
1468     }
1469 
1470     if(isVerbose()){
1471         printf(" binary %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1472     }
1473 
1474     LocalMemory<uint8_t> value;
1475     int32_t count = 0;
1476     if (stringLength > 0 && value.allocateInsteadAndCopy(stringLength) == NULL)
1477     {
1478         *status = U_MEMORY_ALLOCATION_ERROR;
1479         return NULL;
1480     }
1481 
1482     char toConv[3] = {'\0', '\0', '\0'};
1483     for (int32_t i = 0; i < stringLength;)
1484     {
1485         // Skip spaces (which may have been line endings).
1486         char c0 = string[i++];
1487         if (c0 == ' ') { continue; }
1488         if (i == stringLength) {
1489             *status=U_INVALID_CHAR_FOUND;
1490             error(line, "Encountered invalid binary value (odd number of hex digits)");
1491             return NULL;
1492         }
1493         toConv[0] = c0;
1494         toConv[1] = string[i++];
1495 
1496         char *stopstring;
1497         value[count++] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
1498         uint32_t len=(uint32_t)(stopstring-toConv);
1499 
1500         if(len!=2)
1501         {
1502             *status=U_INVALID_CHAR_FOUND;
1503             error(line, "Encountered invalid binary value (not all pairs of hex digits)");
1504             return NULL;
1505         }
1506     }
1507 
1508     if (count == 0) {
1509         warning(startline, "Encountered empty binary value");
1510         return bin_open(state->bundle, tag, 0, NULL, "", comment, status);
1511     } else {
1512         return bin_open(state->bundle, tag, count, value.getAlias(), NULL, comment, status);
1513     }
1514 }
1515 
1516 static struct SResource *
parseInteger(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1517 parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
1518 {
1519     struct SResource *result = NULL;
1520     int32_t           value;
1521     char             *string;
1522     char             *stopstring;
1523 
1524     int32_t stringLength;
1525     string = getInvariantString(state, NULL, NULL, stringLength, status);
1526 
1527     if (string == NULL || U_FAILURE(*status))
1528     {
1529         return NULL;
1530     }
1531 
1532     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1533 
1534     if (U_FAILURE(*status))
1535     {
1536         uprv_free(string);
1537         return NULL;
1538     }
1539 
1540     if(isVerbose()){
1541         printf(" integer %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1542     }
1543 
1544     if (stringLength == 0)
1545     {
1546         warning(startline, "Encountered empty integer. Default value is 0.");
1547     }
1548 
1549     /* Allow integer support for hexdecimal, octal digit and decimal*/
1550     /* and handle illegal char in the integer*/
1551     value = uprv_strtoul(string, &stopstring, 0);
1552     int32_t len = (int32_t)(stopstring-string);
1553     if(len==stringLength)
1554     {
1555         result = int_open(state->bundle, tag, value, comment, status);
1556     }
1557     else
1558     {
1559         *status=U_INVALID_CHAR_FOUND;
1560     }
1561     uprv_free(string);
1562 
1563     return result;
1564 }
1565 
1566 static struct SResource *
parseImport(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1567 parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1568 {
1569     uint32_t          line;
1570     int32_t stringLength;
1571     LocalMemory<char> filename(getInvariantString(state, &line, NULL, stringLength, status));
1572     if (U_FAILURE(*status))
1573     {
1574         return NULL;
1575     }
1576 
1577     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1578 
1579     if (U_FAILURE(*status))
1580     {
1581         return NULL;
1582     }
1583 
1584     if(isVerbose()){
1585         printf(" import %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1586     }
1587 
1588     /* Open the input file for reading */
1589     CharString fullname;
1590     if (state->inputdir != NULL) {
1591         fullname.append(state->inputdir, *status);
1592     }
1593     fullname.appendPathPart(filename.getAlias(), *status);
1594     if (U_FAILURE(*status)) {
1595         return NULL;
1596     }
1597 
1598     FileStream *file = T_FileStream_open(fullname.data(), "rb");
1599     if (file == NULL)
1600     {
1601         error(line, "couldn't open input file %s", filename.getAlias());
1602         *status = U_FILE_ACCESS_ERROR;
1603         return NULL;
1604     }
1605 
1606     int32_t len  = T_FileStream_size(file);
1607     LocalMemory<uint8_t> data;
1608     if(data.allocateInsteadAndCopy(len) == NULL)
1609     {
1610         *status = U_MEMORY_ALLOCATION_ERROR;
1611         T_FileStream_close (file);
1612         return NULL;
1613     }
1614 
1615     /* int32_t numRead = */ T_FileStream_read(file, data.getAlias(), len);
1616     T_FileStream_close (file);
1617 
1618     return bin_open(state->bundle, tag, len, data.getAlias(), fullname.data(), comment, status);
1619 }
1620 
1621 static struct SResource *
parseInclude(ParseState * state,char * tag,uint32_t startline,const struct UString * comment,UErrorCode * status)1622 parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
1623 {
1624     struct SResource *result;
1625     int32_t           len=0;
1626     char             *filename;
1627     uint32_t          line;
1628     UChar *pTarget     = NULL;
1629 
1630     UCHARBUF *ucbuf;
1631     char     *fullname = NULL;
1632     const char* cp = NULL;
1633     const UChar* uBuffer = NULL;
1634 
1635     int32_t stringLength;
1636     filename = getInvariantString(state, &line, NULL, stringLength, status);
1637 
1638     if (U_FAILURE(*status))
1639     {
1640         return NULL;
1641     }
1642 
1643     expect(state, TOK_CLOSE_BRACE, NULL, NULL, NULL, status);
1644 
1645     if (U_FAILURE(*status))
1646     {
1647         uprv_free(filename);
1648         return NULL;
1649     }
1650 
1651     if(isVerbose()){
1652         printf(" include %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1653     }
1654 
1655     fullname = (char *) uprv_malloc(state->inputdirLength + stringLength + 2);
1656     /* test for NULL */
1657     if(fullname == NULL)
1658     {
1659         *status = U_MEMORY_ALLOCATION_ERROR;
1660         uprv_free(filename);
1661         return NULL;
1662     }
1663 
1664     if(state->inputdir!=NULL){
1665         if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
1666         {
1667 
1668             uprv_strcpy(fullname, state->inputdir);
1669 
1670             fullname[state->inputdirLength]      = U_FILE_SEP_CHAR;
1671             fullname[state->inputdirLength + 1] = '\0';
1672 
1673             uprv_strcat(fullname, filename);
1674         }
1675         else
1676         {
1677             uprv_strcpy(fullname, state->inputdir);
1678             uprv_strcat(fullname, filename);
1679         }
1680     }else{
1681         uprv_strcpy(fullname,filename);
1682     }
1683 
1684     ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status);
1685 
1686     if (U_FAILURE(*status)) {
1687         error(line, "couldn't open input file %s\n", filename);
1688         return NULL;
1689     }
1690 
1691     uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
1692     result = string_open(state->bundle, tag, uBuffer, len, comment, status);
1693 
1694     ucbuf_close(ucbuf);
1695 
1696     uprv_free(pTarget);
1697 
1698     uprv_free(filename);
1699     uprv_free(fullname);
1700 
1701     return result;
1702 }
1703 
1704 
1705 
1706 
1707 
1708 U_STRING_DECL(k_type_string,    "string",    6);
1709 U_STRING_DECL(k_type_binary,    "binary",    6);
1710 U_STRING_DECL(k_type_bin,       "bin",       3);
1711 U_STRING_DECL(k_type_table,     "table",     5);
1712 U_STRING_DECL(k_type_table_no_fallback,     "table(nofallback)",         17);
1713 U_STRING_DECL(k_type_int,       "int",       3);
1714 U_STRING_DECL(k_type_integer,   "integer",   7);
1715 U_STRING_DECL(k_type_array,     "array",     5);
1716 U_STRING_DECL(k_type_alias,     "alias",     5);
1717 U_STRING_DECL(k_type_intvector, "intvector", 9);
1718 U_STRING_DECL(k_type_import,    "import",    6);
1719 U_STRING_DECL(k_type_include,   "include",   7);
1720 
1721 /* Various non-standard processing plugins that create one or more special resources. */
1722 U_STRING_DECL(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
1723 U_STRING_DECL(k_type_plugin_collation,      "process(collation)",        18);
1724 U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)",   23);
1725 U_STRING_DECL(k_type_plugin_dependency,     "process(dependency)",       19);
1726 
1727 typedef enum EResourceType
1728 {
1729     RESTYPE_UNKNOWN,
1730     RESTYPE_STRING,
1731     RESTYPE_BINARY,
1732     RESTYPE_TABLE,
1733     RESTYPE_TABLE_NO_FALLBACK,
1734     RESTYPE_INTEGER,
1735     RESTYPE_ARRAY,
1736     RESTYPE_ALIAS,
1737     RESTYPE_INTVECTOR,
1738     RESTYPE_IMPORT,
1739     RESTYPE_INCLUDE,
1740     RESTYPE_PROCESS_UCA_RULES,
1741     RESTYPE_PROCESS_COLLATION,
1742     RESTYPE_PROCESS_TRANSLITERATOR,
1743     RESTYPE_PROCESS_DEPENDENCY,
1744     RESTYPE_RESERVED
1745 } EResourceType;
1746 
1747 static struct {
1748     const char *nameChars;   /* only used for debugging */
1749     const UChar *nameUChars;
1750     ParseResourceFunction *parseFunction;
1751 } gResourceTypes[] = {
1752     {"Unknown", NULL, NULL},
1753     {"string", k_type_string, parseString},
1754     {"binary", k_type_binary, parseBinary},
1755     {"table", k_type_table, parseTable},
1756     {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */
1757     {"integer", k_type_integer, parseInteger},
1758     {"array", k_type_array, parseArray},
1759     {"alias", k_type_alias, parseAlias},
1760     {"intvector", k_type_intvector, parseIntVector},
1761     {"import", k_type_import, parseImport},
1762     {"include", k_type_include, parseInclude},
1763     {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
1764     {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */},
1765     {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
1766     {"process(dependency)", k_type_plugin_dependency, parseDependency},
1767     {"reserved", NULL, NULL}
1768 };
1769 
initParser()1770 void initParser()
1771 {
1772     U_STRING_INIT(k_type_string,    "string",    6);
1773     U_STRING_INIT(k_type_binary,    "binary",    6);
1774     U_STRING_INIT(k_type_bin,       "bin",       3);
1775     U_STRING_INIT(k_type_table,     "table",     5);
1776     U_STRING_INIT(k_type_table_no_fallback,     "table(nofallback)",         17);
1777     U_STRING_INIT(k_type_int,       "int",       3);
1778     U_STRING_INIT(k_type_integer,   "integer",   7);
1779     U_STRING_INIT(k_type_array,     "array",     5);
1780     U_STRING_INIT(k_type_alias,     "alias",     5);
1781     U_STRING_INIT(k_type_intvector, "intvector", 9);
1782     U_STRING_INIT(k_type_import,    "import",    6);
1783     U_STRING_INIT(k_type_include,   "include",   7);
1784 
1785     U_STRING_INIT(k_type_plugin_uca_rules,      "process(uca_rules)",        18);
1786     U_STRING_INIT(k_type_plugin_collation,      "process(collation)",        18);
1787     U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)",   23);
1788     U_STRING_INIT(k_type_plugin_dependency,     "process(dependency)",       19);
1789 }
1790 
isTable(enum EResourceType type)1791 static inline UBool isTable(enum EResourceType type) {
1792     return (UBool)(type==RESTYPE_TABLE || type==RESTYPE_TABLE_NO_FALLBACK);
1793 }
1794 
1795 static enum EResourceType
parseResourceType(ParseState * state,UErrorCode * status)1796 parseResourceType(ParseState* state, UErrorCode *status)
1797 {
1798     struct UString        *tokenValue;
1799     struct UString        comment;
1800     enum   EResourceType  result = RESTYPE_UNKNOWN;
1801     uint32_t              line=0;
1802     ustr_init(&comment);
1803     expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
1804 
1805     if (U_FAILURE(*status))
1806     {
1807         return RESTYPE_UNKNOWN;
1808     }
1809 
1810     *status = U_ZERO_ERROR;
1811 
1812     /* Search for normal types */
1813     result=RESTYPE_UNKNOWN;
1814     while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) {
1815         if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
1816             break;
1817         }
1818     }
1819     /* Now search for the aliases */
1820     if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
1821         result = RESTYPE_INTEGER;
1822     }
1823     else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
1824         result = RESTYPE_BINARY;
1825     }
1826     else if (result == RESTYPE_RESERVED) {
1827         char tokenBuffer[1024];
1828         u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
1829         tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
1830         *status = U_INVALID_FORMAT_ERROR;
1831         error(line, "unknown resource type '%s'", tokenBuffer);
1832     }
1833 
1834     return result;
1835 }
1836 
1837 /* parse a non-top-level resource */
1838 static struct SResource *
parseResource(ParseState * state,char * tag,const struct UString * comment,UErrorCode * status)1839 parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
1840 {
1841     enum   ETokenType      token;
1842     enum   EResourceType  resType = RESTYPE_UNKNOWN;
1843     ParseResourceFunction *parseFunction = NULL;
1844     struct UString        *tokenValue;
1845     uint32_t                 startline;
1846     uint32_t                 line;
1847 
1848 
1849     token = getToken(state, &tokenValue, NULL, &startline, status);
1850 
1851     if(isVerbose()){
1852         printf(" resource %s at line %i \n",  (tag == NULL) ? "(null)" : tag, (int)startline);
1853     }
1854 
1855     /* name . [ ':' type ] '{' resource '}' */
1856     /* This function parses from the colon onwards.  If the colon is present, parse the
1857     type then try to parse a resource of that type.  If there is no explicit type,
1858     work it out using the lookahead tokens. */
1859     switch (token)
1860     {
1861     case TOK_EOF:
1862         *status = U_INVALID_FORMAT_ERROR;
1863         error(startline, "Unexpected EOF encountered");
1864         return NULL;
1865 
1866     case TOK_ERROR:
1867         *status = U_INVALID_FORMAT_ERROR;
1868         return NULL;
1869 
1870     case TOK_COLON:
1871         resType = parseResourceType(state, status);
1872         expect(state, TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status);
1873 
1874         if (U_FAILURE(*status))
1875         {
1876             return NULL;
1877         }
1878 
1879         break;
1880 
1881     case TOK_OPEN_BRACE:
1882         break;
1883 
1884     default:
1885         *status = U_INVALID_FORMAT_ERROR;
1886         error(startline, "syntax error while reading a resource, expected '{' or ':'");
1887         return NULL;
1888     }
1889 
1890 
1891     if (resType == RESTYPE_UNKNOWN)
1892     {
1893         /* No explicit type, so try to work it out.  At this point, we've read the first '{'.
1894         We could have any of the following:
1895         { {         => array (nested)
1896         { :/}       => array
1897         { string ,  => string array
1898 
1899         { string {  => table
1900 
1901         { string :/{    => table
1902         { string }      => string
1903         */
1904 
1905         token = peekToken(state, 0, NULL, &line, NULL,status);
1906 
1907         if (U_FAILURE(*status))
1908         {
1909             return NULL;
1910         }
1911 
1912         if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE )
1913         {
1914             resType = RESTYPE_ARRAY;
1915         }
1916         else if (token == TOK_STRING)
1917         {
1918             token = peekToken(state, 1, NULL, &line, NULL, status);
1919 
1920             if (U_FAILURE(*status))
1921             {
1922                 return NULL;
1923             }
1924 
1925             switch (token)
1926             {
1927             case TOK_COMMA:         resType = RESTYPE_ARRAY;  break;
1928             case TOK_OPEN_BRACE:    resType = RESTYPE_TABLE;  break;
1929             case TOK_CLOSE_BRACE:   resType = RESTYPE_STRING; break;
1930             case TOK_COLON:         resType = RESTYPE_TABLE;  break;
1931             default:
1932                 *status = U_INVALID_FORMAT_ERROR;
1933                 error(line, "Unexpected token after string, expected ',', '{' or '}'");
1934                 return NULL;
1935             }
1936         }
1937         else
1938         {
1939             *status = U_INVALID_FORMAT_ERROR;
1940             error(line, "Unexpected token after '{'");
1941             return NULL;
1942         }
1943 
1944         /* printf("Type guessed as %s\n", resourceNames[resType]); */
1945     } else if(resType == RESTYPE_TABLE_NO_FALLBACK) {
1946         *status = U_INVALID_FORMAT_ERROR;
1947         error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
1948         return NULL;
1949     }
1950 
1951 
1952     /* We should now know what we need to parse next, so call the appropriate parser
1953     function and return. */
1954     parseFunction = gResourceTypes[resType].parseFunction;
1955     if (parseFunction != NULL) {
1956         return parseFunction(state, tag, startline, comment, status);
1957     }
1958     else {
1959         *status = U_INTERNAL_PROGRAM_ERROR;
1960         error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
1961     }
1962 
1963     return NULL;
1964 }
1965 
1966 /* parse the top-level resource */
1967 struct SRBRoot *
parse(UCHARBUF * buf,const char * inputDir,const char * outputDir,const char * filename,UBool makeBinaryCollation,UBool omitCollationRules,UErrorCode * status)1968 parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename,
1969       UBool makeBinaryCollation, UBool omitCollationRules, UErrorCode *status)
1970 {
1971     struct UString    *tokenValue;
1972     struct UString    comment;
1973     uint32_t           line;
1974     enum EResourceType bundleType;
1975     enum ETokenType    token;
1976     ParseState state;
1977     uint32_t i;
1978 
1979 
1980     for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
1981     {
1982         ustr_init(&state.lookahead[i].value);
1983         ustr_init(&state.lookahead[i].comment);
1984     }
1985 
1986     initLookahead(&state, buf, status);
1987 
1988     state.inputdir       = inputDir;
1989     state.inputdirLength = (state.inputdir != NULL) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
1990     state.outputdir       = outputDir;
1991     state.outputdirLength = (state.outputdir != NULL) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
1992     state.filename = filename;
1993     state.makeBinaryCollation = makeBinaryCollation;
1994     state.omitCollationRules = omitCollationRules;
1995 
1996     ustr_init(&comment);
1997     expect(&state, TOK_STRING, &tokenValue, &comment, NULL, status);
1998 
1999     state.bundle = new SRBRoot(&comment, FALSE, *status);
2000 
2001     if (state.bundle == NULL || U_FAILURE(*status))
2002     {
2003         delete state.bundle;
2004 
2005         return NULL;
2006     }
2007 
2008 
2009     state.bundle->setLocale(tokenValue->fChars, *status);
2010 
2011     /* The following code is to make Empty bundle work no matter with :table specifer or not */
2012     token = getToken(&state, NULL, NULL, &line, status);
2013     if(token==TOK_COLON) {
2014         *status=U_ZERO_ERROR;
2015         bundleType=parseResourceType(&state, status);
2016 
2017         if(isTable(bundleType))
2018         {
2019             expect(&state, TOK_OPEN_BRACE, NULL, NULL, &line, status);
2020         }
2021         else
2022         {
2023             *status=U_PARSE_ERROR;
2024              error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
2025         }
2026     }
2027     else
2028     {
2029         /* not a colon */
2030         if(token==TOK_OPEN_BRACE)
2031         {
2032             *status=U_ZERO_ERROR;
2033             bundleType=RESTYPE_TABLE;
2034         }
2035         else
2036         {
2037             /* neither colon nor open brace */
2038             *status=U_PARSE_ERROR;
2039             bundleType=RESTYPE_UNKNOWN;
2040             error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
2041         }
2042     }
2043 
2044     if (U_FAILURE(*status))
2045     {
2046         delete state.bundle;
2047         return NULL;
2048     }
2049 
2050     if(bundleType==RESTYPE_TABLE_NO_FALLBACK) {
2051         /*
2052          * Parse a top-level table with the table(nofallback) declaration.
2053          * This is the same as a regular table, but also sets the
2054          * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
2055          */
2056         state.bundle->fNoFallback=TRUE;
2057     }
2058     /* top-level tables need not handle special table names like "collations" */
2059     assert(!state.bundle->fIsPoolBundle);
2060     assert(state.bundle->fRoot->fType == URES_TABLE);
2061     TableResource *rootTable = static_cast<TableResource *>(state.bundle->fRoot);
2062     realParseTable(&state, rootTable, NULL, line, status);
2063     if(dependencyArray!=NULL){
2064         rootTable->add(dependencyArray, 0, *status);
2065         dependencyArray = NULL;
2066     }
2067    if (U_FAILURE(*status))
2068     {
2069         delete state.bundle;
2070         res_close(dependencyArray);
2071         return NULL;
2072     }
2073 
2074     if (getToken(&state, NULL, NULL, &line, status) != TOK_EOF)
2075     {
2076         warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
2077         if(isStrict()){
2078             *status = U_INVALID_FORMAT_ERROR;
2079             return NULL;
2080         }
2081     }
2082 
2083     cleanupLookahead(&state);
2084     ustr_deinit(&comment);
2085     return state.bundle;
2086 }
2087