1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 1998-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *
11 * File ucbuf.cpp
12 *
13 * Modification History:
14 *
15 *   Date        Name        Description
16 *   05/10/01    Ram         Creation.
17 *******************************************************************************
18 */
19 
20 #include "unicode/utypes.h"
21 #include "unicode/putil.h"
22 #include "unicode/uchar.h"
23 #include "unicode/ucnv.h"
24 #include "unicode/ucnv_err.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utf16.h"
27 #include "filestrm.h"
28 #include "cstring.h"
29 #include "cmemory.h"
30 #include "ustrfmt.h"
31 #include "ucbuf.h"
32 #include <stdio.h>
33 
34 #if !UCONFIG_NO_CONVERSION
35 
36 
/* Size in bytes of the on-stack input buffer in ucbuf_fillucbuf(); in buffered
 * mode it also bounds the number of bytes read from the file per refill. */
#define MAX_IN_BUF 1000
/* Capacity, in UChars, of the internal UChar buffer when the UCHARBUF is buffered. */
#define MAX_U_BUF 1500
/* Size in bytes of each diagnostic snippet (pre-context, invalid bytes,
 * post-context) collected when a conversion error is reported. */
#define CONTEXT_LEN 20
40 
/*
 * State of a UChar buffer that is filled by reading a file and converting
 * its bytes to UChars.
 */
struct UCHARBUF {
    UChar* buffer;           /* start of the allocated UChar storage */
    UChar* currentPos;       /* next UChar to be handed out */
    UChar* bufLimit;         /* one past the last valid UChar; a NUL is stored here after each fill */
    int32_t bufCapacity;     /* number of UChars allocated for buffer */
    int32_t remaining;       /* bytes of the file (excluding the signature) not yet read */
    int32_t signatureLength; /* length in bytes of the Unicode signature (BOM); 0 if none was found */
    FileStream* in;          /* stream the bytes are read from */
    UConverter* conv;        /* converter used to turn the file bytes into UChars */
    UBool showWarning;       /* when TRUE, diagnostics are printed to stderr */
    UBool isBuffered;        /* TRUE: read in MAX_IN_BUF chunks; FALSE: read the whole file in one fill */
};
53 
54 U_CAPI UBool U_EXPORT2
ucbuf_autodetect_fs(FileStream * in,const char ** cp,UConverter ** conv,int32_t * signatureLength,UErrorCode * error)55 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){
56     char start[8];
57     int32_t numRead;
58 
59     UChar target[1]={ 0 };
60     UChar* pTarget;
61     const char* pStart;
62 
63     /* read a few bytes */
64     numRead=T_FileStream_read(in, start, sizeof(start));
65 
66     *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error);
67 
68     /* unread the bytes beyond what was consumed for U+FEFF */
69     T_FileStream_rewind(in);
70     if (*signatureLength > 0) {
71         T_FileStream_read(in, start, *signatureLength);
72     }
73 
74     if(*cp==NULL){
75         *conv =NULL;
76         return FALSE;
77     }
78 
79     /* open the converter for the detected Unicode charset */
80     *conv = ucnv_open(*cp,error);
81 
82     /* convert and ignore initial U+FEFF, and the buffer overflow */
83     pTarget = target;
84     pStart = start;
85     ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error);
86     *signatureLength = (int32_t)(pStart - start);
87     if(*error==U_BUFFER_OVERFLOW_ERROR) {
88         *error=U_ZERO_ERROR;
89     }
90 
91     /* verify that we successfully read exactly U+FEFF */
92     if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) {
93         *error=U_INTERNAL_PROGRAM_ERROR;
94     }
95 
96 
97     return TRUE;
98 }
ucbuf_isCPKnown(const char * cp)99 static UBool ucbuf_isCPKnown(const char* cp){
100     if(ucnv_compareNames("UTF-8",cp)==0){
101         return TRUE;
102     }
103     if(ucnv_compareNames("UTF-16BE",cp)==0){
104         return TRUE;
105     }
106     if(ucnv_compareNames("UTF-16LE",cp)==0){
107         return TRUE;
108     }
109     if(ucnv_compareNames("UTF-16",cp)==0){
110         return TRUE;
111     }
112     if(ucnv_compareNames("UTF-32",cp)==0){
113         return TRUE;
114     }
115     if(ucnv_compareNames("UTF-32BE",cp)==0){
116         return TRUE;
117     }
118     if(ucnv_compareNames("UTF-32LE",cp)==0){
119         return TRUE;
120     }
121     if(ucnv_compareNames("SCSU",cp)==0){
122         return TRUE;
123     }
124     if(ucnv_compareNames("BOCU-1",cp)==0){
125         return TRUE;
126     }
127     if(ucnv_compareNames("UTF-7",cp)==0){
128         return TRUE;
129     }
130     return FALSE;
131 }
132 
133 U_CAPI FileStream * U_EXPORT2
ucbuf_autodetect(const char * fileName,const char ** cp,UConverter ** conv,int32_t * signatureLength,UErrorCode * error)134 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){
135     FileStream* in=NULL;
136     if(error==NULL || U_FAILURE(*error)){
137         return NULL;
138     }
139     if(conv==NULL || cp==NULL || fileName==NULL){
140         *error = U_ILLEGAL_ARGUMENT_ERROR;
141         return NULL;
142     }
143     /* open the file */
144     in= T_FileStream_open(fileName,"rb");
145 
146     if(in == NULL){
147         *error=U_FILE_ACCESS_ERROR;
148         return NULL;
149     }
150 
151     if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) {
152         return in;
153     } else {
154         ucnv_close(*conv);
155         *conv=NULL;
156         T_FileStream_close(in);
157         return NULL;
158     }
159 }
160 
161 /* fill the uchar buffer */
162 static UCHARBUF*
ucbuf_fillucbuf(UCHARBUF * buf,UErrorCode * error)163 ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){
164     UChar* pTarget=NULL;
165     UChar* target=NULL;
166     const char* source=NULL;
167     char  carr[MAX_IN_BUF] = {'\0'};
168     char* cbuf =  carr;
169     int32_t inputRead=0;
170     int32_t outputWritten=0;
171     int32_t offset=0;
172     const char* sourceLimit =NULL;
173     int32_t cbufSize=0;
174     pTarget = buf->buffer;
175     /* check if we arrived here without exhausting the buffer*/
176     if(buf->currentPos<buf->bufLimit){
177         offset = (int32_t)(buf->bufLimit-buf->currentPos);
178         memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar));
179     }
180 
181 #ifdef UCBUF_DEBUG
182     memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset));
183 #endif
184     if(buf->isBuffered){
185         cbufSize = MAX_IN_BUF;
186         /* read the file */
187         inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset);
188         buf->remaining-=inputRead;
189 
190     }else{
191         cbufSize = T_FileStream_size(buf->in);
192         cbuf = (char*)uprv_malloc(cbufSize);
193         if (cbuf == NULL) {
194         	*error = U_MEMORY_ALLOCATION_ERROR;
195         	return NULL;
196         }
197         inputRead= T_FileStream_read(buf->in,cbuf,cbufSize);
198         buf->remaining-=inputRead;
199     }
200 
201     /* just to be sure...*/
202     if ( 0 == inputRead )
203        buf->remaining = 0;
204 
205     target=pTarget;
206     /* convert the bytes */
207     if(buf->conv){
208         /* set the callback to stop */
209         UConverterToUCallback toUOldAction ;
210         void* toUOldContext;
211         void* toUNewContext=NULL;
212         ucnv_setToUCallBack(buf->conv,
213            UCNV_TO_U_CALLBACK_STOP,
214            toUNewContext,
215            &toUOldAction,
216            (const void**)&toUOldContext,
217            error);
218         /* since state is saved in the converter we add offset to source*/
219         target = pTarget+offset;
220         source = cbuf;
221         sourceLimit = source + inputRead;
222         ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
223                         &source,sourceLimit,NULL,
224                         (UBool)(buf->remaining==0),error);
225 
226         if(U_FAILURE(*error)){
227             char context[CONTEXT_LEN+1];
228             char preContext[CONTEXT_LEN+1];
229             char postContext[CONTEXT_LEN+1];
230             int8_t len = CONTEXT_LEN;
231             int32_t start=0;
232             int32_t stop =0;
233             int32_t pos =0;
234             /* use erro1 to preserve the error code */
235             UErrorCode error1 =U_ZERO_ERROR;
236 
237             if( buf->showWarning==TRUE){
238                 fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while"
239                                " converting input stream to target encoding: %s\n",
240                                u_errorName(*error));
241             }
242 
243 
244             /* now get the context chars */
245             ucnv_getInvalidChars(buf->conv,context,&len,&error1);
246             context[len]= 0 ; /* null terminate the buffer */
247 
248             pos = (int32_t)(source - cbuf - len);
249 
250             /* for pre-context */
251             start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1));
252             stop  = pos-len;
253 
254             memcpy(preContext,cbuf+start,stop-start);
255             /* null terminate the buffer */
256             preContext[stop-start] = 0;
257 
258             /* for post-context */
259             start = pos+len;
260             stop  = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf));
261 
262             memcpy(postContext,source,stop-start);
263             /* null terminate the buffer */
264             postContext[stop-start] = 0;
265 
266             if(buf->showWarning ==TRUE){
267                 /* print out the context */
268                 fprintf(stderr,"\tPre-context: %s\n",preContext);
269                 fprintf(stderr,"\tContext: %s\n",context);
270                 fprintf(stderr,"\tPost-context: %s\n", postContext);
271             }
272 
273             /* reset the converter */
274             ucnv_reset(buf->conv);
275 
276             /* set the call back to substitute
277              * and restart conversion
278              */
279             ucnv_setToUCallBack(buf->conv,
280                UCNV_TO_U_CALLBACK_SUBSTITUTE,
281                toUNewContext,
282                &toUOldAction,
283                (const void**)&toUOldContext,
284                &error1);
285 
286             /* reset source and target start positions */
287             target = pTarget+offset;
288             source = cbuf;
289 
290             /* re convert */
291             ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
292                             &source,sourceLimit,NULL,
293                             (UBool)(buf->remaining==0),&error1);
294 
295         }
296         outputWritten = (int32_t)(target - pTarget);
297 
298 #ifdef UCBUF_DEBUG
299         {
300             int i;
301             target = pTarget;
302             for(i=0;i<numRead;i++){
303               /*  printf("%c", (char)(*target++));*/
304             }
305         }
306 #endif
307 
308     }else{
309         u_charsToUChars(cbuf,target+offset,inputRead);
310         outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset);
311     }
312     buf->currentPos = pTarget;
313     buf->bufLimit=pTarget+outputWritten;
314     *buf->bufLimit=0; /*NUL terminate*/
315     if(cbuf!=carr){
316         uprv_free(cbuf);
317     }
318     return buf;
319 }
320 
321 
322 
323 /* get a UChar from the stream*/
324 U_CAPI int32_t U_EXPORT2
ucbuf_getc(UCHARBUF * buf,UErrorCode * error)325 ucbuf_getc(UCHARBUF* buf,UErrorCode* error){
326     if(error==NULL || U_FAILURE(*error)){
327         return FALSE;
328     }
329     if(buf->currentPos>=buf->bufLimit){
330         if(buf->remaining==0){
331             return U_EOF;
332         }
333         buf=ucbuf_fillucbuf(buf,error);
334         if(U_FAILURE(*error)){
335             return U_EOF;
336         }
337     }
338 
339     return *(buf->currentPos++);
340 }
341 
342 /* get a UChar32 from the stream*/
343 U_CAPI int32_t U_EXPORT2
ucbuf_getc32(UCHARBUF * buf,UErrorCode * error)344 ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){
345     int32_t retVal = (int32_t)U_EOF;
346     if(error==NULL || U_FAILURE(*error)){
347         return FALSE;
348     }
349     if(buf->currentPos+1>=buf->bufLimit){
350         if(buf->remaining==0){
351             return U_EOF;
352         }
353         buf=ucbuf_fillucbuf(buf,error);
354         if(U_FAILURE(*error)){
355             return U_EOF;
356         }
357     }
358     if(U16_IS_LEAD(*(buf->currentPos))){
359         retVal=U16_GET_SUPPLEMENTARY(buf->currentPos[0],buf->currentPos[1]);
360         buf->currentPos+=2;
361     }else{
362         retVal = *(buf->currentPos++);
363     }
364     return retVal;
365 }
366 
367 /* u_unescapeAt() callback to return a UChar*/
368 static UChar U_CALLCONV
_charAt(int32_t offset,void * context)369 _charAt(int32_t offset, void *context) {
370     return ((UCHARBUF*) context)->currentPos[offset];
371 }
372 
373 /* getc and escape it */
374 U_CAPI int32_t U_EXPORT2
ucbuf_getcx32(UCHARBUF * buf,UErrorCode * error)375 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) {
376     int32_t length;
377     int32_t offset;
378     UChar32 c32,c1,c2;
379     if(error==NULL || U_FAILURE(*error)){
380         return FALSE;
381     }
382     /* Fill the buffer if it is empty */
383     if (buf->currentPos >=buf->bufLimit-2) {
384         ucbuf_fillucbuf(buf,error);
385     }
386 
387     /* Get the next character in the buffer */
388     if (buf->currentPos < buf->bufLimit) {
389         c1 = *(buf->currentPos)++;
390     } else {
391         c1 = U_EOF;
392     }
393 
394     c2 = *(buf->currentPos);
395 
396     /* If it isn't a backslash, return it */
397     if (c1 != 0x005C) {
398         return c1;
399     }
400 
401     /* Determine the amount of data in the buffer */
402     length = (int32_t)(buf->bufLimit - buf->currentPos);
403 
404     /* The longest escape sequence is \Uhhhhhhhh; make sure
405        we have at least that many characters */
406     if (length < 10) {
407 
408         /* fill the buffer */
409         ucbuf_fillucbuf(buf,error);
410         length = (int32_t)(buf->bufLimit - buf->buffer);
411     }
412 
413     /* Process the escape */
414     offset = 0;
415     c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf);
416 
417     /* check if u_unescapeAt unescaped and converted
418      * to c32 or not
419      */
420     if(c32==(UChar32)0xFFFFFFFF){
421         if(buf->showWarning) {
422             char context[CONTEXT_LEN+1];
423             int32_t len = CONTEXT_LEN;
424             if(length < len) {
425                 len = length;
426             }
427             context[len]= 0 ; /* null terminate the buffer */
428             u_UCharsToChars( buf->currentPos, context, len);
429             fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context);
430         }
431         *error= U_ILLEGAL_ESCAPE_SEQUENCE;
432         return c1;
433     }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){
434         /* Update the current buffer position */
435         buf->currentPos += offset;
436     }else{
437         /* unescaping failed so we just return
438          * c1 and not consume the buffer
439          * this is useful for rules with escapes
440          * in resource bundles
441          * eg: \' \\ \"
442          */
443         return c1;
444     }
445 
446     return c32;
447 }
448 
449 U_CAPI UCHARBUF* U_EXPORT2
ucbuf_open(const char * fileName,const char ** cp,UBool showWarning,UBool buffered,UErrorCode * error)450 ucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){
451 
452     FileStream* in = NULL;
453     int32_t fileSize=0;
454     const char* knownCp;
455     if(error==NULL || U_FAILURE(*error)){
456         return NULL;
457     }
458     if(cp==NULL || fileName==NULL){
459         *error = U_ILLEGAL_ARGUMENT_ERROR;
460         return FALSE;
461     }
462     if (!uprv_strcmp(fileName, "-")) {
463         in = T_FileStream_stdin();
464     }else{
465         in = T_FileStream_open(fileName, "rb");
466     }
467 
468     if(in!=NULL){
469         UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF));
470         fileSize = T_FileStream_size(in);
471         if(buf == NULL){
472             *error = U_MEMORY_ALLOCATION_ERROR;
473             T_FileStream_close(in);
474             return NULL;
475         }
476         buf->in=in;
477         buf->conv=NULL;
478         buf->showWarning = showWarning;
479         buf->isBuffered = buffered;
480         buf->signatureLength=0;
481         if(*cp==NULL || **cp=='\0'){
482             /* don't have code page name... try to autodetect */
483             ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error);
484         }else if(ucbuf_isCPKnown(*cp)){
485             /* discard BOM */
486             ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error);
487         }
488         if(U_SUCCESS(*error) && buf->conv==NULL) {
489             buf->conv=ucnv_open(*cp,error);
490         }
491         if(U_FAILURE(*error)){
492             ucnv_close(buf->conv);
493             uprv_free(buf);
494             T_FileStream_close(in);
495             return NULL;
496         }
497 
498         if((buf->conv==NULL) && (buf->showWarning==TRUE)){
499             fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n");
500         }
501         buf->remaining=fileSize-buf->signatureLength;
502         if(buf->isBuffered){
503             buf->bufCapacity=MAX_U_BUF;
504         }else{
505             buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/;
506         }
507         buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity );
508         if (buf->buffer == NULL) {
509             *error = U_MEMORY_ALLOCATION_ERROR;
510             ucbuf_close(buf);
511             return NULL;
512         }
513         buf->currentPos=buf->buffer;
514         buf->bufLimit=buf->buffer;
515         if(U_FAILURE(*error)){
516             fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error));
517             ucbuf_close(buf);
518             return NULL;
519         }
520         ucbuf_fillucbuf(buf,error);
521         if(U_FAILURE(*error)){
522             ucbuf_close(buf);
523             return NULL;
524         }
525         return buf;
526     }
527     *error =U_FILE_ACCESS_ERROR;
528     return NULL;
529 }
530 
531 
532 
533 /* TODO: this method will fail if at the
534  * beginning of buffer and the uchar to unget
535  * is from the previous buffer. Need to implement
536  * system to take care of that situation.
537  */
538 U_CAPI void U_EXPORT2
ucbuf_ungetc(int32_t c,UCHARBUF * buf)539 ucbuf_ungetc(int32_t c,UCHARBUF* buf){
540     /* decrement currentPos pointer
541      * if not at the beginning of buffer
542      */
543     if(buf->currentPos!=buf->buffer){
544         if(*(buf->currentPos-1)==c){
545             buf->currentPos--;
546         } else {
547             /* ungetc failed - did not match. */
548         }
549     } else {
550        /* ungetc failed - beginning of buffer. */
551     }
552 }
553 
554 /* frees the resources of UChar* buffer */
555 static void
ucbuf_closebuf(UCHARBUF * buf)556 ucbuf_closebuf(UCHARBUF* buf){
557     uprv_free(buf->buffer);
558     buf->buffer = NULL;
559 }
560 
561 /* close the buf and release resources*/
562 U_CAPI void U_EXPORT2
ucbuf_close(UCHARBUF * buf)563 ucbuf_close(UCHARBUF* buf){
564     if(buf!=NULL){
565         if(buf->conv){
566             ucnv_close(buf->conv);
567         }
568         T_FileStream_close(buf->in);
569         ucbuf_closebuf(buf);
570         uprv_free(buf);
571     }
572 }
573 
574 /* rewind the buf and file stream */
575 U_CAPI void U_EXPORT2
ucbuf_rewind(UCHARBUF * buf,UErrorCode * error)576 ucbuf_rewind(UCHARBUF* buf,UErrorCode* error){
577     if(error==NULL || U_FAILURE(*error)){
578         return;
579     }
580     if(buf){
581         buf->currentPos=buf->buffer;
582         buf->bufLimit=buf->buffer;
583         T_FileStream_rewind(buf->in);
584         buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength;
585 
586         ucnv_resetToUnicode(buf->conv);
587         if(buf->signatureLength>0) {
588             UChar target[1]={ 0 };
589             UChar* pTarget;
590             char start[8];
591             const char* pStart;
592             int32_t numRead;
593 
594             /* read the signature bytes */
595             numRead=T_FileStream_read(buf->in, start, buf->signatureLength);
596 
597             /* convert and ignore initial U+FEFF, and the buffer overflow */
598             pTarget = target;
599             pStart = start;
600             ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error);
601             if(*error==U_BUFFER_OVERFLOW_ERROR) {
602                 *error=U_ZERO_ERROR;
603             }
604 
605             /* verify that we successfully read exactly U+FEFF */
606             if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) {
607                 *error=U_INTERNAL_PROGRAM_ERROR;
608             }
609         }
610     }
611 }
612 
613 
614 U_CAPI int32_t U_EXPORT2
ucbuf_size(UCHARBUF * buf)615 ucbuf_size(UCHARBUF* buf){
616     if(buf){
617         if(buf->isBuffered){
618             return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv);
619         }else{
620             return (int32_t)(buf->bufLimit - buf->buffer);
621         }
622     }
623     return 0;
624 }
625 
626 U_CAPI const UChar* U_EXPORT2
ucbuf_getBuffer(UCHARBUF * buf,int32_t * len,UErrorCode * error)627 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){
628     if(error==NULL || U_FAILURE(*error)){
629         return NULL;
630     }
631     if(buf==NULL || len==NULL){
632         *error = U_ILLEGAL_ARGUMENT_ERROR;
633         return NULL;
634     }
635     *len = (int32_t)(buf->bufLimit - buf->buffer);
636     return buf->buffer;
637 }
638 
639 U_CAPI const char* U_EXPORT2
ucbuf_resolveFileName(const char * inputDir,const char * fileName,char * target,int32_t * len,UErrorCode * status)640 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){
641     int32_t requiredLen = 0;
642     int32_t dirlen =  0;
643     int32_t filelen = 0;
644     if(status==NULL || U_FAILURE(*status)){
645         return NULL;
646     }
647 
648     if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){
649         *status = U_ILLEGAL_ARGUMENT_ERROR;
650         return NULL;
651     }
652 
653 
654     dirlen  = (int32_t)uprv_strlen(inputDir);
655     filelen = (int32_t)uprv_strlen(fileName);
656     if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
657         requiredLen = dirlen + filelen + 2;
658         if((*len < requiredLen) || target==NULL){
659             *len = requiredLen;
660             *status = U_BUFFER_OVERFLOW_ERROR;
661             return NULL;
662         }
663 
664         target[0] = '\0';
665         /*
666          * append the input dir to openFileName if the first char in
667          * filename is not file separation char and the last char input directory is  not '.'.
668          * This is to support :
669          * genrb -s. /home/icu/data
670          * genrb -s. icu/data
671          * The user cannot mix notations like
672          * genrb -s. /icu/data --- the absolute path specified. -s redundant
673          * user should use
674          * genrb -s. icu/data  --- start from CWD and look in icu/data dir
675          */
676         if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
677             uprv_strcpy(target, inputDir);
678             target[dirlen]     = U_FILE_SEP_CHAR;
679         }
680         target[dirlen + 1] = '\0';
681     } else {
682         requiredLen = dirlen + filelen + 1;
683         if((*len < requiredLen) || target==NULL){
684             *len = requiredLen;
685             *status = U_BUFFER_OVERFLOW_ERROR;
686             return NULL;
687         }
688 
689         uprv_strcpy(target, inputDir);
690     }
691 
692     uprv_strcat(target, fileName);
693     return target;
694 }
695 /*
696  * Unicode TR 13 says any of the below chars is
697  * a new line char in a readline function in addition
698  * to CR+LF combination which needs to be
699  * handled separately
700  */
ucbuf_isCharNewLine(UChar c)701 static UBool ucbuf_isCharNewLine(UChar c){
702     switch(c){
703     case 0x000A: /* LF  */
704     case 0x000D: /* CR  */
705     case 0x000C: /* FF  */
706     case 0x0085: /* NEL */
707     case 0x2028: /* LS  */
708     case 0x2029: /* PS  */
709         return TRUE;
710     default:
711         return FALSE;
712     }
713 }
714 
715 U_CAPI const UChar* U_EXPORT2
ucbuf_readline(UCHARBUF * buf,int32_t * len,UErrorCode * err)716 ucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){
717     UChar* temp = buf->currentPos;
718     UChar* savePos =NULL;
719     UChar c=0x0000;
720     if(buf->isBuffered){
721         /* The input is buffered we have to do more
722         * for returning a pointer U_TRUNCATED_CHAR_FOUND
723         */
724         for(;;){
725             c = *temp++;
726             if(buf->remaining==0){
727                 return NULL; /* end of file is reached return NULL */
728             }
729             if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){
730                 *err= U_TRUNCATED_CHAR_FOUND;
731                 return NULL;
732             }else{
733                 ucbuf_fillucbuf(buf,err);
734                 if(U_FAILURE(*err)){
735                     return NULL;
736                 }
737             }
738             /*
739              * According to TR 13 readLine functions must interpret
740              * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators
741              */
742             /* Windows CR LF */
743             if(c ==0x0d && temp <= buf->bufLimit && *temp == 0x0a ){
744                 *len = (int32_t)(temp++ - buf->currentPos);
745                 savePos = buf->currentPos;
746                 buf->currentPos = temp;
747                 return savePos;
748             }
749             /* else */
750 
751             if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){  /* Unipad inserts 2028 line separators! */
752                 *len = (int32_t)(temp - buf->currentPos);
753                 savePos = buf->currentPos;
754                 buf->currentPos = temp;
755                 return savePos;
756             }
757         }
758     }else{
759     /* we know that all input is read into the internal
760     * buffer so we can safely return pointers
761         */
762         for(;;){
763             c = *temp++;
764 
765             if(buf->currentPos==buf->bufLimit){
766                 return NULL; /* end of file is reached return NULL */
767             }
768             /* Windows CR LF */
769             if(c ==0x0d && temp <= buf->bufLimit && *temp == 0x0a ){
770                 *len = (int32_t)(temp++ - buf->currentPos);
771                 savePos = buf->currentPos;
772                 buf->currentPos = temp;
773                 return savePos;
774             }
775             /* else */
776             if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) {  /* Unipad inserts 2028 line separators! */
777                 *len = (int32_t)(temp - buf->currentPos);
778                 savePos = buf->currentPos;
779                 buf->currentPos = temp;
780                 return savePos;
781             }
782         }
783     }
784     /* not reached */
785     /* A compiler warning will appear if all paths don't contain a return statement. */
786 /*    return NULL;*/
787 }
788 #endif
789