1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2001-2012, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  ustr_wcs.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2004sep07
14 *   created by: Markus W. Scherer
15 *
16 *   u_strToWCS() and u_strFromWCS() functions
17 *   moved here from ustrtrns.c for better modularization.
18 */
19 
20 #include "unicode/utypes.h"
21 #include "unicode/ustring.h"
22 #include "cstring.h"
23 #include "cwchar.h"
24 #include "cmemory.h"
25 #include "ustr_imp.h"
26 #include "ustr_cnv.h"
27 
28 #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
29 
30 #define _STACK_BUFFER_CAPACITY 1000
31 #define _BUFFER_CAPACITY_MULTIPLIER 2
32 
33 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
34 // TODO: We should use CharString for char buffers and UnicodeString for UChar buffers.
35 // Then we could change this to work only with wchar_t buffers.
36 static inline UBool
u_growAnyBufferFromStatic(void * context,void ** pBuffer,int32_t * pCapacity,int32_t reqCapacity,int32_t length,int32_t size)37 u_growAnyBufferFromStatic(void *context,
38                        void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
39                        int32_t length, int32_t size) {
40     // Use char* not void* to avoid the compiler's strict-aliasing assumptions
41     // and related warnings.
42     char *newBuffer=(char *)uprv_malloc(reqCapacity*size);
43     if(newBuffer!=NULL) {
44         if(length>0) {
45             uprv_memcpy(newBuffer, *pBuffer, length*size);
46         }
47         *pCapacity=reqCapacity;
48     } else {
49         *pCapacity=0;
50     }
51 
52     /* release the old pBuffer if it was not statically allocated */
53     if(*pBuffer!=(char *)context) {
54         uprv_free(*pBuffer);
55     }
56 
57     *pBuffer=newBuffer;
58     return (UBool)(newBuffer!=NULL);
59 }
60 
61 /* helper function */
62 static wchar_t*
_strToWCS(wchar_t * dest,int32_t destCapacity,int32_t * pDestLength,const UChar * src,int32_t srcLength,UErrorCode * pErrorCode)63 _strToWCS(wchar_t *dest,
64            int32_t destCapacity,
65            int32_t *pDestLength,
66            const UChar *src,
67            int32_t srcLength,
68            UErrorCode *pErrorCode){
69 
70     char stackBuffer [_STACK_BUFFER_CAPACITY];
71     char* tempBuf = stackBuffer;
72     int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
73     char* tempBufLimit = stackBuffer + tempBufCapacity;
74     UConverter* conv = NULL;
75     char* saveBuf = tempBuf;
76     wchar_t* intTarget=NULL;
77     int32_t intTargetCapacity=0;
78     int count=0,retVal=0;
79 
80     const UChar *pSrcLimit =NULL;
81     const UChar *pSrc = src;
82 
83     conv = u_getDefaultConverter(pErrorCode);
84 
85     if(U_FAILURE(*pErrorCode)){
86         return NULL;
87     }
88 
89     if(srcLength == -1){
90         srcLength = u_strlen(pSrc);
91     }
92 
93     pSrcLimit = pSrc + srcLength;
94 
95     for(;;) {
96         /* reset the error state */
97         *pErrorCode = U_ZERO_ERROR;
98 
99         /* convert to chars using default converter */
100         ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode);
101         count =(tempBuf - saveBuf);
102 
103         /* This should rarely occur */
104         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
105             tempBuf = saveBuf;
106 
107             /* we dont have enough room on the stack grow the buffer */
108             int32_t newCapacity = 2 * srcLength;
109             if(newCapacity <= tempBufCapacity) {
110                 newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity;
111             }
112             if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
113                     newCapacity, count, 1)) {
114                 goto cleanup;
115             }
116 
117            saveBuf = tempBuf;
118            tempBufLimit = tempBuf + tempBufCapacity;
119            tempBuf = tempBuf + count;
120 
121         } else {
122             break;
123         }
124     }
125 
126     if(U_FAILURE(*pErrorCode)){
127         goto cleanup;
128     }
129 
130     /* done with conversion null terminate the char buffer */
131     if(count>=tempBufCapacity){
132         tempBuf = saveBuf;
133         /* we dont have enough room on the stack grow the buffer */
134         if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
135                 count+1, count, 1)) {
136             goto cleanup;
137         }
138        saveBuf = tempBuf;
139     }
140 
141     saveBuf[count]=0;
142 
143 
144     /* allocate more space than required
145      * here we assume that every char requires
146      * no more than 2 wchar_ts
147      */
148     intTargetCapacity =  (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */;
149     intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );
150 
151     if(intTarget){
152 
153         int32_t nulLen = 0;
154         int32_t remaining = intTargetCapacity;
155         wchar_t* pIntTarget=intTarget;
156         tempBuf = saveBuf;
157 
158         /* now convert the mbs to wcs */
159         for(;;){
160 
161             /* we can call the system API since we are sure that
162              * there is atleast 1 null in the input
163              */
164             retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
165 
166             if(retVal==-1){
167                 *pErrorCode = U_INVALID_CHAR_FOUND;
168                 break;
169             }else if(retVal== remaining){/* should never occur */
170                 int numWritten = (pIntTarget-intTarget);
171                 u_growAnyBufferFromStatic(NULL,(void**) &intTarget,
172                                           &intTargetCapacity,
173                                           intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER,
174                                           numWritten,
175                                           sizeof(wchar_t));
176                 pIntTarget = intTarget;
177                 remaining=intTargetCapacity;
178 
179                 if(nulLen!=count){ /*there are embedded nulls*/
180                     pIntTarget+=numWritten;
181                     remaining-=numWritten;
182                 }
183 
184             }else{
185                 int32_t nulVal;
186                 /*scan for nulls */
187                 /* we donot check for limit since tempBuf is null terminated */
188                 while(tempBuf[nulLen++] != 0){
189                 }
190                 nulVal = (nulLen < srcLength) ? 1 : 0;
191                 pIntTarget = pIntTarget + retVal+nulVal;
192                 remaining -=(retVal+nulVal);
193 
194                 /* check if we have reached the source limit*/
195                 if(nulLen>=(count)){
196                     break;
197                 }
198             }
199         }
200         count = (int32_t)(pIntTarget-intTarget);
201 
202         if(0 < count && count <= destCapacity){
203             uprv_memcpy(dest,intTarget,count*sizeof(wchar_t));
204         }
205 
206         if(pDestLength){
207             *pDestLength = count;
208         }
209 
210         /* free the allocated memory */
211         uprv_free(intTarget);
212 
213     }else{
214         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
215     }
216 cleanup:
217     /* are we still using stack buffer */
218     if(stackBuffer != saveBuf){
219         uprv_free(saveBuf);
220     }
221     u_terminateWChars(dest,destCapacity,count,pErrorCode);
222 
223     u_releaseDefaultConverter(conv);
224 
225     return dest;
226 }
227 #endif
228 
229 U_CAPI wchar_t* U_EXPORT2
u_strToWCS(wchar_t * dest,int32_t destCapacity,int32_t * pDestLength,const UChar * src,int32_t srcLength,UErrorCode * pErrorCode)230 u_strToWCS(wchar_t *dest,
231            int32_t destCapacity,
232            int32_t *pDestLength,
233            const UChar *src,
234            int32_t srcLength,
235            UErrorCode *pErrorCode){
236 
237     /* args check */
238     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
239         return NULL;
240     }
241 
242     if( (src==NULL && srcLength!=0) || srcLength < -1 ||
243         (destCapacity<0) || (dest == NULL && destCapacity > 0)
244     ) {
245         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
246         return NULL;
247     }
248 
249 #ifdef U_WCHAR_IS_UTF16
250     /* wchar_t is UTF-16 just do a memcpy */
251     if(srcLength == -1){
252         srcLength = u_strlen(src);
253     }
254     if(0 < srcLength && srcLength <= destCapacity){
255         uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
256     }
257     if(pDestLength){
258        *pDestLength = srcLength;
259     }
260 
261     u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
262 
263     return dest;
264 
265 #elif defined U_WCHAR_IS_UTF32
266 
267     return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength,
268                                   src, srcLength, pErrorCode);
269 
270 #else
271 
272     return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);
273 
274 #endif
275 
276 }
277 
278 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
279 /* helper function */
280 static UChar*
_strFromWCS(UChar * dest,int32_t destCapacity,int32_t * pDestLength,const wchar_t * src,int32_t srcLength,UErrorCode * pErrorCode)281 _strFromWCS( UChar   *dest,
282              int32_t destCapacity,
283              int32_t *pDestLength,
284              const wchar_t *src,
285              int32_t srcLength,
286              UErrorCode *pErrorCode)
287 {
288     int32_t retVal =0, count =0 ;
289     UConverter* conv = NULL;
290     UChar* pTarget = NULL;
291     UChar* pTargetLimit = NULL;
292     UChar* target = NULL;
293 
294     UChar uStack [_STACK_BUFFER_CAPACITY];
295 
296     wchar_t wStack[_STACK_BUFFER_CAPACITY];
297     wchar_t* pWStack = wStack;
298 
299 
300     char cStack[_STACK_BUFFER_CAPACITY];
301     int32_t cStackCap = _STACK_BUFFER_CAPACITY;
302     char* pCSrc=cStack;
303     char* pCSave=pCSrc;
304     char* pCSrcLimit=NULL;
305 
306     const wchar_t* pSrc = src;
307     const wchar_t* pSrcLimit = NULL;
308 
309     if(srcLength ==-1){
310         /* if the wchar_t source is null terminated we can safely
311          * assume that there are no embedded nulls, this is a fast
312          * path for null terminated strings.
313          */
314         for(;;){
315             /* convert wchars  to chars */
316             retVal = uprv_wcstombs(pCSrc,src, cStackCap);
317 
318             if(retVal == -1){
319                 *pErrorCode = U_ILLEGAL_CHAR_FOUND;
320                 goto cleanup;
321             }else if(retVal >= (cStackCap-1)){
322                 /* Should rarely occur */
323                 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
324                     cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char));
325                 pCSave = pCSrc;
326             }else{
327                 /* converted every thing */
328                 pCSrc = pCSrc+retVal;
329                 break;
330             }
331         }
332 
333     }else{
334         /* here the source is not null terminated
335          * so it may have nulls embeded and we need to
336          * do some extra processing
337          */
338         int32_t remaining =cStackCap;
339 
340         pSrcLimit = src + srcLength;
341 
342         for(;;){
343             register int32_t nulLen = 0;
344 
345             /* find nulls in the string */
346             while(nulLen<srcLength && pSrc[nulLen++]!=0){
347             }
348 
349             if((pSrc+nulLen) < pSrcLimit){
350                 /* check if we have enough room in pCSrc */
351                 if(remaining < (nulLen * MB_CUR_MAX)){
352                     /* should rarely occur */
353                     int32_t len = (pCSrc-pCSave);
354                     pCSrc = pCSave;
355                     /* we do not have enough room so grow the buffer*/
356                     u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
357                            _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
358 
359                     pCSave = pCSrc;
360                     pCSrc = pCSave+len;
361                     remaining = cStackCap-(pCSrc - pCSave);
362                 }
363 
364                 /* we have found a null  so convert the
365                  * chunk from begining of non-null char to null
366                  */
367                 retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
368 
369                 if(retVal==-1){
370                     /* an error occurred bail out */
371                     *pErrorCode = U_ILLEGAL_CHAR_FOUND;
372                     goto cleanup;
373                 }
374 
375                 pCSrc += retVal+1 /* already null terminated */;
376 
377                 pSrc += nulLen; /* skip past the null */
378                 srcLength-=nulLen; /* decrement the srcLength */
379                 remaining -= (pCSrc-pCSave);
380 
381 
382             }else{
383                 /* the source is not null terminated and we are
384                  * end of source so we copy the source to a temp buffer
385                  * null terminate it and convert wchar_ts to chars
386                  */
387                 if(nulLen >= _STACK_BUFFER_CAPACITY){
388                     /* Should rarely occcur */
389                     /* allocate new buffer buffer */
390                     pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
391                     if(pWStack==NULL){
392                         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
393                         goto cleanup;
394                     }
395                 }
396                 if(nulLen>0){
397                     /* copy the contents to tempStack */
398                     uprv_memcpy(pWStack,pSrc,nulLen*sizeof(wchar_t));
399                 }
400 
401                 /* null terminate the tempBuffer */
402                 pWStack[nulLen] =0 ;
403 
404                 if(remaining < (nulLen * MB_CUR_MAX)){
405                     /* Should rarely occur */
406                     int32_t len = (pCSrc-pCSave);
407                     pCSrc = pCSave;
408                     /* we do not have enough room so grow the buffer*/
409                     u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
410                            cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
411 
412                     pCSave = pCSrc;
413                     pCSrc = pCSave+len;
414                     remaining = cStackCap-(pCSrc - pCSave);
415                 }
416                 /* convert to chars */
417                 retVal = uprv_wcstombs(pCSrc,pWStack,remaining);
418 
419                 pCSrc += retVal;
420                 pSrc  += nulLen;
421                 srcLength-=nulLen; /* decrement the srcLength */
422                 break;
423             }
424         }
425     }
426 
427     /* OK..now we have converted from wchar_ts to chars now
428      * convert chars to UChars
429      */
430     pCSrcLimit = pCSrc;
431     pCSrc = pCSave;
432     pTarget = target= dest;
433     pTargetLimit = dest + destCapacity;
434 
435     conv= u_getDefaultConverter(pErrorCode);
436 
437     if(U_FAILURE(*pErrorCode)|| conv==NULL){
438         goto cleanup;
439     }
440 
441     for(;;) {
442 
443         *pErrorCode = U_ZERO_ERROR;
444 
445         /* convert to stack buffer*/
446         ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
447 
448         /* increment count to number written to stack */
449         count+= pTarget - target;
450 
451         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
452             target = uStack;
453             pTarget = uStack;
454             pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
455         } else {
456             break;
457         }
458 
459     }
460 
461     if(pDestLength){
462         *pDestLength =count;
463     }
464 
465     u_terminateUChars(dest,destCapacity,count,pErrorCode);
466 
467 cleanup:
468 
469     if(cStack != pCSave){
470         uprv_free(pCSave);
471     }
472 
473     if(wStack != pWStack){
474         uprv_free(pWStack);
475     }
476 
477     u_releaseDefaultConverter(conv);
478 
479     return dest;
480 }
481 #endif
482 
483 U_CAPI UChar* U_EXPORT2
u_strFromWCS(UChar * dest,int32_t destCapacity,int32_t * pDestLength,const wchar_t * src,int32_t srcLength,UErrorCode * pErrorCode)484 u_strFromWCS(UChar   *dest,
485              int32_t destCapacity,
486              int32_t *pDestLength,
487              const wchar_t *src,
488              int32_t srcLength,
489              UErrorCode *pErrorCode)
490 {
491 
492     /* args check */
493     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
494         return NULL;
495     }
496 
497     if( (src==NULL && srcLength!=0) || srcLength < -1 ||
498         (destCapacity<0) || (dest == NULL && destCapacity > 0)
499     ) {
500         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
501         return NULL;
502     }
503 
504 #ifdef U_WCHAR_IS_UTF16
505     /* wchar_t is UTF-16 just do a memcpy */
506     if(srcLength == -1){
507         srcLength = u_strlen(src);
508     }
509     if(0 < srcLength && srcLength <= destCapacity){
510         uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
511     }
512     if(pDestLength){
513        *pDestLength = srcLength;
514     }
515 
516     u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
517 
518     return dest;
519 
520 #elif defined U_WCHAR_IS_UTF32
521 
522     return u_strFromUTF32(dest, destCapacity, pDestLength,
523                           (UChar32*)src, srcLength, pErrorCode);
524 
525 #else
526 
527     return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);
528 
529 #endif
530 
531 }
532 
#endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
534