1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2001-2012, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  ustr_wcs.cpp
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2004sep07
16 *   created by: Markus W. Scherer
17 *
18 *   u_strToWCS() and u_strFromWCS() functions
19 *   moved here from ustrtrns.c for better modularization.
20 */
21 
22 #include "unicode/utypes.h"
23 #include "unicode/ustring.h"
24 #include "cstring.h"
25 #include "cwchar.h"
26 #include "cmemory.h"
27 #include "ustr_imp.h"
28 #include "ustr_cnv.h"
29 
30 #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
31 
/* Number of elements in each on-stack scratch buffer declared by the helpers in this file. */
#define _STACK_BUFFER_CAPACITY 1000
/* Factor used when sizing or growing a scratch buffer that may be, or has turned out to be, too small. */
#define _BUFFER_CAPACITY_MULTIPLIER 2
34 
35 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
36 // TODO: We should use CharString for char buffers and UnicodeString for UChar buffers.
37 // Then we could change this to work only with wchar_t buffers.
38 static inline UBool
u_growAnyBufferFromStatic(void * context,void ** pBuffer,int32_t * pCapacity,int32_t reqCapacity,int32_t length,int32_t size)39 u_growAnyBufferFromStatic(void *context,
40                        void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
41                        int32_t length, int32_t size) {
42     // Use char* not void* to avoid the compiler's strict-aliasing assumptions
43     // and related warnings.
44     char *newBuffer=(char *)uprv_malloc(reqCapacity*size);
45     if(newBuffer!=NULL) {
46         if(length>0) {
47             uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size);
48         }
49         *pCapacity=reqCapacity;
50     } else {
51         *pCapacity=0;
52     }
53 
54     /* release the old pBuffer if it was not statically allocated */
55     if(*pBuffer!=(char *)context) {
56         uprv_free(*pBuffer);
57     }
58 
59     *pBuffer=newBuffer;
60     return (UBool)(newBuffer!=NULL);
61 }
62 
63 /* helper function */
64 static wchar_t*
_strToWCS(wchar_t * dest,int32_t destCapacity,int32_t * pDestLength,const UChar * src,int32_t srcLength,UErrorCode * pErrorCode)65 _strToWCS(wchar_t *dest,
66            int32_t destCapacity,
67            int32_t *pDestLength,
68            const UChar *src,
69            int32_t srcLength,
70            UErrorCode *pErrorCode){
71 
72     char stackBuffer [_STACK_BUFFER_CAPACITY];
73     char* tempBuf = stackBuffer;
74     int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
75     char* tempBufLimit = stackBuffer + tempBufCapacity;
76     UConverter* conv = NULL;
77     char* saveBuf = tempBuf;
78     wchar_t* intTarget=NULL;
79     int32_t intTargetCapacity=0;
80     int count=0,retVal=0;
81 
82     const UChar *pSrcLimit =NULL;
83     const UChar *pSrc = src;
84 
85     conv = u_getDefaultConverter(pErrorCode);
86 
87     if(U_FAILURE(*pErrorCode)){
88         return NULL;
89     }
90 
91     if(srcLength == -1){
92         srcLength = u_strlen(pSrc);
93     }
94 
95     pSrcLimit = pSrc + srcLength;
96 
97     for(;;) {
98         /* reset the error state */
99         *pErrorCode = U_ZERO_ERROR;
100 
101         /* convert to chars using default converter */
102         ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode);
103         count =(tempBuf - saveBuf);
104 
105         /* This should rarely occur */
106         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
107             tempBuf = saveBuf;
108 
109             /* we dont have enough room on the stack grow the buffer */
110             int32_t newCapacity = 2 * srcLength;
111             if(newCapacity <= tempBufCapacity) {
112                 newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity;
113             }
114             if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
115                     newCapacity, count, 1)) {
116                 goto cleanup;
117             }
118 
119            saveBuf = tempBuf;
120            tempBufLimit = tempBuf + tempBufCapacity;
121            tempBuf = tempBuf + count;
122 
123         } else {
124             break;
125         }
126     }
127 
128     if(U_FAILURE(*pErrorCode)){
129         goto cleanup;
130     }
131 
132     /* done with conversion null terminate the char buffer */
133     if(count>=tempBufCapacity){
134         tempBuf = saveBuf;
135         /* we dont have enough room on the stack grow the buffer */
136         if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
137                 count+1, count, 1)) {
138             goto cleanup;
139         }
140        saveBuf = tempBuf;
141     }
142 
143     saveBuf[count]=0;
144 
145 
146     /* allocate more space than required
147      * here we assume that every char requires
148      * no more than 2 wchar_ts
149      */
150     intTargetCapacity =  (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */;
151     intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );
152 
153     if(intTarget){
154 
155         int32_t nulLen = 0;
156         int32_t remaining = intTargetCapacity;
157         wchar_t* pIntTarget=intTarget;
158         tempBuf = saveBuf;
159 
160         /* now convert the mbs to wcs */
161         for(;;){
162 
163             /* we can call the system API since we are sure that
164              * there is atleast 1 null in the input
165              */
166             retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
167 
168             if(retVal==-1){
169                 *pErrorCode = U_INVALID_CHAR_FOUND;
170                 break;
171             }else if(retVal== remaining){/* should never occur */
172                 int numWritten = (pIntTarget-intTarget);
173                 u_growAnyBufferFromStatic(NULL,(void**) &intTarget,
174                                           &intTargetCapacity,
175                                           intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER,
176                                           numWritten,
177                                           sizeof(wchar_t));
178                 pIntTarget = intTarget;
179                 remaining=intTargetCapacity;
180 
181                 if(nulLen!=count){ /*there are embedded nulls*/
182                     pIntTarget+=numWritten;
183                     remaining-=numWritten;
184                 }
185 
186             }else{
187                 int32_t nulVal;
188                 /*scan for nulls */
189                 /* we donot check for limit since tempBuf is null terminated */
190                 while(tempBuf[nulLen++] != 0){
191                 }
192                 nulVal = (nulLen < srcLength) ? 1 : 0;
193                 pIntTarget = pIntTarget + retVal+nulVal;
194                 remaining -=(retVal+nulVal);
195 
196                 /* check if we have reached the source limit*/
197                 if(nulLen>=(count)){
198                     break;
199                 }
200             }
201         }
202         count = (int32_t)(pIntTarget-intTarget);
203 
204         if(0 < count && count <= destCapacity){
205             uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t));
206         }
207 
208         if(pDestLength){
209             *pDestLength = count;
210         }
211 
212         /* free the allocated memory */
213         uprv_free(intTarget);
214 
215     }else{
216         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
217     }
218 cleanup:
219     /* are we still using stack buffer */
220     if(stackBuffer != saveBuf){
221         uprv_free(saveBuf);
222     }
223     u_terminateWChars(dest,destCapacity,count,pErrorCode);
224 
225     u_releaseDefaultConverter(conv);
226 
227     return dest;
228 }
229 #endif
230 
231 U_CAPI wchar_t* U_EXPORT2
u_strToWCS(wchar_t * dest,int32_t destCapacity,int32_t * pDestLength,const UChar * src,int32_t srcLength,UErrorCode * pErrorCode)232 u_strToWCS(wchar_t *dest,
233            int32_t destCapacity,
234            int32_t *pDestLength,
235            const UChar *src,
236            int32_t srcLength,
237            UErrorCode *pErrorCode){
238 
239     /* args check */
240     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
241         return NULL;
242     }
243 
244     if( (src==NULL && srcLength!=0) || srcLength < -1 ||
245         (destCapacity<0) || (dest == NULL && destCapacity > 0)
246     ) {
247         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
248         return NULL;
249     }
250 
251 #ifdef U_WCHAR_IS_UTF16
252     /* wchar_t is UTF-16 just do a memcpy */
253     if(srcLength == -1){
254         srcLength = u_strlen(src);
255     }
256     if(0 < srcLength && srcLength <= destCapacity){
257         u_memcpy((UChar *)dest, src, srcLength);
258     }
259     if(pDestLength){
260        *pDestLength = srcLength;
261     }
262 
263     u_terminateUChars((UChar *)dest,destCapacity,srcLength,pErrorCode);
264 
265     return dest;
266 
267 #elif defined U_WCHAR_IS_UTF32
268 
269     return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength,
270                                   src, srcLength, pErrorCode);
271 
272 #else
273 
274     return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);
275 
276 #endif
277 
278 }
279 
280 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
281 /* helper function */
282 static UChar*
_strFromWCS(UChar * dest,int32_t destCapacity,int32_t * pDestLength,const wchar_t * src,int32_t srcLength,UErrorCode * pErrorCode)283 _strFromWCS( UChar   *dest,
284              int32_t destCapacity,
285              int32_t *pDestLength,
286              const wchar_t *src,
287              int32_t srcLength,
288              UErrorCode *pErrorCode)
289 {
290     int32_t retVal =0, count =0 ;
291     UConverter* conv = NULL;
292     UChar* pTarget = NULL;
293     UChar* pTargetLimit = NULL;
294     UChar* target = NULL;
295 
296     UChar uStack [_STACK_BUFFER_CAPACITY];
297 
298     wchar_t wStack[_STACK_BUFFER_CAPACITY];
299     wchar_t* pWStack = wStack;
300 
301 
302     char cStack[_STACK_BUFFER_CAPACITY];
303     int32_t cStackCap = _STACK_BUFFER_CAPACITY;
304     char* pCSrc=cStack;
305     char* pCSave=pCSrc;
306     char* pCSrcLimit=NULL;
307 
308     const wchar_t* pSrc = src;
309     const wchar_t* pSrcLimit = NULL;
310 
311     if(srcLength ==-1){
312         /* if the wchar_t source is null terminated we can safely
313          * assume that there are no embedded nulls, this is a fast
314          * path for null terminated strings.
315          */
316         for(;;){
317             /* convert wchars  to chars */
318             retVal = uprv_wcstombs(pCSrc,src, cStackCap);
319 
320             if(retVal == -1){
321                 *pErrorCode = U_ILLEGAL_CHAR_FOUND;
322                 goto cleanup;
323             }else if(retVal >= (cStackCap-1)){
324                 /* Should rarely occur */
325                 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
326                     cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char));
327                 pCSave = pCSrc;
328             }else{
329                 /* converted every thing */
330                 pCSrc = pCSrc+retVal;
331                 break;
332             }
333         }
334 
335     }else{
336         /* here the source is not null terminated
337          * so it may have nulls embeded and we need to
338          * do some extra processing
339          */
340         int32_t remaining =cStackCap;
341 
342         pSrcLimit = src + srcLength;
343 
344         for(;;){
345             int32_t nulLen = 0;
346 
347             /* find nulls in the string */
348             while(nulLen<srcLength && pSrc[nulLen++]!=0){
349             }
350 
351             if((pSrc+nulLen) < pSrcLimit){
352                 /* check if we have enough room in pCSrc */
353                 if(remaining < (nulLen * MB_CUR_MAX)){
354                     /* should rarely occur */
355                     int32_t len = (pCSrc-pCSave);
356                     pCSrc = pCSave;
357                     /* we do not have enough room so grow the buffer*/
358                     u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
359                            _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
360 
361                     pCSave = pCSrc;
362                     pCSrc = pCSave+len;
363                     remaining = cStackCap-(pCSrc - pCSave);
364                 }
365 
366                 /* we have found a null  so convert the
367                  * chunk from begining of non-null char to null
368                  */
369                 retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
370 
371                 if(retVal==-1){
372                     /* an error occurred bail out */
373                     *pErrorCode = U_ILLEGAL_CHAR_FOUND;
374                     goto cleanup;
375                 }
376 
377                 pCSrc += retVal+1 /* already null terminated */;
378 
379                 pSrc += nulLen; /* skip past the null */
380                 srcLength-=nulLen; /* decrement the srcLength */
381                 remaining -= (pCSrc-pCSave);
382 
383 
384             }else{
385                 /* the source is not null terminated and we are
386                  * end of source so we copy the source to a temp buffer
387                  * null terminate it and convert wchar_ts to chars
388                  */
389                 if(nulLen >= _STACK_BUFFER_CAPACITY){
390                     /* Should rarely occcur */
391                     /* allocate new buffer buffer */
392                     pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
393                     if(pWStack==NULL){
394                         *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
395                         goto cleanup;
396                     }
397                 }
398                 if(nulLen>0){
399                     /* copy the contents to tempStack */
400                     uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t));
401                 }
402 
403                 /* null terminate the tempBuffer */
404                 pWStack[nulLen] =0 ;
405 
406                 if(remaining < (nulLen * MB_CUR_MAX)){
407                     /* Should rarely occur */
408                     int32_t len = (pCSrc-pCSave);
409                     pCSrc = pCSave;
410                     /* we do not have enough room so grow the buffer*/
411                     u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
412                            cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
413 
414                     pCSave = pCSrc;
415                     pCSrc = pCSave+len;
416                     remaining = cStackCap-(pCSrc - pCSave);
417                 }
418                 /* convert to chars */
419                 retVal = uprv_wcstombs(pCSrc,pWStack,remaining);
420 
421                 pCSrc += retVal;
422                 pSrc  += nulLen;
423                 srcLength-=nulLen; /* decrement the srcLength */
424                 break;
425             }
426         }
427     }
428 
429     /* OK..now we have converted from wchar_ts to chars now
430      * convert chars to UChars
431      */
432     pCSrcLimit = pCSrc;
433     pCSrc = pCSave;
434     pTarget = target= dest;
435     pTargetLimit = dest + destCapacity;
436 
437     conv= u_getDefaultConverter(pErrorCode);
438 
439     if(U_FAILURE(*pErrorCode)|| conv==NULL){
440         goto cleanup;
441     }
442 
443     for(;;) {
444 
445         *pErrorCode = U_ZERO_ERROR;
446 
447         /* convert to stack buffer*/
448         ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
449 
450         /* increment count to number written to stack */
451         count+= pTarget - target;
452 
453         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
454             target = uStack;
455             pTarget = uStack;
456             pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
457         } else {
458             break;
459         }
460 
461     }
462 
463     if(pDestLength){
464         *pDestLength =count;
465     }
466 
467     u_terminateUChars(dest,destCapacity,count,pErrorCode);
468 
469 cleanup:
470 
471     if(cStack != pCSave){
472         uprv_free(pCSave);
473     }
474 
475     if(wStack != pWStack){
476         uprv_free(pWStack);
477     }
478 
479     u_releaseDefaultConverter(conv);
480 
481     return dest;
482 }
483 #endif
484 
485 U_CAPI UChar* U_EXPORT2
u_strFromWCS(UChar * dest,int32_t destCapacity,int32_t * pDestLength,const wchar_t * src,int32_t srcLength,UErrorCode * pErrorCode)486 u_strFromWCS(UChar   *dest,
487              int32_t destCapacity,
488              int32_t *pDestLength,
489              const wchar_t *src,
490              int32_t srcLength,
491              UErrorCode *pErrorCode)
492 {
493 
494     /* args check */
495     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
496         return NULL;
497     }
498 
499     if( (src==NULL && srcLength!=0) || srcLength < -1 ||
500         (destCapacity<0) || (dest == NULL && destCapacity > 0)
501     ) {
502         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
503         return NULL;
504     }
505 
506 #ifdef U_WCHAR_IS_UTF16
507     /* wchar_t is UTF-16 just do a memcpy */
508     if(srcLength == -1){
509         srcLength = u_strlen((const UChar *)src);
510     }
511     if(0 < srcLength && srcLength <= destCapacity){
512         u_memcpy(dest, (const UChar *)src, srcLength);
513     }
514     if(pDestLength){
515        *pDestLength = srcLength;
516     }
517 
518     u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
519 
520     return dest;
521 
522 #elif defined U_WCHAR_IS_UTF32
523 
524     return u_strFromUTF32(dest, destCapacity, pDestLength,
525                           (UChar32*)src, srcLength, pErrorCode);
526 
527 #else
528 
529     return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);
530 
531 #endif
532 
533 }
534 
#endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
536