1 #include "rar.hpp"
2 #define MBFUNCTIONS
3
#if defined(_UNIX) && defined(MBFUNCTIONS)

// In Unix, high ASCII characters which cannot be converted to Unicode
// are mapped to the 0xE000 - 0xE0FF private use Unicode area.
static const uint MapAreaStart=0xE000;

// Code marking a string which contains mapped characters. 0xFFFF was used
// for this purpose at first, but MSVC2008 swprintf treats 0xFFFF as an error
// marker and fails. A workaround was possible, but another code is safer.
static const uint MappedStringMark=0xFFFE;

// Conversion helpers aware of the mapping described above.
static bool WideToCharMap(const wchar *Src,char *Dest,size_t DestSize,bool &Success);
static void CharToWideMap(const char *Src,wchar *Dest,size_t DestSize,bool &Success);

#endif
19
// Convert the zero terminated wide string 'Src' to a multibyte string
// placed in 'Dest'. 'DestSize' is the size of 'Dest' in bytes.
// Returns false if the platform conversion function reported a failure.
bool WideToChar(const wchar *Src,char *Dest,size_t DestSize)
{
  bool Converted=true;
  Dest[0]=0; // Leave an empty string in 'Dest' in case the conversion fails.

#ifdef _WIN_ALL
  Converted=WideCharToMultiByte(CP_ACP,0,Src,-1,Dest,(int)DestSize,NULL,NULL)!=0;

// wcstombs is broken in Android NDK r9.
#elif defined(_APPLE) || defined(_ANDROID)
  WideToUtf(Src,Dest,DestSize);

#elif defined(MBFUNCTIONS)
  // Try the conversion restoring mapped characters first. It returns false
  // if the string does not need it, so the generic conversion is used.
  const bool Mapped=WideToCharMap(Src,Dest,DestSize,Converted);
  if (!Mapped)
  {
    // Use the thread safe function working with the external state.
    mbstate_t State;
    memset(&State,0,sizeof(State));
    const wchar *Cur=Src; // wcsrtombs can change the pointer.
    const size_t Written=wcsrtombs(Dest,&Cur,DestSize,&State);
    if (Written==(size_t)-1 || (Written==0 && *Src!=0))
      Converted=false;
  }
#else
  // No conversion functions available, so just truncate every character.
  for (int I=0;I<DestSize;I++)
  {
    Dest[I]=(char)Src[I];
    if (Src[I]==0)
      break;
  }
#endif

  // Guarantee the zero termination regardless of the conversion result.
  if (DestSize>0)
    Dest[DestSize-1]=0;

  // We do not return the empty string if the conversion is failed.
  // WideCharToMultiByte returns the 'failed' code and a partially converted
  // string even if we wanted to convert only a part of string and passed
  // DestSize smaller than required for the fully converted string. Such call
  // is the valid behavior in RAR code and the empty string is not expected
  // in this case.

  return Converted;
}
65
66
// Convert the zero terminated multibyte string 'Src' to a wide string
// placed in 'Dest'. 'DestSize' is the size of 'Dest' in wide characters.
// Returns false if the conversion failed.
bool CharToWide(const char *Src,wchar *Dest,size_t DestSize)
{
  bool Converted=true;
  Dest[0]=0; // Leave an empty string in 'Dest' in case the conversion fails.

#ifdef _WIN_ALL
  Converted=MultiByteToWideChar(CP_ACP,0,Src,-1,Dest,(int)DestSize)!=0;

// mbstowcs is broken in Android NDK r9.
#elif defined(_APPLE) || defined(_ANDROID)
  UtfToWide(Src,Dest,DestSize);

#elif defined(MBFUNCTIONS)
  mbstate_t State;
  memset(&State,0,sizeof(State));
  const char *Cur=Src; // mbsrtowcs can change the pointer.
  const size_t Written=mbsrtowcs(Dest,&Cur,DestSize,&State);
  if (Written==(size_t)-1 || (Written==0 && *Src!=0))
    Converted=false;

  // The generic conversion failed, so retry mapping inconvertible
  // characters to the private use Unicode area.
  if (!Converted && DestSize>1)
    CharToWideMap(Src,Dest,DestSize,Converted);
#else
  // No conversion functions available, so just widen every character.
  for (int I=0;I<DestSize;I++)
  {
    Dest[I]=(wchar_t)Src[I];
    if (Src[I]==0)
      break;
  }
#endif

  // Guarantee the zero termination regardless of the conversion result.
  if (DestSize>0)
    Dest[DestSize-1]=0;

  // We do not return the empty string if the conversion is failed.
  // MultiByteToWideChar returns the 'failed' code even if we wanted to
  // convert only a part of string and passed DestSize smaller than required
  // for the fully converted string. Such call is the valid behavior
  // in RAR code and the empty string is not expected in this case.

  return Converted;
}
111
112
#if defined(_UNIX) && defined(MBFUNCTIONS) && !defined(_ANDROID)
// Convert and restore mapped inconvertible Unicode characters.
// We use it for extended ASCII names in Unix.
//
// Returns false and touches neither 'Dest' nor 'Success' if 'Src' does not
// contain MappedStringMark, so the caller shall use the generic conversion.
// Otherwise returns true, stores the zero terminated result to 'Dest'
// ('DestSize' bytes) and sets 'Success' to false if some character
// could not be converted. Such character is replaced by '_'.
bool WideToCharMap(const wchar *Src,char *Dest,size_t DestSize,bool &Success)
{
  // String with inconvertible characters mapped to private use Unicode area
  // must have the mark code somewhere.
  if (wcschr(Src,(wchar)MappedStringMark)==NULL)
    return false;

  Success=true;
  if (DestSize==0)
    return true; // No room even for the terminating zero.

  // Continue while there is a room for the longest multibyte character and
  // the terminating zero. We add instead of calculating DestSize-MB_CUR_MAX,
  // because the unsigned subtraction wraps around for small buffers.
  const size_t MaxCharLen=MB_CUR_MAX;
  size_t SrcPos=0,DestPos=0;
  while (Src[SrcPos]!=0 && DestPos+MaxCharLen<DestSize)
  {
    const wchar CurChar=Src[SrcPos++];
    const uint Code=uint(CurChar);
    if (Code==MappedStringMark)
      continue; // The mark itself is not a part of name.

    // For security reasons do not restore low ASCII codes, so mapping cannot
    // be used to hide control codes like path separators.
    if (Code>=MapAreaStart+0x80 && Code<MapAreaStart+0x100)
    {
      Dest[DestPos++]=char(Code-MapAreaStart);
      continue;
    }

    mbstate_t ps;
    memset(&ps,0,sizeof(ps));
    size_t Length=wcrtomb(Dest+DestPos,CurChar,&ps);
    if (Length==(size_t)-1)
    {
      // Nothing valid was stored, so place a defined replacement character
      // instead of leaving indeterminate data in the output.
      Dest[DestPos]='_';
      Success=false;
      Length=1;
    }
    DestPos+=Length>0 ? Length:1;
  }

  // The loop condition guarantees DestPos<DestSize here.
  Dest[DestPos]=0;
  return true;
}
#endif
156
157
#if defined(_UNIX) && defined(MBFUNCTIONS) && !defined(_ANDROID)
// Convert and map inconvertible Unicode characters.
// We use it for extended ASCII names in Unix.
//
// 'DestSize' is the size of 'Dest' in wide characters. 'Success' is set
// to true only if the entire 'Src' was processed. 'Dest' is always zero
// terminated when DestSize is not 0, even if the conversion was aborted.
void CharToWideMap(const char *Src,wchar *Dest,size_t DestSize,bool &Success)
{
  // Map inconvertible characters to private use Unicode area 0xE000.
  // Mark such string by placing special non-character code before
  // first inconvertible character.
  Success=false;
  if (DestSize==0)
    return; // No room even for the terminating zero.

  bool MarkAdded=false;
  size_t SrcPos=0,DestPos=0;
  while (DestPos<DestSize)
  {
    if (Src[SrcPos]==0)
    {
      Success=true;
      break;
    }
    mbstate_t ps;
    memset(&ps,0,sizeof(ps));
    const size_t Length=mbrtowc(Dest+DestPos,Src+SrcPos,MB_CUR_MAX,&ps);

    // (size_t)-1 is an invalid sequence, (size_t)-2 is an incomplete one.
    // Nothing is stored to Dest in both cases.
    if (Length==(size_t)-1 || Length==(size_t)-2)
    {
      // For security reasons we do not want to map low ASCII characters,
      // so we do not have additional .. and path separator codes.
      if (byte(Src[SrcPos])<0x80)
        break;

      if (!MarkAdded)
      {
        Dest[DestPos++]=MappedStringMark;
        MarkAdded=true;
        if (DestPos>=DestSize)
          break;
      }
      Dest[DestPos++]=byte(Src[SrcPos++])+MapAreaStart;
    }
    else
    {
      // Skip all source bytes consumed by the converted character.
      SrcPos+=Length>0 ? Length:1;
      DestPos++;
    }
  }

  // Terminate at the current position, or at the last element if the buffer
  // is full, so no unconverted data is left after the early loop exit.
  Dest[DestPos<DestSize ? DestPos:DestSize-1]=0;
}
#endif
207
208
209 // SrcSize is in wide characters, not in bytes.
WideToRaw(const wchar * Src,byte * Dest,size_t SrcSize)210 byte* WideToRaw(const wchar *Src,byte *Dest,size_t SrcSize)
211 {
212 for (size_t I=0;I<SrcSize;I++,Src++)
213 {
214 Dest[I*2]=(byte)*Src;
215 Dest[I*2+1]=(byte)(*Src>>8);
216 if (*Src==0)
217 break;
218 }
219 return Dest;
220 }
221
222
RawToWide(const byte * Src,wchar * Dest,size_t DestSize)223 wchar* RawToWide(const byte *Src,wchar *Dest,size_t DestSize)
224 {
225 for (size_t I=0;I<DestSize;I++)
226 if ((Dest[I]=Src[I*2]+(Src[I*2+1]<<8))==0)
227 break;
228 return Dest;
229 }
230
231
WideToUtf(const wchar * Src,char * Dest,size_t DestSize)232 void WideToUtf(const wchar *Src,char *Dest,size_t DestSize)
233 {
234 long dsize=(long)DestSize;
235 dsize--;
236 while (*Src!=0 && --dsize>=0)
237 {
238 uint c=*(Src++);
239 if (c<0x80)
240 *(Dest++)=c;
241 else
242 if (c<0x800 && --dsize>=0)
243 {
244 *(Dest++)=(0xc0|(c>>6));
245 *(Dest++)=(0x80|(c&0x3f));
246 }
247 else
248 {
249 if (c>=0xd800 && c<=0xdbff && *Src>=0xdc00 && *Src<=0xdfff) // Surrogate pair.
250 {
251 c=((c-0xd800)<<10)+(*Src-0xdc00)+0x10000;
252 Src++;
253 }
254 if (c<0x10000 && (dsize-=2)>=0)
255 {
256 *(Dest++)=(0xe0|(c>>12));
257 *(Dest++)=(0x80|((c>>6)&0x3f));
258 *(Dest++)=(0x80|(c&0x3f));
259 }
260 else
261 if (c < 0x200000 && (dsize-=3)>=0)
262 {
263 *(Dest++)=(0xf0|(c>>18));
264 *(Dest++)=(0x80|((c>>12)&0x3f));
265 *(Dest++)=(0x80|((c>>6)&0x3f));
266 *(Dest++)=(0x80|(c&0x3f));
267 }
268 }
269 }
270 *Dest=0;
271 }
272
273
// Return the number of bytes required to store the zero terminated wide
// string 'Src' in UTF-8, including the terminating zero.
size_t WideToUtfSize(const wchar *Src)
{
  size_t Total=1; // Terminating zero.
  while (*Src!=0)
  {
    if (*Src<0x80)
      Total++;
    else
      if (*Src<0x800)
        Total+=2;
      else
        if (*Src>=0x10000)
        {
          // Codes starting from 0x200000 do not add anything.
          if (*Src<0x200000)
            Total+=4;
        }
        else
          if (Src[0]>=0xd800 && Src[0]<=0xdbff && Src[1]>=0xdc00 && Src[1]<=0xdfff)
          {
            Total+=4; // 4 output bytes for Unicode surrogate pair.
            Src++;    // Skip the second half of pair.
          }
          else
            Total+=3;
    Src++;
  }
  return Total;
}
299
300
301 // Dest can be NULL if we only need to check validity of Src.
UtfToWide(const char * Src,wchar * Dest,size_t DestSize)302 bool UtfToWide(const char *Src,wchar *Dest,size_t DestSize)
303 {
304 bool Success=true;
305 long dsize=(long)DestSize;
306 dsize--;
307 while (*Src!=0)
308 {
309 uint c=byte(*(Src++)),d;
310 if (c<0x80)
311 d=c;
312 else
313 if ((c>>5)==6)
314 {
315 if ((*Src&0xc0)!=0x80)
316 {
317 Success=false;
318 break;
319 }
320 d=((c&0x1f)<<6)|(*Src&0x3f);
321 Src++;
322 }
323 else
324 if ((c>>4)==14)
325 {
326 if ((Src[0]&0xc0)!=0x80 || (Src[1]&0xc0)!=0x80)
327 {
328 Success=false;
329 break;
330 }
331 d=((c&0xf)<<12)|((Src[0]&0x3f)<<6)|(Src[1]&0x3f);
332 Src+=2;
333 }
334 else
335 if ((c>>3)==30)
336 {
337 if ((Src[0]&0xc0)!=0x80 || (Src[1]&0xc0)!=0x80 || (Src[2]&0xc0)!=0x80)
338 {
339 Success=false;
340 break;
341 }
342 d=((c&7)<<18)|((Src[0]&0x3f)<<12)|((Src[1]&0x3f)<<6)|(Src[2]&0x3f);
343 Src+=3;
344 }
345 else
346 {
347 Success=false;
348 break;
349 }
350 if (Dest!=NULL && --dsize<0)
351 break;
352 if (d>0xffff)
353 {
354 if (Dest!=NULL && --dsize<0)
355 break;
356 if (d>0x10ffff) // UTF-8 must end at 0x10ffff according to RFC 3629.
357 {
358 Success=false;
359 continue;
360 }
361 if (Dest!=NULL)
362 if (sizeof(*Dest)==2) // Use the surrogate pair for 2 byte Unicode.
363 {
364 *(Dest++)=((d-0x10000)>>10)+0xd800;
365 *(Dest++)=(d&0x3ff)+0xdc00;
366 }
367 else
368 *(Dest++)=d;
369 }
370 else
371 if (Dest!=NULL)
372 *(Dest++)=d;
373 }
374 if (Dest!=NULL)
375 *Dest=0;
376 return Success;
377 }
378
379
wcsicomp(const wchar * s1,const wchar * s2)380 int wcsicomp(const wchar *s1,const wchar *s2)
381 {
382 #ifdef _WIN_ALL
383 return CompareStringW(LOCALE_USER_DEFAULT,NORM_IGNORECASE|SORT_STRINGSORT,s1,-1,s2,-1)-2;
384 #else
385 while (true)
386 {
387 wchar u1 = towupper(*s1);
388 wchar u2 = towupper(*s2);
389 if (u1 != u2)
390 return u1 < u2 ? -1 : 1;
391 if (*s1==0)
392 break;
393 s1++;
394 s2++;
395 }
396 return 0;
397 #endif
398 }
399
400
wcsnicomp(const wchar * s1,const wchar * s2,size_t n)401 int wcsnicomp(const wchar *s1,const wchar *s2,size_t n)
402 {
403 #ifdef _WIN_ALL
404 // If we specify 'n' exceeding the actual string length, CompareString goes
405 // beyond the trailing zero and compares garbage. So we need to limit 'n'
406 // to real string length.
407 size_t l1=Min(wcslen(s1)+1,n);
408 size_t l2=Min(wcslen(s2)+1,n);
409 return CompareStringW(LOCALE_USER_DEFAULT,NORM_IGNORECASE|SORT_STRINGSORT,s1,(int)l1,s2,(int)l2)-2;
410 #else
411 if (n==0)
412 return 0;
413 while (true)
414 {
415 wchar u1 = towupper(*s1);
416 wchar u2 = towupper(*s2);
417 if (u1 != u2)
418 return u1 < u2 ? -1 : 1;
419 if (*s1==0 || --n==0)
420 break;
421 s1++;
422 s2++;
423 }
424 return 0;
425 #endif
426 }
427
428
wcscasestr(const wchar_t * str,const wchar_t * search)429 const wchar_t* wcscasestr(const wchar_t *str, const wchar_t *search)
430 {
431 for (size_t i=0;str[i]!=0;i++)
432 for (size_t j=0;;j++)
433 {
434 if (search[j]==0)
435 return str+i;
436 if (tolowerw(str[i+j])!=tolowerw(search[j]))
437 break;
438 }
439 return NULL;
440 }
441
442
443 #ifndef SFX_MODULE
wcslower(wchar * s)444 wchar* wcslower(wchar *s)
445 {
446 #ifdef _WIN_ALL
447 CharLower(s);
448 #else
449 for (wchar *c=s;*c!=0;c++)
450 *c=towlower(*c);
451 #endif
452 return s;
453 }
454 #endif
455
456
457 #ifndef SFX_MODULE
wcsupper(wchar * s)458 wchar* wcsupper(wchar *s)
459 {
460 #ifdef _WIN_ALL
461 CharUpper(s);
462 #else
463 for (wchar *c=s;*c!=0;c++)
464 *c=towupper(*c);
465 #endif
466 return s;
467 }
468 #endif
469
470
471
472
// Convert a single wide character code to upper case and return it.
int toupperw(int ch)
{
#ifdef _WIN_ALL
  // CharUpper is more reliable than towupper in Windows, which seems to be
  // C locale dependent even in Unicode version. For example, towupper failed
  // to convert lowercase Russian characters.
  // CharUpper treats its parameter as a single character only if the high
  // order word is zero and as a string pointer otherwise. So we use 0xffff
  // mask to prevent the crash if a larger value is passed. INT_PTR casts
  // avoid the truncation when converting between int and 64 bit pointer.
  return (int)(INT_PTR)CharUpper((wchar *)(INT_PTR)(ch&0xffff));
#else
  return towupper(ch);
#endif
}
484
485
// Convert a single wide character code to lower case and return it.
int tolowerw(int ch)
{
#ifdef _WIN_ALL
  // CharLower is more reliable than towlower in Windows, where towlower
  // seems to be C locale dependent even in Unicode version.
  // CharLower treats its parameter as a single character only if the high
  // order word is zero and as a string pointer otherwise. So we use 0xffff
  // mask to prevent the crash if a larger value is passed. INT_PTR casts
  // avoid the truncation when converting between int and 64 bit pointer.
  return (int)(INT_PTR)CharLower((wchar *)(INT_PTR)(ch&0xffff));
#else
  return towlower(ch);
#endif
}
496
497
atoiw(const wchar * s)498 int atoiw(const wchar *s)
499 {
500 return (int)atoilw(s);
501 }
502
503
atoilw(const wchar * s)504 int64 atoilw(const wchar *s)
505 {
506 int sign=1;
507 if (*s=='-')
508 {
509 s++;
510 sign=-1;
511 }
512 int64 n=0;
513 while (*s>='0' && *s<='9')
514 {
515 n=n*10+(*s-'0');
516 s++;
517 }
518 return sign*n;
519 }
520
521
522 #ifdef DBCS_SUPPORTED
523 SupportDBCS gdbcs;
524
SupportDBCS()525 SupportDBCS::SupportDBCS()
526 {
527 Init();
528 }
529
530
Init()531 void SupportDBCS::Init()
532 {
533 CPINFO CPInfo;
534 GetCPInfo(CP_ACP,&CPInfo);
535 DBCSMode=CPInfo.MaxCharSize > 1;
536 for (uint I=0;I<ASIZE(IsLeadByte);I++)
537 IsLeadByte[I]=IsDBCSLeadByte(I)!=0;
538 }
539
540
charnext(const char * s)541 char* SupportDBCS::charnext(const char *s)
542 {
543 // Zero cannot be the trail byte. So if next byte after the lead byte
544 // is 0, the string is corrupt and we'll better return the pointer to 0,
545 // to break string processing loops.
546 return (char *)(IsLeadByte[(byte)*s] && s[1]!=0 ? s+2:s+1);
547 }
548
549
strlend(const char * s)550 size_t SupportDBCS::strlend(const char *s)
551 {
552 size_t Length=0;
553 while (*s!=0)
554 {
555 if (IsLeadByte[(byte)*s])
556 s+=2;
557 else
558 s++;
559 Length++;
560 }
561 return(Length);
562 }
563
564
strchrd(const char * s,int c)565 char* SupportDBCS::strchrd(const char *s, int c)
566 {
567 while (*s!=0)
568 if (IsLeadByte[(byte)*s])
569 s+=2;
570 else
571 if (*s==c)
572 return((char *)s);
573 else
574 s++;
575 return(NULL);
576 }
577
578
copychrd(char * dest,const char * src)579 void SupportDBCS::copychrd(char *dest,const char *src)
580 {
581 dest[0]=src[0];
582 if (IsLeadByte[(byte)src[0]])
583 dest[1]=src[1];
584 }
585
586
strrchrd(const char * s,int c)587 char* SupportDBCS::strrchrd(const char *s, int c)
588 {
589 const char *found=NULL;
590 while (*s!=0)
591 if (IsLeadByte[(byte)*s])
592 s+=2;
593 else
594 {
595 if (*s==c)
596 found=s;
597 s++;
598 }
599 return((char *)found);
600 }
601 #endif
602