1 #include "rar.hpp"
2
3 #if defined(_EMX) && !defined(_DJGPP)
4 #include "unios2.cpp"
5 #endif
6
WideToChar(const wchar * Src,char * Dest,size_t DestSize)7 bool WideToChar(const wchar *Src,char *Dest,size_t DestSize)
8 {
9 bool RetCode=true;
10 *Dest=0; // Set 'Dest' to zero just in case the conversion will fail.
11
12 #ifdef _WIN_ALL
13 if (WideCharToMultiByte(CP_ACP,0,Src,-1,Dest,(int)DestSize,NULL,NULL)==0)
14 RetCode=false;
15
16 #elif defined(_APPLE)
17 WideToUtf(Src,Dest,DestSize);
18
19 #elif defined(MBFUNCTIONS)
20 size_t ResultingSize=wcstombs(Dest,Src,DestSize);
21 if (ResultingSize==(size_t)-1)
22 RetCode=false;
23 if (ResultingSize==0 && *Src!=0)
24 RetCode=false;
25
26 if ((!RetCode || *Dest==0 && *Src!=0) && DestSize>NM && wcslen(Src)<NM)
27 {
28 /* Workaround for strange Linux Unicode functions bug.
29 Some of wcstombs and mbstowcs implementations in some situations
30 (we are yet to find out what it depends on) can return an empty
31 string and success code if buffer size value is too large.
32 */
33 return(WideToChar(Src,Dest,NM));
34 }
35
36 #else
37 if (UnicodeEnabled())
38 {
39 #if defined(_EMX) && !defined(_DJGPP)
40 int len=Min(wcslen(Src)+1,DestSize-1);
41 if (uni_fromucs((UniChar*)Src,len,Dest,(size_t*)&DestSize)==-1 ||
42 DestSize>len*2)
43 RetCode=false;
44 Dest[DestSize]=0;
45 #endif
46 }
47 else
48 for (int I=0;I<DestSize;I++)
49 {
50 Dest[I]=(char)Src[I];
51 if (Src[I]==0)
52 break;
53 }
54 #endif
55
56 // We tried to return the zero terminated string if conversion is failed,
57 // but it does not work well. WideCharToMultiByte returns 'failed' code
58 // and partially converted string even if we wanted to convert only a part
59 // of string and passed DestSize smaller than required for fully converted
60 // string. Such call is the valid behavior in RAR code and we do not expect
61 // the empty string in this case.
62
63 return(RetCode);
64 }
65
66
CharToWide(const char * Src,wchar * Dest,size_t DestSize)67 bool CharToWide(const char *Src,wchar *Dest,size_t DestSize)
68 {
69 bool RetCode=true;
70 *Dest=0; // Set 'Dest' to zero just in case the conversion will fail.
71
72 #ifdef _WIN_ALL
73 if (MultiByteToWideChar(CP_ACP,0,Src,-1,Dest,(int)DestSize)==0)
74 RetCode=false;
75
76 #elif defined(_APPLE)
77 UtfToWide(Src,Dest,DestSize);
78
79 #elif defined(MBFUNCTIONS)
80 size_t ResultingSize=mbstowcs(Dest,Src,DestSize);
81 if (ResultingSize==(size_t)-1)
82 RetCode=false;
83 if (ResultingSize==0 && *Src!=0)
84 RetCode=false;
85
86 if ((!RetCode || *Dest==0 && *Src!=0) && DestSize>NM && strlen(Src)<NM)
87 {
88 /* Workaround for strange Linux Unicode functions bug.
89 Some of wcstombs and mbstowcs implementations in some situations
90 (we are yet to find out what it depends on) can return an empty
91 string and success code if buffer size value is too large.
92 */
93 return(CharToWide(Src,Dest,NM));
94 }
95 #else
96 if (UnicodeEnabled())
97 {
98 #if defined(_EMX) && !defined(_DJGPP)
99 int len=Min(strlen(Src)+1,DestSize-1);
100 if (uni_toucs((char*)Src,len,(UniChar*)Dest,(size_t*)&DestSize)==-1 ||
101 DestSize>len)
102 DestSize=0;
103 RetCode=false;
104 #endif
105 }
106 else
107 for (int I=0;I<DestSize;I++)
108 {
109 Dest[I]=(wchar_t)Src[I];
110 if (Src[I]==0)
111 break;
112 }
113 #endif
114
115 // We tried to return the zero terminated string if conversion is failed,
116 // but it does not work well. MultiByteToWideChar returns 'failed' code
117 // even if we wanted to convert only a part of string and passed DestSize
118 // smaller than required for fully converted string. Such call is the valid
119 // behavior in RAR code and we do not expect the empty string in this case.
120
121 return(RetCode);
122 }
123
124
125 // SrcSize is in wide characters, not in bytes.
WideToRaw(const wchar * Src,byte * Dest,size_t SrcSize)126 byte* WideToRaw(const wchar *Src,byte *Dest,size_t SrcSize)
127 {
128 for (size_t I=0;I<SrcSize;I++,Src++)
129 {
130 Dest[I*2]=(byte)*Src;
131 Dest[I*2+1]=(byte)(*Src>>8);
132 if (*Src==0)
133 break;
134 }
135 return(Dest);
136 }
137
138
RawToWide(const byte * Src,wchar * Dest,size_t DestSize)139 wchar* RawToWide(const byte *Src,wchar *Dest,size_t DestSize)
140 {
141 for (size_t I=0;I<DestSize;I++)
142 if ((Dest[I]=Src[I*2]+(Src[I*2+1]<<8))==0)
143 break;
144 return(Dest);
145 }
146
147
WideToUtf(const wchar * Src,char * Dest,size_t DestSize)148 void WideToUtf(const wchar *Src,char *Dest,size_t DestSize)
149 {
150 long dsize=(long)DestSize;
151 dsize--;
152 while (*Src!=0 && --dsize>=0)
153 {
154 uint c=*(Src++);
155 if (c<0x80)
156 *(Dest++)=c;
157 else
158 if (c<0x800 && --dsize>=0)
159 {
160 *(Dest++)=(0xc0|(c>>6));
161 *(Dest++)=(0x80|(c&0x3f));
162 }
163 else
164 if (c<0x10000 && (dsize-=2)>=0)
165 {
166 *(Dest++)=(0xe0|(c>>12));
167 *(Dest++)=(0x80|((c>>6)&0x3f));
168 *(Dest++)=(0x80|(c&0x3f));
169 }
170 else
171 if (c < 0x200000 && (dsize-=3)>=0)
172 {
173 *(Dest++)=(0xf0|(c>>18));
174 *(Dest++)=(0x80|((c>>12)&0x3f));
175 *(Dest++)=(0x80|((c>>6)&0x3f));
176 *(Dest++)=(0x80|(c&0x3f));
177 }
178 }
179 *Dest=0;
180 }
181
182
UtfToWide(const char * Src,wchar * Dest,size_t DestSize)183 void UtfToWide(const char *Src,wchar *Dest,size_t DestSize)
184 {
185 long dsize=(long)DestSize;
186 dsize--;
187 while (*Src!=0)
188 {
189 uint c=(byte)*(Src++),d;
190 if (c<0x80)
191 d=c;
192 else
193 if ((c>>5)==6)
194 {
195 if ((*Src&0xc0)!=0x80)
196 break;
197 d=((c&0x1f)<<6)|(*Src&0x3f);
198 Src++;
199 }
200 else
201 if ((c>>4)==14)
202 {
203 if ((Src[0]&0xc0)!=0x80 || (Src[1]&0xc0)!=0x80)
204 break;
205 d=((c&0xf)<<12)|((Src[0]&0x3f)<<6)|(Src[1]&0x3f);
206 Src+=2;
207 }
208 else
209 if ((c>>3)==30)
210 {
211 if ((Src[0]&0xc0)!=0x80 || (Src[1]&0xc0)!=0x80 || (Src[2]&0xc0)!=0x80)
212 break;
213 d=((c&7)<<18)|((Src[0]&0x3f)<<12)|((Src[1]&0x3f)<<6)|(Src[2]&0x3f);
214 Src+=3;
215 }
216 else
217 break;
218 if (--dsize<0)
219 break;
220 if (d>0xffff)
221 {
222 if (--dsize<0 || d>0x10ffff)
223 break;
224 *(Dest++)=((d-0x10000)>>10)+0xd800;
225 *(Dest++)=(d&0x3ff)+0xdc00;
226 }
227 else
228 *(Dest++)=d;
229 }
230 *Dest=0;
231 }
232
233
// Tells if Unicode conversion is available. Without UNICODE_SUPPORTED
// the answer is always false. With it the answer is true, except for
// _EMX builds, where the value of 'uni_ready' is returned.
bool UnicodeEnabled()
{
#if !defined(UNICODE_SUPPORTED)
  return false;
#elif defined(_EMX)
  return uni_ready;
#else
  return true;
#endif
}
246
247
wcsicomp(const wchar * s1,const wchar * s2)248 int wcsicomp(const wchar *s1,const wchar *s2)
249 {
250 char Ansi1[NM*sizeof(wchar)],Ansi2[NM*sizeof(wchar)];
251 WideToChar(s1,Ansi1,sizeof(Ansi1));
252 WideToChar(s2,Ansi2,sizeof(Ansi2));
253 return(stricomp(Ansi1,Ansi2));
254 }
255
256
wcsnicomp_w2c(const wchar * s1,const wchar * s2,size_t n)257 static int wcsnicomp_w2c(const wchar *s1,const wchar *s2,size_t n)
258 {
259 char Ansi1[NM*2],Ansi2[NM*2];
260 GetAsciiName(s1,Ansi1,ASIZE(Ansi1));
261 GetAsciiName(s2,Ansi2,ASIZE(Ansi2));
262 return(stricomp(Ansi1,Ansi2));
263 }
264
265
wcsnicomp(const wchar * s1,const wchar * s2,size_t n)266 int wcsnicomp(const wchar *s1,const wchar *s2,size_t n)
267 {
268 return(wcsnicomp_w2c(s1,s2,n));
269 }
270
271
272 #ifndef SFX_MODULE
wcslower(wchar * Str)273 wchar* wcslower(wchar *Str)
274 {
275 for (wchar *ChPtr=Str;*ChPtr;ChPtr++)
276 if (*ChPtr<128)
277 *ChPtr=loctolower((byte)*ChPtr);
278 return(Str);
279 }
280 #endif
281
282
283 #ifndef SFX_MODULE
wcsupper(wchar * Str)284 wchar* wcsupper(wchar *Str)
285 {
286 for (wchar *ChPtr=Str;*ChPtr;ChPtr++)
287 if (*ChPtr<128)
288 *ChPtr=loctoupper((byte)*ChPtr);
289 return(Str);
290 }
291 #endif
292
293
toupperw(int ch)294 int toupperw(int ch)
295 {
296 return((ch<128) ? loctoupper(ch):ch);
297 }
298
299
tolowerw(int ch)300 int tolowerw(int ch)
301 {
302 #ifdef _WIN_ALL
303 return((int)(LPARAM)CharLowerW((wchar *)(uint)ch));
304 #else
305 return((ch<128) ? loctolower(ch):ch);
306 #endif
307 }
308
309
// Converts leading decimal digits of the wide string 's' to an integer.
// Conversion stops at the first character which is not '0'..'9'.
// Returns 0 if the string does not start with a digit. Sign and leading
// spaces are not recognized.
int atoiw(const wchar *s)
{
  int Value=0;
  for (;*s>='0' && *s<='9';s++)
    Value=Value*10+(*s-'0');
  return Value;
}
320
321
322 #ifdef DBCS_SUPPORTED
323 SupportDBCS gdbcs;
324
SupportDBCS()325 SupportDBCS::SupportDBCS()
326 {
327 Init();
328 }
329
330
Init()331 void SupportDBCS::Init()
332 {
333 CPINFO CPInfo;
334 GetCPInfo(CP_ACP,&CPInfo);
335 DBCSMode=CPInfo.MaxCharSize > 1;
336 for (uint I=0;I<ASIZE(IsLeadByte);I++)
337 IsLeadByte[I]=IsDBCSLeadByte(I)!=0;
338 }
339
340
charnext(const char * s)341 char* SupportDBCS::charnext(const char *s)
342 {
343 // Zero cannot be the trail byte. So if next byte after the lead byte
344 // is 0, the string is corrupt and we'll better return the pointer to 0,
345 // to break string processing loops.
346 return (char *)(IsLeadByte[(byte)*s] && s[1]!=0 ? s+2:s+1);
347 }
348
349
strlend(const char * s)350 size_t SupportDBCS::strlend(const char *s)
351 {
352 size_t Length=0;
353 while (*s!=0)
354 {
355 if (IsLeadByte[(byte)*s])
356 s+=2;
357 else
358 s++;
359 Length++;
360 }
361 return(Length);
362 }
363
364
strchrd(const char * s,int c)365 char* SupportDBCS::strchrd(const char *s, int c)
366 {
367 while (*s!=0)
368 if (IsLeadByte[(byte)*s])
369 s+=2;
370 else
371 if (*s==c)
372 return((char *)s);
373 else
374 s++;
375 return(NULL);
376 }
377
378
copychrd(char * dest,const char * src)379 void SupportDBCS::copychrd(char *dest,const char *src)
380 {
381 dest[0]=src[0];
382 if (IsLeadByte[(byte)src[0]])
383 dest[1]=src[1];
384 }
385
386
strrchrd(const char * s,int c)387 char* SupportDBCS::strrchrd(const char *s, int c)
388 {
389 const char *found=NULL;
390 while (*s!=0)
391 if (IsLeadByte[(byte)*s])
392 s+=2;
393 else
394 {
395 if (*s==c)
396 found=s;
397 s++;
398 }
399 return((char *)found);
400 }
401 #endif
402