xref: /reactos/sdk/tools/unicode/fold.c (revision 845faec4)
1 /*
2  * String folding
3  *
4  * Copyright 2003 Jon Griffiths
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19  */
20 
21 #include "wine/unicode.h"
22 
23 static inline WCHAR to_unicode_digit( WCHAR ch )
24 {
25     extern const WCHAR wine_digitmap[] DECLSPEC_HIDDEN;
26     return ch + wine_digitmap[wine_digitmap[ch >> 8] + (ch & 0xff)];
27 }
28 
29 static inline WCHAR to_unicode_native( WCHAR ch )
30 {
31     extern const WCHAR wine_compatmap[] DECLSPEC_HIDDEN;
32     return ch + wine_compatmap[wine_compatmap[ch >> 8] + (ch & 0xff)];
33 }
34 
35 static const WCHAR wine_ligatures[] =
36 {
37     0x00c6, 0x00de, 0x00df, 0x00e6, 0x00fe, 0x0132, 0x0133, 0x0152,
38     0x0153, 0x01c4, 0x01c5, 0x01c6, 0x01c7, 0x01c8, 0x01c9, 0x01ca,
39     0x01cb, 0x01cc, 0x01e2, 0x01e3, 0x01f1, 0x01f2, 0x01f3, 0x01fc,
40     0x01fd, 0x05f0, 0x05f1, 0x05f2, 0xfb00, 0xfb01, 0xfb02, 0xfb03,
41     0xfb04, 0xfb05, 0xfb06
42 };
43 
44 /* Unicode expanded ligatures */
45 static const WCHAR wine_expanded_ligatures[][4] =
46 {
47     { 'A','E','\0',1 },
48     { 'T','H','\0',1 },
49     { 's','s','\0',1 },
50     { 'a','e','\0',1 },
51     { 't','h','\0',1 },
52     { 'I','J','\0',1 },
53     { 'i','j','\0',1 },
54     { 'O','E','\0',1 },
55     { 'o','e','\0',1 },
56     { 'D',0x017d,'\0',1 },
57     { 'D',0x017e,'\0',1 },
58     { 'd',0x017e,'\0',1 },
59     { 'L','J','\0',1 },
60     { 'L','j','\0',1 },
61     { 'l','j','\0',1 },
62     { 'N','J','\0',1 },
63     { 'N','j','\0',1 },
64     { 'n','j','\0',1 },
65     { 0x0100,0x0112,'\0',1 },
66     { 0x0101,0x0113,'\0',1 },
67     { 'D','Z','\0',1 },
68     { 'D','z','\0',1 },
69     { 'd','z','\0',1 },
70     { 0x00c1,0x00c9,'\0',1 },
71     { 0x00e1,0x00e9,'\0',1 },
72     { 0x05d5,0x05d5,'\0',1 },
73     { 0x05d5,0x05d9,'\0',1 },
74     { 0x05d9,0x05d9,'\0',1 },
75     { 'f','f','\0',1 },
76     { 'f','i','\0',1 },
77     { 'f','l','\0',1 },
78     { 'f','f','i',2 },
79     { 'f','f','l',2 },
80     { 0x017f,'t','\0',1 },
81     { 's','t','\0',1 }
82 };
83 
84 static inline int get_ligature_len( WCHAR wc )
85 {
86     int low = 0, high = sizeof(wine_ligatures)/sizeof(WCHAR) -1;
87     while (low <= high)
88     {
89         int pos = (low + high) / 2;
90         if (wine_ligatures[pos] < wc)
91             low = pos + 1;
92         else if (wine_ligatures[pos] > wc)
93             high = pos - 1;
94         else
95             return wine_expanded_ligatures[pos][3];
96     }
97     return 0;
98 }
99 
100 static inline const WCHAR* get_ligature( WCHAR wc )
101 {
102     static const WCHAR empty_ligature[] = { '\0','\0','\0', 0 };
103     int low = 0, high = sizeof(wine_ligatures)/sizeof(WCHAR) -1;
104     while (low <= high)
105     {
106         int pos = (low + high) / 2;
107         if (wine_ligatures[pos] < wc)
108             low = pos + 1;
109         else if (wine_ligatures[pos] > wc)
110             high = pos - 1;
111         else
112             return wine_expanded_ligatures[pos];
113     }
114     return empty_ligature;
115 }
116 
117 /* fold a unicode string */
118 int wine_fold_string( int flags, const WCHAR *src, int srclen, WCHAR *dst, int dstlen )
119 {
120     WCHAR *dstbase = dst;
121     const WCHAR *expand;
122     int i;
123 
124     if (srclen == -1)
125         srclen = strlenW(src) + 1; /* Include terminating NUL in count */
126 
127     if (!dstlen)
128     {
129         /* Calculate the required size for dst */
130         dstlen = srclen;
131 
132         if (flags & MAP_EXPAND_LIGATURES)
133         {
134             while (srclen--)
135             {
136                 dstlen += get_ligature_len(*src);
137                 src++;
138             }
139         }
140         else if (flags & MAP_COMPOSITE)
141         {
142             /* FIXME */
143         }
144         else if (flags & MAP_PRECOMPOSED)
145         {
146             /* FIXME */
147         }
148         return dstlen;
149     }
150 
151     if (srclen > dstlen)
152         return 0;
153 
154     dstlen -= srclen;
155 
156     /* Actually perform the mapping(s) specified */
157     for (i = 0; i < srclen; i++)
158     {
159         WCHAR ch = *src;
160 
161         if (flags & MAP_EXPAND_LIGATURES)
162         {
163             expand = get_ligature(ch);
164             if (expand[0])
165             {
166                 if (!dstlen--)
167                     return 0;
168                 dst[0] = expand[0];
169                 if (expand[2])
170                 {
171                     if (!dstlen--)
172                         return 0;
173                     *++dst = expand[1];
174                     ch = expand[2];
175                 }
176                 else
177                     ch = expand[1];
178                 dst++;
179             }
180         }
181         else if (flags & MAP_COMPOSITE)
182         {
183             /* FIXME */
184         }
185         else if (flags & MAP_PRECOMPOSED)
186         {
187             /* FIXME */
188         }
189         if (flags & MAP_FOLDDIGITS)
190             ch = to_unicode_digit(ch);
191         if (flags & MAP_FOLDCZONE)
192             ch = to_unicode_native(ch);
193 
194         *dst++ = ch;
195         src++;
196     }
197     return dst - dstbase;
198 }
199