/* Copyright (C) 2000-2015 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
17
18 #include "udm_config.h"
19 #include <stdio.h>
20 #include "udm_common.h" /* For assert() */
21 #include "udm_uniconv.h"
22
23
24 UDM_API(void)
UdmConvInit(UDM_CONV * cnv,UDM_CHARSET * from,UDM_CHARSET * to)25 UdmConvInit(UDM_CONV *cnv, UDM_CHARSET *from, UDM_CHARSET *to)
26 {
27 cnv->from= from;
28 cnv->to= to;
29 }
30
31
32 size_t
UdmConvSizeNeeded(const UDM_CONV * cnv,size_t srclen,int flags)33 UdmConvSizeNeeded(const UDM_CONV *cnv, size_t srclen, int flags)
34 {
35 return srclen * 14; /* TOODO34 */
36 }
37
38
39 UDM_API(int)
UdmConv(UDM_CONV * c,char * d,size_t dlen,const char * s,size_t slen,int flags)40 UdmConv(UDM_CONV *c,
41 char *d, size_t dlen,
42 const char *s, size_t slen,
43 int flags)
44 {
45 udm_wc_t wc;
46 int res; /* is 16 is enough? */
47 char *d_o= d;
48 const char *s_e= s+slen;
49 char *d_e= d+dlen;
50 int (*mb_wc)(udm_mbstate_t *mbstate,
51 const struct udm_cset_st *cs_arg, udm_wc_t *wc_arg,
52 const unsigned char *s_arg,
53 const unsigned char *e_arg, int flags);
54 int (*wc_mb)(udm_mbstate_t *mbstate,
55 const struct udm_cset_st *cs, udm_wc_t wc_arg,
56 unsigned char *s_arg,
57 unsigned char *e_arg, int flags);
58 udm_mbstate_t istate= 0;
59 udm_mbstate_t ostate= 0;
60 /*
61 if (slen && s[0] != 0)
62 fprintf(stderr, "> %s %s %d '%.*s'\n", c->from->name, c->to->name, slen, slen, s);
63 */
64 mb_wc= c->from->cset->mb_wc;
65 wc_mb= c->to->cset->wc_mb;
66
67 while(s <s_e && d <d_e)
68 {
69 res= mb_wc(&istate, c->from, &wc, (const unsigned char*) s,
70 (const unsigned char*) s_e, flags);
71 if (res > 0)
72 {
73 s+=res;
74 }
75 else if ((res == UDM_CHARSET_ILSEQ) ||
76 (res == UDM_CHARSET_ILSEQ2) ||
77 (res == UDM_CHARSET_ILSEQ3) ||
78 (res == UDM_CHARSET_ILSEQ4) ||
79 (res == UDM_CHARSET_ILSEQ5) ||
80 (res == UDM_CHARSET_ILSEQ6))
81 {
82 /* Move at least one character */
83 s+= ((res == UDM_CHARSET_ILSEQ) ? 1 : - res);
84 wc= '?';
85 res= wc_mb(&ostate, c->to, wc, (unsigned char*) d,
86 (unsigned char*) d_e, flags);
87 if (res <= 0)
88 goto outaway;
89 }
90 else if (res != UDM_CHARSET_CACHEDUNI)
91 goto outaway;
92
93 res= wc_mb(&istate, c->to, wc, (unsigned char*) d,
94 (unsigned char*) d_e, flags);
95 if (res > 0)
96 {
97 d+= res;
98 }
99 else if (res == UDM_CHARSET_ILUNI)
100 {
101 if (flags & (UDM_RECODE_HTML_NONASCII|UDM_RECODE_HTML_NONASCII_HEX))
102 {
103 if ( d_e - d > 8)
104 {
105 if (flags & UDM_RECODE_HTML_OUT_SPECIAL)
106 {
107 switch (wc)
108 {
109 case '&': d+= sprintf(d, "&"); continue;
110 case '"': d+= sprintf(d, """); continue;
111 case '<': d+= sprintf(d, "<"); continue;
112 case '>': d+= sprintf(d, ">"); continue;
113 }
114 }
115 if (flags & UDM_RECODE_HTML_NONASCII_HEX)
116 res= sprintf(d, "&#x%X;", wc);
117 else
118 res= sprintf(d, "&#%d;", wc);
119 d+= res;
120 }
121 else
122 break;
123 }
124 else
125 {
126 wc= '?';
127 res= wc_mb(&ostate, c->to, wc, (unsigned char*) d,
128 (unsigned char*) d_e, flags);
129 if (res <= 0)
130 goto outaway;
131 }
132 }
133 else
134 goto outaway;
135 }
136
137 outaway:
138
139 /*
140 if (d - d_o && d_o[0] != 0)
141 fprintf(stderr, "< %s %s %d '%.*s'\n", c->from->name, c->to->name, d - d_o, d - d_o, d_o);
142 */
143 return d - d_o;
144 }
145
146
147 size_t
UdmConvLCase(UDM_UNIDATA * unidata,UDM_CONV * cnv,int cnvflags,char * dst,size_t dstlen,const char * src,size_t srclen)148 UdmConvLCase(UDM_UNIDATA *unidata,
149 UDM_CONV *cnv, int cnvflags,
150 char *dst, size_t dstlen,
151 const char *src, size_t srclen)
152 {
153 #if 0
154 {
155 size_t len= UDM_MIN(srclen, dstlen);
156 memcpy(dst, src, len);
157 cs->cset->lcase(unidata, cs, dst, len);
158 return len;
159 }
160 #else
161 {
162 size_t conv_length;
163 size_t simple_len= 0;
164
165 size_t len= UDM_MIN(srclen, dstlen);
166 for ( ; simple_len < len; simple_len++)
167 {
168 if (*src >= 'A' && *src <= 'Z' && *src != 'I')
169 *dst++= *src++ - 'A' + 'a';
170 else if (*src >= 'a' && *src <= 'z')
171 *dst++= *src++;
172 else if (*src >= '0' && *src <= '9')
173 *dst++= *src++;
174 else
175 break;
176 }
177 dstlen-= simple_len;
178 srclen-= simple_len;
179
180 if (srclen && dstlen)
181 conv_length= UdmStrToLowerExt(unidata, cnv, dst, dstlen, src, srclen, cnvflags);
182 else
183 conv_length= 0;
184 return simple_len + conv_length;
185 }
186 #endif
187 }
188
189
190
191 UDM_API(const char *)
UdmCsGroup(UDM_CHARSET * cs)192 UdmCsGroup(UDM_CHARSET *cs)
193 {
194 switch(cs->family)
195 {
196 case UDM_CHARSET_ARABIC : return "Arabic";
197 case UDM_CHARSET_ARMENIAN : return "Armenian";
198 case UDM_CHARSET_BALTIC : return "Baltic";
199 case UDM_CHARSET_CELTIC : return "Celtic";
200 case UDM_CHARSET_CENTRAL : return "Central Eur";
201 case UDM_CHARSET_CHINESE_SIMPLIFIED : return "Chinese Simplified";
202 case UDM_CHARSET_CHINESE_TRADITIONAL: return "Chinese Traditional";
203 case UDM_CHARSET_CYRILLIC : return "Cyrillic";
204 case UDM_CHARSET_GREEK : return "Greek";
205 case UDM_CHARSET_HEBREW : return "Hebrew";
206 case UDM_CHARSET_ICELANDIC : return "Icelandic";
207 case UDM_CHARSET_JAPANESE : return "Japanese";
208 case UDM_CHARSET_KOREAN : return "Korean";
209 case UDM_CHARSET_NORDIC : return "Nordic";
210 case UDM_CHARSET_SOUTHERN : return "South Eur";
211 case UDM_CHARSET_THAI : return "Thai";
212 case UDM_CHARSET_TURKISH : return "Turkish";
213 case UDM_CHARSET_UNICODE : return "Unicode";
214 case UDM_CHARSET_VIETNAMESE : return "Vietnamese";
215 case UDM_CHARSET_WESTERN : return "Western";
216 case UDM_CHARSET_GEORGIAN : return "Georgian";
217 case UDM_CHARSET_INDIAN : return "Indian";
218 default : return "Unknown";
219 }
220 }
221