1 /* Copyright (C) 2000-2015 Lavtech.com corp. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published by
5    the Free Software Foundation; either version 2 of the License, or
6    (at your option) any later version.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 */
17 
18 #include "udm_config.h"
19 #include <stdio.h>
20 #include "udm_common.h" /* For assert() */
21 #include "udm_uniconv.h"
22 
23 
24 UDM_API(void)
UdmConvInit(UDM_CONV * cnv,UDM_CHARSET * from,UDM_CHARSET * to)25 UdmConvInit(UDM_CONV *cnv, UDM_CHARSET *from, UDM_CHARSET *to)
26 {
27   cnv->from= from;
28   cnv->to= to;
29 }
30 
31 
32 size_t
UdmConvSizeNeeded(const UDM_CONV * cnv,size_t srclen,int flags)33 UdmConvSizeNeeded(const UDM_CONV *cnv, size_t srclen, int flags)
34 {
35   return srclen * 14; /* TOODO34 */
36 }
37 
38 
39 UDM_API(int)
UdmConv(UDM_CONV * c,char * d,size_t dlen,const char * s,size_t slen,int flags)40 UdmConv(UDM_CONV *c,
41         char *d, size_t dlen,
42         const char *s, size_t slen,
43         int flags)
44 {
45   udm_wc_t   wc;
46   int        res; /* is 16 is enough? */
47   char	     *d_o= d;
48   const char *s_e= s+slen;
49   char	     *d_e= d+dlen;
50   int        (*mb_wc)(udm_mbstate_t *mbstate,
51                       const struct udm_cset_st *cs_arg, udm_wc_t *wc_arg,
52                       const unsigned char *s_arg,
53                       const unsigned char *e_arg, int flags);
54   int        (*wc_mb)(udm_mbstate_t *mbstate,
55                       const struct udm_cset_st *cs, udm_wc_t wc_arg,
56                       unsigned char *s_arg,
57                       unsigned char *e_arg, int flags);
58   udm_mbstate_t istate= 0;
59   udm_mbstate_t ostate= 0;
60 /*
61 if (slen && s[0] != 0)
62   fprintf(stderr, "> %s %s %d '%.*s'\n", c->from->name, c->to->name, slen, slen, s);
63 */
64   mb_wc= c->from->cset->mb_wc;
65   wc_mb= c->to->cset->wc_mb;
66 
67   while(s <s_e && d <d_e)
68   {
69     res= mb_wc(&istate, c->from, &wc, (const unsigned char*) s,
70                                       (const unsigned char*) s_e, flags);
71     if (res > 0)
72     {
73       s+=res;
74     }
75     else if ((res == UDM_CHARSET_ILSEQ)  ||
76              (res == UDM_CHARSET_ILSEQ2) ||
77              (res == UDM_CHARSET_ILSEQ3) ||
78              (res == UDM_CHARSET_ILSEQ4) ||
79              (res == UDM_CHARSET_ILSEQ5) ||
80              (res == UDM_CHARSET_ILSEQ6))
81     {
82       /* Move at least one character */
83       s+= ((res == UDM_CHARSET_ILSEQ) ? 1 : - res);
84       wc= '?';
85       res= wc_mb(&ostate, c->to, wc, (unsigned char*) d,
86                                      (unsigned char*) d_e, flags);
87       if (res <= 0)
88         goto outaway;
89     }
90     else if (res != UDM_CHARSET_CACHEDUNI)
91       goto outaway;
92 
93     res= wc_mb(&istate, c->to, wc, (unsigned char*) d,
94                                    (unsigned char*) d_e, flags);
95     if (res > 0)
96     {
97       d+= res;
98     }
99     else if (res == UDM_CHARSET_ILUNI)
100     {
101       if (flags & (UDM_RECODE_HTML_NONASCII|UDM_RECODE_HTML_NONASCII_HEX))
102       {
103         if ( d_e - d > 8)
104         {
105           if (flags & UDM_RECODE_HTML_OUT_SPECIAL)
106           {
107             switch (wc)
108             {
109             case '&': d+= sprintf(d, "&amp;"); continue;
110             case '"': d+= sprintf(d, "&quot;"); continue;
111             case '<': d+= sprintf(d, "&lt;");  continue;
112             case '>': d+= sprintf(d, "&gt;");  continue;
113             }
114           }
115           if (flags & UDM_RECODE_HTML_NONASCII_HEX)
116             res= sprintf(d, "&#x%X;", wc);
117           else
118             res= sprintf(d, "&#%d;", wc);
119           d+= res;
120         }
121         else
122           break;
123       }
124       else
125       {
126         wc= '?';
127         res= wc_mb(&ostate, c->to, wc, (unsigned char*) d,
128                                        (unsigned char*) d_e, flags);
129         if (res <= 0)
130          goto outaway;
131       }
132     }
133     else
134       goto outaway;
135   }
136 
137 outaway:
138 
139 /*
140 if (d - d_o  && d_o[0] != 0)
141   fprintf(stderr, "< %s %s %d '%.*s'\n", c->from->name, c->to->name, d - d_o, d - d_o, d_o);
142 */
143   return d - d_o;
144 }
145 
146 
147 size_t
UdmConvLCase(UDM_UNIDATA * unidata,UDM_CONV * cnv,int cnvflags,char * dst,size_t dstlen,const char * src,size_t srclen)148 UdmConvLCase(UDM_UNIDATA *unidata,
149              UDM_CONV *cnv, int cnvflags,
150              char *dst, size_t dstlen,
151              const char *src, size_t srclen)
152 {
153 #if 0
154   {
155     size_t len= UDM_MIN(srclen, dstlen);
156     memcpy(dst, src, len);
157     cs->cset->lcase(unidata, cs, dst, len);
158     return len;
159   }
160 #else
161   {
162     size_t conv_length;
163     size_t simple_len= 0;
164 
165     size_t len= UDM_MIN(srclen, dstlen);
166     for ( ; simple_len < len; simple_len++)
167     {
168       if (*src >= 'A' && *src <= 'Z' && *src != 'I')
169         *dst++= *src++ - 'A' + 'a';
170       else if (*src >= 'a' && *src <= 'z')
171         *dst++= *src++;
172       else if (*src >= '0' && *src <= '9')
173         *dst++= *src++;
174       else
175         break;
176     }
177     dstlen-= simple_len;
178     srclen-= simple_len;
179 
180     if (srclen && dstlen)
181       conv_length= UdmStrToLowerExt(unidata, cnv, dst, dstlen, src, srclen, cnvflags);
182     else
183       conv_length= 0;
184     return simple_len + conv_length;
185   }
186 #endif
187 }
188 
189 
190 
191 UDM_API(const char *)
UdmCsGroup(UDM_CHARSET * cs)192 UdmCsGroup(UDM_CHARSET *cs)
193 {
194   switch(cs->family)
195   {
196     case UDM_CHARSET_ARABIC             : return "Arabic";
197     case UDM_CHARSET_ARMENIAN           : return "Armenian";
198     case UDM_CHARSET_BALTIC             : return "Baltic";
199     case UDM_CHARSET_CELTIC             : return "Celtic";
200     case UDM_CHARSET_CENTRAL            : return "Central Eur";
201     case UDM_CHARSET_CHINESE_SIMPLIFIED : return "Chinese Simplified";
202     case UDM_CHARSET_CHINESE_TRADITIONAL: return "Chinese Traditional";
203     case UDM_CHARSET_CYRILLIC           : return "Cyrillic";
204     case UDM_CHARSET_GREEK              : return "Greek";
205     case UDM_CHARSET_HEBREW             : return "Hebrew";
206     case UDM_CHARSET_ICELANDIC          : return "Icelandic";
207     case UDM_CHARSET_JAPANESE           : return "Japanese";
208     case UDM_CHARSET_KOREAN             : return "Korean";
209     case UDM_CHARSET_NORDIC             : return "Nordic";
210     case UDM_CHARSET_SOUTHERN           : return "South Eur";
211     case UDM_CHARSET_THAI               : return "Thai";
212     case UDM_CHARSET_TURKISH            : return "Turkish";
213     case UDM_CHARSET_UNICODE            : return "Unicode";
214     case UDM_CHARSET_VIETNAMESE         : return "Vietnamese";
215     case UDM_CHARSET_WESTERN            : return "Western";
216     case UDM_CHARSET_GEORGIAN           : return "Georgian";
217     case UDM_CHARSET_INDIAN             : return "Indian";
218     default                             : return "Unknown";
219   }
220 }
221