1 /* Copyright (C) 2011 Edward Der-Hua Liu, Hsin-Chu, Taiwan
2  *
3  * This library is free software; you can redistribute it and/or
4  * modify it under the terms of the GNU Lesser General Public
5  * License as published by the Free Software Foundation version 2.1
6  * of the License.
7  *
8  * This library is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * Lesser General Public License for more details.
12  *
13  * You should have received a copy of the GNU Lesser General Public
14  * License along with this library; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
16  */
17 
18 #include "hime.h"
19 
20 #if !HIME_IME
utf8_big5_n(char * s,int len,char out[])21 void utf8_big5_n (char *s, int len, char out[]) {
22     out[0] = 0;
23 
24     GError *err = NULL;
25     gsize rn, wn;
26     char *big5 = g_locale_from_utf8 (s, len, &rn, &wn, &err);
27 
28     if (err || !big5) {
29         dbg ("utf8_big5 convert error\n");
30         //    abort();
31         return;
32     }
33 
34     strcpy (out, big5);
35     g_free (big5);
36 }
37 
/*
 * Convert the whole NUL-terminated UTF-8 string `s` into `out` by handing
 * its byte length to utf8_big5_n.
 */
void utf8_big5 (char *s, char out[]) {
    int nbytes = strlen (s);

    utf8_big5_n (s, nbytes, out);
}
41 #endif
42 
/*
 * Return the byte length (1 to 4) of the UTF-8 sequence starting at `s`,
 * judged from its lead byte only.
 *
 * When the lead byte matches none of the four lead-byte patterns, p_err is
 * called with a diagnostic; if p_err returns, the result is -1.
 */
int utf8_sz (char *s) {
    const int lead = *s;

    // 0xxxxxxx: single byte
    if ((lead & 0x80) == 0)
        return 1;

    switch (lead & 0xf0) {
    case 0xc0:
    case 0xd0:
        // 110xxxxx
        return 2;
    case 0xe0:
        // 1110xxxx
        return 3;
    case 0xf0:
        // 11110xxx only; 11111xxx falls through to the error path
        if ((lead & 0x08) == 0)
            return 4;
        break;
    default:
        // 10xxxxxx: continuation byte used as a lead byte
        break;
    }

    p_err ("bad utf8 char %x %c%c%c", s[0], s[0], s[1], s[2]);
    return -1;
}
59 
/*
 * Copy the single UTF-8 character at `s` into `t` and NUL-terminate `t`.
 *
 * Returns the number of bytes copied, not counting the terminator, as
 * reported by utf8_sz.
 */
int utf8cpy (char *t, char *s) {
    const int nbytes = utf8_sz (s);

    memcpy (t, s, nbytes);
    t[nbytes] = '\0';

    return nbytes;
}
67 
/*
 * Copy the first N UTF-8 characters of `s` into `t` and NUL-terminate `t`.
 * The number of bytes to copy is obtained from utf8_tlen (s, N).
 */
void utf8cpyN (char *t, char *s, int N) {
    const int nbytes = utf8_tlen (s, N);

    memcpy (t, s, nbytes);
    t[nbytes] = '\0';
}
76 
/*
 * Copy the single UTF-8 character at `s` into `t` without adding a NUL
 * terminator.
 *
 * Returns the number of bytes copied, as reported by utf8_sz.
 */
int u8cpy (char *t, char *s) {
    const int nbytes = utf8_sz (s);

    memcpy (t, s, nbytes);

    return nbytes;
}
83 
/*
 * Return the number of bytes occupied by the first N UTF-8 characters
 * starting at `s`.
 *
 * The walk does not stop at a NUL byte (utf8_sz counts it as a one-byte
 * character), so `s` must really hold at least N characters.
 */
int utf8_tlen (char *s, int N) {
    char *cur = s;

    for (int left = N; left > 0; left--)
        cur += utf8_sz (cur);

    return cur - s;
}
95 
96 int utf8_to_big5 (char *in, char *out, int outN);
/*
 * Write the single UTF-8 character at `s` to `fp`, one byte at a time.
 * The byte count comes from utf8_sz.
 */
void utf8_putchar_fp (FILE *fp, char *s) {
    int left = utf8_sz (s);

    while (left-- > 0)
        fputc (*s++, fp);
}
103 
/*
 * Write the single UTF-8 character at `s` to standard output.
 */
void utf8_putchar (char *s) {
    FILE *out = stdout;

    utf8_putchar_fp (out, s);
}
107 
/*
 * Write the first `n` UTF-8 characters of `s` to standard output.
 * There is no check for a NUL terminator; `s` must hold at least `n`
 * characters.
 */
void utf8_putcharn (char *s, int n) {
    char *cur = s;

    for (int done = 0; done < n; done++) {
        utf8_putchar (cur);
        cur += utf8_sz (cur);
    }
}
116 
utf8_eq(char * a,char * b)117 gboolean utf8_eq (char *a, char *b) {
118     int ta = utf8_sz (a);
119     int tb = utf8_sz (b);
120 
121     if (ta != tb)
122         return FALSE;
123 
124     return !memcmp (a, b, ta);
125 }
126 
utf8_str_eq(char * a,char * b,int len)127 gboolean utf8_str_eq (char *a, char *b, int len) {
128     int ta = utf8_tlen (a, len);
129     int tb = utf8_tlen (b, len);
130 
131     if (ta != tb)
132         return FALSE;
133 
134     return !memcmp (a, b, ta);
135 }
136 
/*
 * Count the UTF-8 characters in the NUL-terminated string `str`,
 * advancing by utf8_sz bytes per character.
 */
int utf8_str_N (char *str) {
    int count;

    for (count = 0; *str != '\0'; count++)
        str += utf8_sz (str);

    return count;
}
147 
/*
 * Copy at most `n` UTF-8 characters from `s` into `t`, stopping early at
 * the end of `s`, and NUL-terminate `t`.
 */
void utf8cpyn (char *t, char *s, int n) {
    char *dst = t;
    int copied = 0;

    while (copied < n && *s != '\0') {
        const int sz = utf8_sz (s);

        memcpy (dst, s, sz);
        dst += sz;
        s += sz;
        copied++;
    }

    *dst = '\0';
}
163 
/*
 * Copy whole UTF-8 characters from `s` into `t` until the end of `s` or
 * until the next character would make the output longer than `n` bytes,
 * then NUL-terminate `t`.
 *
 * At most `n` bytes plus the terminating NUL are written to `t`; a
 * character is never split.
 */
void utf8cpy_bytes (char *t, char *s, int n) {
    int tn = 0;

    while (*s) {
        int sz = utf8_sz (s);

        // The limit has to be checked against the size of the character
        // about to be copied. Checking only `tn < n` beforehand let a
        // multi-byte character run up to 3 bytes past `n`.
        // A non-positive size (utf8_sz's error value) also ends the copy.
        if (sz < 1 || sz > n - tn)
            break;

        memcpy (t + tn, s, sz);
        tn += sz;
        s += sz;
    }

    t[tn] = 0;
}
179 
/* The three-byte UTF-8 signature (byte-order mark): EF BB BF. */
char utf8_sigature[] = "\xef\xbb\xbf";

/*
 * Read three bytes from `fp` and leave the stream positioned just after
 * them when they are the UTF-8 signature.
 *
 * If they are not the signature, or fewer than three bytes could be read,
 * the stream is rewound to its beginning.
 */
void skip_utf8_sigature (FILE *fp) {
    char tt[3];

    // Compare only after a full read: a short read leaves part of tt
    // unset, and comparing it would inspect uninitialised bytes.
    if (fread (tt, 1, sizeof tt, fp) == sizeof tt &&
        memcmp (tt, utf8_sigature, sizeof tt) == 0)
        return;

    rewind (fp);
}
192