1 /*
2 *
3 * CLEX File Manager
4 *
5 * Copyright (C) 2001-2018 Vlado Potisk <vlado_potisk@clex.sk>
6 *
7 * CLEX is free software without warranty of any kind; see the
8 * GNU General Public License as set out in the "COPYING" document
9 * which accompanies the CLEX File Manager package.
10 *
11 * CLEX can be downloaded from http://www.clex.sk
12 *
13 */
14
15 /* multibyte and wide string functions */
16
17 #include "clexheaders.h"
18
19 #include <stdlib.h> /* mbstowcs() */
20 #include <string.h> /* strlen() */
21 #include <wctype.h> /* iswprint() in WCW macro */
22
23 #include "mbwstring.h"
24
/*
 * wc_cols() returns the width (in display columns) of the substring
 * str[from] .. str[to - 1]. The width of each character is taken
 * from the WCW macro. Counting stops at the terminating null
 * character; a negative 'to' means "till the end of the string".
 */
int
wc_cols(const wchar_t *str, int from, int to)
{
	wchar_t ch;
	int width, pos;

	width = 0;
	pos = from;
	while (to < 0 || pos < to) {
		ch = str[pos++];
		if (ch == L'\0')
			break;
		width += WCW(ch);
	}
	return width;
}
39
40 /*
41 * multibyte to wide string conversion with error recovery,
42 * the result is returned as exit value and also stored in
43 * the USTRINGW structure 'dst'
44 */
45 const wchar_t *
usw_convert2w(const char * str,USTRINGW * dst)46 usw_convert2w(const char *str, USTRINGW *dst)
47 {
48 int len, max, i, conv;
49 const char *src;
50 mbstate_t mbstate;
51
52 /* try the easy way first */
53 len = mbstowcs(0,str,0);
54 if (len >= 0) {
55 usw_setsize(dst,len + 1);
56 mbstowcs(PUSTR(dst),str,len + 1);
57 return PUSTR(dst);
58 }
59
60 /* there was an error, make a char-by-char conversion with error recovery */
61 src = str;
62 max = usw_setsize(dst,strlen(src) + 1);
63 memset(&mbstate,0,sizeof(mbstate));
64 for (i = 0; /* until return */; i++) {
65 if (i == max)
66 max = usw_resize(dst,max + ALLOC_UNIT);
67 conv = mbsrtowcs(PUSTR(dst) + i,&src,1,&mbstate);
68 if (conv == -1) {
69 /* invalid sequence */
70 src++;
71 PUSTR(dst)[i] = lang_data.repl;
72 memset(&mbstate,0,sizeof(mbstate));
73 }
74 else if (src == 0)
75 return PUSTR(dst); /* conversion completed */
76 }
77
78 /* NOTREACHED */
79 return 0;
80 }
81
82 /* NOTE: the result is overwritten with each successive function call */
83 const wchar_t *
convert2w(const char * str)84 convert2w(const char *str)
85 {
86 static USTRINGW local = UNULL;
87
88 return usw_convert2w(str,&local);
89 }
90
91 /* wide to multibyte string conversion with error recovery */
92 const char *
us_convert2mb(const wchar_t * str,USTRING * dst)93 us_convert2mb(const wchar_t *str,USTRING *dst)
94 {
95 int len, max, i, conv;
96 const wchar_t *src;
97 mbstate_t mbstate;
98
99 /* try the easy way first */
100 len = wcstombs(0,str,0);
101 if (len >= 0) {
102 us_setsize(dst,len + 1);
103 wcstombs(PUSTR(dst),str,len + 1);
104 return PUSTR(dst);
105 }
106
107 /* there was an error, make a char-by-char conversion with error recovery */
108 src = str;
109 max = us_setsize(dst,wcslen(src) + 1);
110 memset(&mbstate,0,sizeof(mbstate));
111 for (i = 0; /* until return */; i++) {
112 if (i == max)
113 max = us_resize(dst,max + ALLOC_UNIT);
114 conv = wcsrtombs(PUSTR(dst) + i,&src,1,&mbstate);
115 if (conv == -1) {
116 /* invalid sequence */
117 src++;
118 PUSTR(dst)[i] = '?';
119 memset(&mbstate,0,sizeof(mbstate));
120 }
121 else if (src == 0)
122 return PUSTR(dst); /* conversion completed */
123 }
124
125 /* NOTREACHED */
126 return 0;
127 }
128
129 /* NOTE: the result is overwritten with each successive function call */
130 const char *
convert2mb(const wchar_t * str)131 convert2mb(const wchar_t *str)
132 {
133 static USTRING local = UNULL;
134
135 return us_convert2mb(str,&local);
136 }
137
138 /*
139 * CREDITS: the utf_iscomposing() code including the intable() function
140 * was taken with small modifications from the VIM text editor
141 * written by Bram Moolenaar (www.vim.org)
142 */
143
/* closed interval of character codes: first <= code <= last */
typedef struct {
	unsigned int first, last;
} INTERVAL;

/*
 * intable() returns 1 if 'c' lies within one of the 'size' intervals
 * of 'table', otherwise it returns 0.
 *
 * The table must be sorted in ascending order and its intervals must
 * not overlap, because a binary search is used. The value 'c' is
 * compared as an unsigned number. An empty table never matches.
 */
static int
intable(const INTERVAL *table, size_t size, int c)
{
	size_t lo, hi, mid;
	unsigned int code;

	code = (unsigned int)c;

	/* first quick check (it also protects table[0] of an empty table) */
	if (size == 0 || code < table[0].first)
		return 0;

	/* binary search in the half-open range lo .. hi - 1 */
	lo = 0;
	hi = size;
	while (lo < hi) {
		mid = lo + (hi - lo) / 2;
		if (table[mid].last < code)
			lo = mid + 1;
		else if (table[mid].first > code)
			hi = mid;
		else
			return 1;
	}
	return 0;
}
172
173 /*
174 * Return 1 if "ch" is a composing UTF-8 character. This means it will be
175 * drawn on top of the preceding character.
176 * Based on code from Markus Kuhn.
177 */
178 int
utf_iscomposing(wchar_t ch)179 utf_iscomposing(wchar_t ch)
180 {
181 /* sorted list of non-overlapping intervals */
182 static INTERVAL combining[] =
183 {
184 {0x0300, 0x034f}, {0x0360, 0x036f}, {0x0483, 0x0486}, {0x0488, 0x0489},
185 {0x0591, 0x05a1}, {0x05a3, 0x05b9}, {0x05bb, 0x05bd}, {0x05bf, 0x05bf},
186 {0x05c1, 0x05c2}, {0x05c4, 0x05c4}, {0x0610, 0x0615}, {0x064b, 0x0658},
187 {0x0670, 0x0670}, {0x06d6, 0x06dc}, {0x06de, 0x06e4}, {0x06e7, 0x06e8},
188 {0x06ea, 0x06ed}, {0x0711, 0x0711}, {0x0730, 0x074a}, {0x07a6, 0x07b0},
189 {0x0901, 0x0903}, {0x093c, 0x093c}, {0x093e, 0x094d}, {0x0951, 0x0954},
190 {0x0962, 0x0963}, {0x0981, 0x0983}, {0x09bc, 0x09bc}, {0x09be, 0x09c4},
191 {0x09c7, 0x09c8}, {0x09cb, 0x09cd}, {0x09d7, 0x09d7}, {0x09e2, 0x09e3},
192 {0x0a01, 0x0a03}, {0x0a3c, 0x0a3c}, {0x0a3e, 0x0a42}, {0x0a47, 0x0a48},
193 {0x0a4b, 0x0a4d}, {0x0a70, 0x0a71}, {0x0a81, 0x0a83}, {0x0abc, 0x0abc},
194 {0x0abe, 0x0ac5}, {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd}, {0x0ae2, 0x0ae3},
195 {0x0b01, 0x0b03}, {0x0b3c, 0x0b3c}, {0x0b3e, 0x0b43}, {0x0b47, 0x0b48},
196 {0x0b4b, 0x0b4d}, {0x0b56, 0x0b57}, {0x0b82, 0x0b82}, {0x0bbe, 0x0bc2},
197 {0x0bc6, 0x0bc8}, {0x0bca, 0x0bcd}, {0x0bd7, 0x0bd7}, {0x0c01, 0x0c03},
198 {0x0c3e, 0x0c44}, {0x0c46, 0x0c48}, {0x0c4a, 0x0c4d}, {0x0c55, 0x0c56},
199 {0x0c82, 0x0c83}, {0x0cbc, 0x0cbc}, {0x0cbe, 0x0cc4}, {0x0cc6, 0x0cc8},
200 {0x0cca, 0x0ccd}, {0x0cd5, 0x0cd6}, {0x0d02, 0x0d03}, {0x0d3e, 0x0d43},
201 {0x0d46, 0x0d48}, {0x0d4a, 0x0d4d}, {0x0d57, 0x0d57}, {0x0d82, 0x0d83},
202 {0x0dca, 0x0dca}, {0x0dcf, 0x0dd4}, {0x0dd6, 0x0dd6}, {0x0dd8, 0x0ddf},
203 {0x0df2, 0x0df3}, {0x0e31, 0x0e31}, {0x0e34, 0x0e3a}, {0x0e47, 0x0e4e},
204 {0x0eb1, 0x0eb1}, {0x0eb4, 0x0eb9}, {0x0ebb, 0x0ebc}, {0x0ec8, 0x0ecd},
205 {0x0f18, 0x0f19}, {0x0f35, 0x0f35}, {0x0f37, 0x0f37}, {0x0f39, 0x0f39},
206 {0x0f3e, 0x0f3f}, {0x0f71, 0x0f84}, {0x0f86, 0x0f87}, {0x0f90, 0x0f97},
207 {0x0f99, 0x0fbc}, {0x0fc6, 0x0fc6}, {0x102c, 0x1032}, {0x1036, 0x1039},
208 {0x1056, 0x1059}, {0x1712, 0x1714}, {0x1732, 0x1734}, {0x1752, 0x1753},
209 {0x1772, 0x1773}, {0x17b6, 0x17d3}, {0x17dd, 0x17dd}, {0x180b, 0x180d},
210 {0x18a9, 0x18a9}, {0x1920, 0x192b}, {0x1930, 0x193b}, {0x20d0, 0x20ea},
211 {0x302a, 0x302f}, {0x3099, 0x309a}, {0xfb1e, 0xfb1e}, {0xfe00, 0xfe0f},
212 {0xfe20, 0xfe23},
213 };
214
215 return lang_data.utf8 && intable(combining,ARRAY_SIZE(combining),(int)ch);
216 }
217