1 /*
2  *
3  * CLEX File Manager
4  *
5  * Copyright (C) 2001-2018 Vlado Potisk <vlado_potisk@clex.sk>
6  *
7  * CLEX is free software without warranty of any kind; see the
8  * GNU General Public License as set out in the "COPYING" document
9  * which accompanies the CLEX File Manager package.
10  *
11  * CLEX can be downloaded from http://www.clex.sk
12  *
13  */
14 
15 /* multibyte and wide string functions */
16 
17 #include "clexheaders.h"
18 
19 #include <stdlib.h>		/* mbstowcs() */
20 #include <string.h>		/* strlen() */
21 #include <wctype.h>		/* iswprint() in WCW macro */
22 
23 #include "mbwstring.h"
24 
/*
 * wc_cols() sums the display width (in screen columns, as reported
 * by the WCW macro) of the characters str[from] .. str[to - 1].
 * A negative 'to' means "up to the end of the string". Counting
 * always stops at the terminating null character.
 */
int
wc_cols(const wchar_t *str, int from, int to /* negative = till end */)
{
	int pos, width;
	wchar_t wc;

	width = 0;
	pos = from;
	while (to < 0 || pos < to) {
		wc = str[pos];
		if (wc == L'\0')
			break;		/* end of string reached before 'to' */
		width += WCW(wc);
		pos++;
	}
	return width;
}
39 
40 /*
41  * multibyte to wide string conversion with error recovery,
42  * the result is returned as exit value and also stored in
43  * the USTRINGW structure 'dst'
44 */
45 const wchar_t *
usw_convert2w(const char * str,USTRINGW * dst)46 usw_convert2w(const char *str, USTRINGW *dst)
47 {
48 	int len, max, i, conv;
49 	const char *src;
50 	mbstate_t mbstate;
51 
52 	/* try the easy way first */
53 	len = mbstowcs(0,str,0);
54 	if (len >= 0) {
55 		usw_setsize(dst,len + 1);
56 		mbstowcs(PUSTR(dst),str,len + 1);
57 		return PUSTR(dst);
58 	}
59 
60 	/* there was an error, make a char-by-char conversion with error recovery */
61 	src = str;
62 	max = usw_setsize(dst,strlen(src) + 1);
63 	memset(&mbstate,0,sizeof(mbstate));
64 	for (i = 0; /* until return */; i++) {
65 		if (i == max)
66 			max = usw_resize(dst,max + ALLOC_UNIT);
67 		conv = mbsrtowcs(PUSTR(dst) + i,&src,1,&mbstate);
68 		if (conv == -1) {
69 			/* invalid sequence */
70 			src++;
71 			PUSTR(dst)[i] = lang_data.repl;
72 			memset(&mbstate,0,sizeof(mbstate));
73 		}
74 		else if (src == 0)
75 			return PUSTR(dst);		/* conversion completed */
76 	}
77 
78 	/* NOTREACHED */
79 	return 0;
80 }
81 
82 /* NOTE: the result is overwritten with each successive function call */
83 const wchar_t *
convert2w(const char * str)84 convert2w(const char *str)
85 {
86 	static USTRINGW local = UNULL;
87 
88 	return usw_convert2w(str,&local);
89 }
90 
91 /* wide to multibyte string conversion with error recovery */
92 const char *
us_convert2mb(const wchar_t * str,USTRING * dst)93 us_convert2mb(const wchar_t *str,USTRING *dst)
94 {
95 	int len, max, i, conv;
96 	const wchar_t *src;
97 	mbstate_t mbstate;
98 
99 	/* try the easy way first */
100 	len = wcstombs(0,str,0);
101 	if (len >= 0) {
102 		us_setsize(dst,len + 1);
103 		wcstombs(PUSTR(dst),str,len + 1);
104 		return PUSTR(dst);
105 	}
106 
107 	/* there was an error, make a char-by-char conversion with error recovery */
108 	src = str;
109 	max = us_setsize(dst,wcslen(src) + 1);
110 	memset(&mbstate,0,sizeof(mbstate));
111 	for (i = 0; /* until return */; i++) {
112 		if (i == max)
113 			max = us_resize(dst,max + ALLOC_UNIT);
114 		conv = wcsrtombs(PUSTR(dst) + i,&src,1,&mbstate);
115 		if (conv == -1) {
116 			/* invalid sequence */
117 			src++;
118 			PUSTR(dst)[i] = '?';
119 			memset(&mbstate,0,sizeof(mbstate));
120 		}
121 		else if (src == 0)
122 			return PUSTR(dst);		/* conversion completed */
123 	}
124 
125 	/* NOTREACHED */
126 	return 0;
127 }
128 
129 /* NOTE: the result is overwritten with each successive function call */
130 const char *
convert2mb(const wchar_t * str)131 convert2mb(const wchar_t *str)
132 {
133 	static USTRING local = UNULL;
134 
135 	return us_convert2mb(str,&local);
136 }
137 
138 /*
139  * CREDITS: the utf_iscomposing() code including the intable() function
140  * was taken with small modifications from the VIM text editor
141  * written by Bram Moolenaar (www.vim.org)
142  */
143 
/* closed interval of character codes: first <= code <= last */
typedef struct {
	unsigned int first, last;
} INTERVAL;

/*
 * Return 1 if 'c' lies within one of the 'size' intervals of 'table',
 * otherwise return 0.
 *
 * The table must be sorted in ascending order and its intervals must
 * not overlap, because the lookup is a binary search. An empty table
 * matches nothing. 'c' is compared as an unsigned value, so a negative
 * 'c' is treated as a very large code.
 */
static int
intable(const INTERVAL *table, size_t size, int c)
{
	const unsigned int code = (unsigned int)c;
	size_t lo, hi, mid;

	/* quick rejection: empty table or code below the lowest interval */
	if (size == 0 || code < table[0].first)
		return 0;

	/* binary search over the half-open index range [lo, hi) */
	lo = 0;
	hi = size;
	while (lo < hi) {
		mid = lo + (hi - lo) / 2;
		if (table[mid].last < code)
			lo = mid + 1;
		else if (table[mid].first > code)
			hi = mid;
		else
			return 1;
	}
	return 0;
}
172 
173 /*
174  * Return 1 if "ch" is a composing UTF-8 character. This means it will be
175  * drawn on top of the preceding character.
176  * Based on code from Markus Kuhn.
177  */
178 int
utf_iscomposing(wchar_t ch)179 utf_iscomposing(wchar_t ch)
180 {
181     /* sorted list of non-overlapping intervals */
182     static INTERVAL combining[] =
183     {
184 	{0x0300, 0x034f}, {0x0360, 0x036f}, {0x0483, 0x0486}, {0x0488, 0x0489},
185 	{0x0591, 0x05a1}, {0x05a3, 0x05b9}, {0x05bb, 0x05bd}, {0x05bf, 0x05bf},
186 	{0x05c1, 0x05c2}, {0x05c4, 0x05c4}, {0x0610, 0x0615}, {0x064b, 0x0658},
187 	{0x0670, 0x0670}, {0x06d6, 0x06dc}, {0x06de, 0x06e4}, {0x06e7, 0x06e8},
188 	{0x06ea, 0x06ed}, {0x0711, 0x0711}, {0x0730, 0x074a}, {0x07a6, 0x07b0},
189 	{0x0901, 0x0903}, {0x093c, 0x093c}, {0x093e, 0x094d}, {0x0951, 0x0954},
190 	{0x0962, 0x0963}, {0x0981, 0x0983}, {0x09bc, 0x09bc}, {0x09be, 0x09c4},
191 	{0x09c7, 0x09c8}, {0x09cb, 0x09cd}, {0x09d7, 0x09d7}, {0x09e2, 0x09e3},
192 	{0x0a01, 0x0a03}, {0x0a3c, 0x0a3c}, {0x0a3e, 0x0a42}, {0x0a47, 0x0a48},
193 	{0x0a4b, 0x0a4d}, {0x0a70, 0x0a71}, {0x0a81, 0x0a83}, {0x0abc, 0x0abc},
194 	{0x0abe, 0x0ac5}, {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd}, {0x0ae2, 0x0ae3},
195 	{0x0b01, 0x0b03}, {0x0b3c, 0x0b3c}, {0x0b3e, 0x0b43}, {0x0b47, 0x0b48},
196 	{0x0b4b, 0x0b4d}, {0x0b56, 0x0b57}, {0x0b82, 0x0b82}, {0x0bbe, 0x0bc2},
197 	{0x0bc6, 0x0bc8}, {0x0bca, 0x0bcd}, {0x0bd7, 0x0bd7}, {0x0c01, 0x0c03},
198 	{0x0c3e, 0x0c44}, {0x0c46, 0x0c48}, {0x0c4a, 0x0c4d}, {0x0c55, 0x0c56},
199 	{0x0c82, 0x0c83}, {0x0cbc, 0x0cbc}, {0x0cbe, 0x0cc4}, {0x0cc6, 0x0cc8},
200 	{0x0cca, 0x0ccd}, {0x0cd5, 0x0cd6}, {0x0d02, 0x0d03}, {0x0d3e, 0x0d43},
201 	{0x0d46, 0x0d48}, {0x0d4a, 0x0d4d}, {0x0d57, 0x0d57}, {0x0d82, 0x0d83},
202 	{0x0dca, 0x0dca}, {0x0dcf, 0x0dd4}, {0x0dd6, 0x0dd6}, {0x0dd8, 0x0ddf},
203 	{0x0df2, 0x0df3}, {0x0e31, 0x0e31}, {0x0e34, 0x0e3a}, {0x0e47, 0x0e4e},
204 	{0x0eb1, 0x0eb1}, {0x0eb4, 0x0eb9}, {0x0ebb, 0x0ebc}, {0x0ec8, 0x0ecd},
205 	{0x0f18, 0x0f19}, {0x0f35, 0x0f35}, {0x0f37, 0x0f37}, {0x0f39, 0x0f39},
206 	{0x0f3e, 0x0f3f}, {0x0f71, 0x0f84}, {0x0f86, 0x0f87}, {0x0f90, 0x0f97},
207 	{0x0f99, 0x0fbc}, {0x0fc6, 0x0fc6}, {0x102c, 0x1032}, {0x1036, 0x1039},
208 	{0x1056, 0x1059}, {0x1712, 0x1714}, {0x1732, 0x1734}, {0x1752, 0x1753},
209 	{0x1772, 0x1773}, {0x17b6, 0x17d3}, {0x17dd, 0x17dd}, {0x180b, 0x180d},
210 	{0x18a9, 0x18a9}, {0x1920, 0x192b}, {0x1930, 0x193b}, {0x20d0, 0x20ea},
211 	{0x302a, 0x302f}, {0x3099, 0x309a}, {0xfb1e, 0xfb1e}, {0xfe00, 0xfe0f},
212 	{0xfe20, 0xfe23},
213     };
214 
215     return lang_data.utf8 && intable(combining,ARRAY_SIZE(combining),(int)ch);
216 }
217