1 /* mbutil.c -- readline multibyte character utility functions */
2 
3 /* Copyright (C) 2001-2005 Free Software Foundation, Inc.
4 
5    This file is part of the GNU Readline Library, a library for
6    reading lines of text with interactive input and history editing.
7 
8    The GNU Readline Library is free software; you can redistribute it
9    and/or modify it under the terms of the GNU General Public License
10    as published by the Free Software Foundation; either version 2, or
11    (at your option) any later version.
12 
13    The GNU Readline Library is distributed in the hope that it will be
14    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
15    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    The GNU General Public License is often shipped with GNU software, and
19    is generally kept in a file called COPYING or LICENSE.  If you do not
20    have a copy of the license, write to the Free Software Foundation,
21    51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA. */
22 #define READLINE_LIBRARY
23 
24 #if defined (HAVE_CONFIG_H)
25 #  include "config_readline.h"
26 #endif
27 
28 #include <sys/types.h>
29 #include <fcntl.h>
30 #include "posixjmp.h"
31 
32 #if defined (HAVE_UNISTD_H)
33 #  include <unistd.h>	   /* for _POSIX_VERSION */
34 #endif /* HAVE_UNISTD_H */
35 
36 #if defined (HAVE_STDLIB_H)
37 #  include <stdlib.h>
38 #else
39 #  include "ansi_stdlib.h"
40 #endif /* HAVE_STDLIB_H */
41 
42 #include <stdio.h>
43 #include <ctype.h>
44 
45 /* System-specific feature definitions and include files. */
46 #include "rldefs.h"
47 #include "rlmbutil.h"
48 
49 #if defined (TIOCSTAT_IN_SYS_IOCTL)
50 #  include <sys/ioctl.h>
51 #endif /* TIOCSTAT_IN_SYS_IOCTL */
52 
53 /* Some standard library routines. */
54 #include "readline.h"
55 
56 #include "rlprivate.h"
57 #include "xmalloc.h"
58 
/* Declared here so it can be shared between the readline and history
   libraries.  The default is 0 when the library is built with
   HANDLE_MULTIBYTE and 1 otherwise.  When it is non-zero,
   _rl_char_value () in this file returns the byte at the requested index
   without attempting any multibyte decoding. */
#if defined (HANDLE_MULTIBYTE)
int rl_byte_oriented = 0;
#else
int rl_byte_oriented = 1;
#endif
66 
67 /* **************************************************************** */
68 /*								    */
69 /*		Multibyte Character Utility Functions		    */
70 /*								    */
71 /* **************************************************************** */
72 
73 #if defined(HANDLE_MULTIBYTE)
74 
/* Return the byte index reached by moving forward COUNT characters in
   STRING, starting from byte index SEED.

   A negative SEED is treated as 0, and SEED is returned unchanged when
   COUNT <= 0.  If SEED falls inside a multibyte character, the position
   is first moved up to the next boundary reported by _rl_adjust_point ()
   and that move consumes one unit of COUNT.  Bytes that mbrtowc () rejects
   are each counted as one character.  The walk stops early at a wide NUL.

   When FIND_NON_ZERO is non-zero, characters for which wcwidth () returns
   0 are stepped over without consuming COUNT, and any zero-width
   characters that follow the final position are skipped as well.

   NOTE: _rl_adjust_point () returns -1 when SEED is greater than the
   length of STRING; that case is not handled specially here. */
static int
_rl_find_next_mbchar_internal (string, seed, count, find_non_zero)
     char *string;
     int seed, count, find_non_zero;
{
  size_t tmp;
  mbstate_t ps;
  int point;
  wchar_t wc;

  tmp = 0;

  memset (&ps, 0, sizeof (mbstate_t));
  if (seed < 0)
    seed = 0;
  if (count <= 0)
    return seed;

  point = seed + _rl_adjust_point (string, seed, &ps);
  /* If the adjusted point moved forward, SEED was not on the first byte
     of a character.  Getting to the boundary counts as one move. */
  if (seed < point)
    count--;

  while (count > 0)
    {
      tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
      if (MB_INVALIDCH ((size_t)tmp))
        {
          /* Invalid or incomplete bytes: assume one byte is one character. */
          point++;
          count--;
          /* The conversion state is unusable after an error; reset it. */
          memset (&ps, 0, sizeof (mbstate_t));
        }
      else if (MB_NULLWCH (tmp))
        break;                  /* found wide '\0' */
      else
        {
          /* Valid character: always advance over it, but only count it
             when zero-width characters are not being ignored or it has
             a non-zero width. */
          point += tmp;
          if (find_non_zero)
            {
              if (wcwidth (wc) == 0)
                continue;
              else
                count--;
            }
          else
            count--;
        }
    }

  if (find_non_zero)
    {
      /* Skip zero-width characters trailing the final position.  The
         result of mbrtowc () must be checked for the error values
         (size_t)-1 and (size_t)-2 before WC is examined: both compare
         greater than zero as a size_t, WC is not meaningful after a
         failed conversion, and adding such a value to POINT would move
         it backwards. */
      tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
      while (MB_NULLWCH (tmp) == 0 && MB_INVALIDCH (tmp) == 0
             && wcwidth (wc) == 0)
        {
          point += tmp;
          tmp = mbrtowc (&wc, string + point, strlen (string + point), &ps);
        }
    }

  return point;
}
142 
/* Return the byte index at which the character preceding byte index SEED
   in STRING starts.

   Returns 0 when SEED is negative and the length of STRING when SEED lies
   beyond it.  Otherwise STRING is scanned from its beginning and the start
   of the last character seen before SEED is remembered.  When
   FIND_NON_ZERO is non-zero, characters for which wcwidth () returns 0 are
   not remembered, so the result is the start of the last non-zero-width
   character. */
static int
_rl_find_prev_mbchar_internal (string, seed, find_non_zero)
     char *string;
     int seed, find_non_zero;
{
  mbstate_t state;
  wchar_t wch;
  size_t nbytes;
  int slen, cur, last_start;

  memset (&state, 0, sizeof (mbstate_t));
  slen = strlen (string);

  if (seed < 0)
    return 0;
  if (seed > slen)
    return slen;

  last_start = 0;
  for (cur = 0; cur < seed; cur += nbytes)
    {
      nbytes = mbrtowc (&wch, string + cur, slen - cur, &state);
      if (MB_INVALIDCH ((size_t)nbytes))
        {
          /* Invalid or truncated sequence: treat the first byte as a
             single non-zero-width character, remember where it starts,
             and start over with a clean conversion state since the old
             one is no longer meaningful. */
          nbytes = 1;
          memset (&state, 0, sizeof (mbstate_t));
          last_start = cur;
        }
      else if (MB_NULLWCH (nbytes))
        break;                  /* wide '\0' -- stop scanning */
      else if (find_non_zero == 0 || wcwidth (wch) != 0)
        last_start = cur;
    }

  return last_start;
}
197 
/* Measure the multibyte character that starts at SRC using mbrlen (),
   looking at no more than strlen (SRC) bytes and using PS as the
   conversion state.

   Returns the number of bytes in the character when a non-null character
   was recognized, 0 when mbrlen () reports a null wide character, -1 for
   an invalid multibyte sequence and -2 when the bytes do not form a
   complete character.  In the two error cases the state pointed to by PS,
   if PS is not null, is cleared before returning. */
int
_rl_get_char_len (src, ps)
     char *src;
     mbstate_t *ps;
{
  size_t nbytes;
  int failed_incomplete, failed_invalid;

  nbytes = mbrlen ((const char *)src, (size_t)strlen (src), ps);

  failed_incomplete = (nbytes == (size_t)(-2));
  failed_invalid = (nbytes == (size_t)(-1));

  if (failed_incomplete || failed_invalid)
    {
      /* Put the conversion state back to its initial value. */
      if (ps)
        memset (ps, 0, sizeof (mbstate_t));
      return failed_incomplete ? -2 : -1;
    }

  return (int)nbytes;
}
231 
/* Compare the multibyte character at BUF1[POS1] with the one at
   BUF2[POS2].  PS1 and PS2 are the conversion states handed to
   _rl_get_char_len () for the respective buffers.

   Returns 1 when both characters have the same positive byte length and
   identical bytes, and 0 otherwise.  The second character is not measured
   at all when the first one has no positive length. */
int
_rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2)
     char *buf1;
     int pos1;
     mbstate_t *ps1;
     char *buf2;
     int pos2;
     mbstate_t *ps2;
{
  int len1, len2, k;

  len1 = _rl_get_char_len (&buf1[pos1], ps1);
  if (len1 <= 0)
    return 0;

  len2 = _rl_get_char_len (&buf2[pos2], ps2);
  if (len2 <= 0)
    return 0;

  if (len1 != len2)
    return 0;

  /* Same length: the characters match only if every byte does. */
  for (k = 0; k < len1; k++)
    if (buf1[pos1 + k] != buf2[pos2 + k])
      return 0;

  return 1;
}
257 
/* Walk STRING from its first byte, one multibyte character at a time,
   until the position reaches or passes byte index POINT, updating the
   conversion state PS along the way.

   Returns the distance in bytes between the position where the walk
   stopped and POINT; this is 0 when the walk lands exactly on POINT.
   Returns -1 if POINT is invalid, i.e. negative or greater than the
   length of STRING. */
int
_rl_adjust_point (string, point, ps)
     char *string;
     int point;
     mbstate_t *ps;
{
  size_t nbytes;
  int slen, cur;

  slen = strlen (string);
  if (point < 0 || point > slen)
    return -1;

  for (cur = 0; cur < point; )
    {
      nbytes = mbrlen (string + cur, slen - cur, ps);
      if (MB_INVALIDCH ((size_t)nbytes))
        {
          /* Invalid or truncated sequence: treat the first byte as a
             character by itself.  The conversion state is undefined
             after such a failure, so clear it. */
          cur++;
          if (ps)
            memset (ps, 0, sizeof (mbstate_t));
        }
      else if (MB_NULLWCH (nbytes))
        cur++;                  /* step over a null character */
      else
        cur += nbytes;
    }

  return (cur - point);
}
301 
/* Return 1 if the LENGTH bytes stored at MBCHAR are identical to the
   LENGTH bytes of STRING beginning at byte index SEED, and 0 if any byte
   differs.  Also returns 0, without comparing anything, when fewer than
   LENGTH bytes lie between SEED and END. */
int
_rl_is_mbchar_matched (string, seed, end, mbchar, length)
     char *string;
     int seed, end;
     char *mbchar;
     int length;
{
  int offset;

  if (length > (end - seed))
    return 0;

  offset = 0;
  while (offset < length)
    {
      if (mbchar[offset] != string[seed + offset])
        return 0;
      offset++;
    }

  return 1;
}
319 
320 wchar_t
_rl_char_value(buf,ind)321 _rl_char_value (buf, ind)
322      char *buf;
323      int ind;
324 {
325   size_t tmp;
326   wchar_t wc;
327   mbstate_t ps;
328   int l;
329 
330   if (MB_LEN_MAX == 1 || rl_byte_oriented)
331     return ((wchar_t) buf[ind]);
332   l = strlen (buf);
333   if (ind >= l - 1)
334     return ((wchar_t) buf[ind]);
335   memset (&ps, 0, sizeof (mbstate_t));
336   tmp = mbrtowc (&wc, buf + ind, l - ind, &ps);
337   if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp))
338     return ((wchar_t) buf[ind]);
339   return wc;
340 }
341 #endif /* HANDLE_MULTIBYTE */
342 
/* Return the byte index found by moving COUNT characters forward from
   byte index SEED in STRING.  When built with HANDLE_MULTIBYTE the work is
   done by _rl_find_next_mbchar_internal (), which receives FLAGS as its
   find_non_zero argument (pass MB_FIND_NONZERO to look for
   non-zero-width multibyte characters).  Otherwise every byte is one
   character and the result is simply SEED + COUNT. */
#undef _rl_find_next_mbchar
int
_rl_find_next_mbchar (string, seed, count, flags)
     char *string __attribute__((unused));
     int seed, count, flags __attribute__((unused));
{
  int next;

#if defined (HANDLE_MULTIBYTE)
  next = _rl_find_next_mbchar_internal (string, seed, count, flags);
#else
  next = seed + count;
#endif

  return next;
}
358 
/* Return the byte index at which the character before byte index SEED in
   STRING starts; the result is never greater than SEED.  When built with
   HANDLE_MULTIBYTE the work is done by _rl_find_prev_mbchar_internal (),
   which receives FLAGS as its find_non_zero argument (pass
   MB_FIND_NONZERO to look for non-zero-width multibyte characters).
   Otherwise the result is SEED - 1, except that a SEED of 0 is returned
   unchanged. */
#undef _rl_find_prev_mbchar
int
_rl_find_prev_mbchar (string, seed, flags)
     char *string __attribute__((unused));
     int seed, flags __attribute__((unused));
{
  int prev;

#if defined (HANDLE_MULTIBYTE)
  prev = _rl_find_prev_mbchar_internal (string, seed, flags);
#else
  if (seed == 0)
    prev = seed;
  else
    prev = seed - 1;
#endif

  return prev;
}
374