1 /* mbutil.c -- readline multibyte character utility functions */
2 
3 /* Copyright (C) 2001 Free Software Foundation, Inc.
4 
5    This file is part of the GNU Readline Library, a library for
6    reading lines of text with interactive input and history editing.
7 
8    The GNU Readline Library is free software; you can redistribute it
9    and/or modify it under the terms of the GNU General Public License
10    as published by the Free Software Foundation; either version 2, or
11    (at your option) any later version.
12 
13    The GNU Readline Library is distributed in the hope that it will be
14    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
15    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    The GNU General Public License is often shipped with GNU software, and
19    is generally kept in a file called COPYING or LICENSE.  If you do not
20    have a copy of the license, write to the Free Software Foundation,
21    59 Temple Place, Suite 330, Boston, MA 02111 USA. */
22 #define READLINE_LIBRARY
23 
24 #if defined (HAVE_CONFIG_H)
25 #  include <config.h>
26 #endif
27 
28 #include <sys/types.h>
29 #include <fcntl.h>
30 #include "posixjmp.h"
31 
32 #if defined (HAVE_UNISTD_H)
33 #  include <unistd.h>	   /* for _POSIX_VERSION */
34 #endif /* HAVE_UNISTD_H */
35 
36 #if defined (HAVE_STDLIB_H)
37 #  include <stdlib.h>
38 #else
39 #  include "ansi_stdlib.h"
40 #endif /* HAVE_STDLIB_H */
41 
42 #include <stdio.h>
43 #include <ctype.h>
44 
45 /* System-specific feature definitions and include files. */
46 #include "rldefs.h"
47 #include "rlmbutil.h"
48 
49 #if defined (TIOCSTAT_IN_SYS_IOCTL)
50 #  include <sys/ioctl.h>
51 #endif /* TIOCSTAT_IN_SYS_IOCTL */
52 
53 /* Some standard library routines. */
54 #include "readline.h"
55 
56 #include "rlprivate.h"
57 #include "xmalloc.h"
58 
/* Defined in this file so that a single definition can be shared by the
   readline and history libraries.  The initial value is 1 unless the
   library is built with HANDLE_MULTIBYTE, in which case it starts at 0.  */
#if !defined (HANDLE_MULTIBYTE)
int rl_byte_oriented = 1;
#else
int rl_byte_oriented = 0;
#endif
66 
67 /* **************************************************************** */
68 /*								    */
69 /*		Multibyte Character Utility Functions		    */
70 /*								    */
71 /* **************************************************************** */
72 
73 #if defined(HANDLE_MULTIBYTE)
74 
/* Return the byte offset in STRING reached by moving forward COUNT
   characters from byte offset SEED.

   A negative SEED is treated as 0, and a COUNT <= 0 returns SEED
   unchanged.  If SEED falls in the middle of a multibyte character, it
   is first advanced to the next character boundary and that adjustment
   uses up one unit of COUNT.  Bytes that do not form a valid multibyte
   character are each treated as one character.

   If FIND_NON_ZERO is non-zero, characters for which wcwidth() returns 0
   do not use up COUNT, and any zero-width characters that immediately
   follow the final position are skipped as well.

   The scan never moves past the terminating NUL of STRING.  If SEED is
   already beyond the end of STRING (_rl_adjust_point reports -1), nothing
   is decoded and SEED + 1 is returned, i.e. the position is stepped as a
   single byte.  */
static int
_rl_find_next_mbchar_internal (string, seed, count, find_non_zero)
     char *string;
     int seed, count, find_non_zero;
{
  size_t tmp;
  mbstate_t ps;
  int point, length, adjust;
  wchar_t wc;

  memset (&ps, 0, sizeof (mbstate_t));
  if (seed < 0)
    seed = 0;
  if (count <= 0)
    return seed;

  adjust = _rl_adjust_point (string, seed, &ps);
  /* SEED lies outside STRING.  Do not decode from there; fall back to
     byte semantics so that callers still see the offset move forward.  */
  if (adjust < 0)
    return seed + 1;

  point = seed + adjust;
  /* A positive adjustment means SEED was not on the first byte of a
     character; moving to the next boundary counts as one character.  */
  if (adjust > 0)
    count--;

  /* POINT never exceeds LENGTH below: mbrtowc consumes at most the number
     of bytes it is offered, and the single-byte step for invalid input is
     only taken while at least one byte remains.  */
  length = strlen (string);

  while (count > 0 && point < length)
    {
      tmp = mbrtowc (&wc, string + point, (size_t)(length - point), &ps);
      if (tmp == (size_t)-1 || tmp == (size_t)-2)
        {
          /* Invalid or incomplete bytes: assume one byte is one character
             and reset the conversion state, which is undefined after an
             error.  */
          point++;
          count--;
          memset (&ps, 0, sizeof (mbstate_t));
        }
      else if (tmp == (size_t)0)
        break;                  /* found a null wide character */
      else
        {
          point += tmp;
          /* Zero-width characters are passed over without being counted
             when the caller asked for non-zero-width characters only.  */
          if (find_non_zero == 0 || wcwidth (wc) != 0)
            count--;
        }
    }

  if (find_non_zero)
    {
      /* Skip zero-width characters that follow the final position.  The
         result of mbrtowc is checked before WC is examined, so WC is only
         read when it was actually stored by this call.  */
      while (point < length)
        {
          tmp = mbrtowc (&wc, string + point, (size_t)(length - point), &ps);
          if (tmp == (size_t)0 || tmp == (size_t)-1 || tmp == (size_t)-2)
            break;
          if (wcwidth (wc) != 0)
            break;
          point += tmp;
        }
    }

  return point;
}
140 
/* Return the byte offset in STRING at which the character preceding byte
   offset SEED starts.  The result is always <= SEED.

   A negative SEED returns 0; a SEED larger than the length of STRING
   returns that length.  STRING is decoded from its beginning, and the
   start offset of the last character that begins before SEED is
   returned.  Bytes that are invalid or too short to form a multibyte
   character are each treated as a single one-byte character.

   If FIND_NON_ZERO is non-zero, characters for which wcwidth() returns 0
   are not recorded, so the result is the start of the last preceding
   character with a non-zero width.  */
static int
_rl_find_prev_mbchar_internal (string, seed, find_non_zero)
     char *string;
     int seed, find_non_zero;
{
  mbstate_t ps;
  int prev, point, length;
  size_t tmp;
  wchar_t wc;

  memset (&ps, 0, sizeof (mbstate_t));
  length = strlen (string);

  if (seed < 0)
    return 0;
  else if (length < seed)
    return length;

  prev = point = 0;
  while (point < seed)
    {
      tmp = mbrtowc (&wc, string + point, length - point, &ps);
      if (tmp == (size_t)-1 || tmp == (size_t)-2)
        {
          /* The bytes are invalid or too short to compose a multibyte
             character, so assume the first byte is a single character.  */
          tmp = 1;
          /* The conversion state is undefined after an error; clear it.  */
          memset (&ps, 0, sizeof (mbstate_t));
          /* That byte counts as a character in its own right, so it must
             be remembered as a possible result; otherwise the search
             would jump back over it.  */
          prev = point;
        }
      else if (tmp == 0)
        break;                  /* found a null wide character */
      else if (find_non_zero == 0 || wcwidth (wc) != 0)
        prev = point;

      point += tmp;
    }

  return prev;
}
191 
/* Return the length in bytes of the multibyte character starting at SRC,
   as determined by mbrlen() using conversion state PS.

   Returns a positive byte count when a character was recognized, 0 if
   mbrlen() reports a null wide character, -1 if the bytes form an
   invalid multibyte sequence, and -2 if they are too short to form a
   complete character.  On -1 and -2 the conversion state in PS is reset.

   Only strlen (SRC) bytes are offered to mbrlen(), so the terminating
   NUL is never examined; for an empty SRC the result is whatever
   mbrlen() reports for a zero-length input.

   PS may be NULL, in which case mbrlen() uses its own internal state and
   there is nothing for this function to reset.  */
int
_rl_get_char_len (src, ps)
     char *src;
     mbstate_t *ps;
{
  size_t tmp;

  tmp = mbrlen ((const char *)src, (size_t)strlen (src), ps);
  if (tmp == (size_t)(-2) || tmp == (size_t)(-1))
    {
      /* The state is undefined after an invalid sequence and holds a
         partial character after an incomplete one; start afresh.  */
      if (ps)
        memset (ps, 0, sizeof (mbstate_t));
      return (tmp == (size_t)(-2)) ? -2 : -1;
    }

  return (int)tmp;
}
223 
/* Compare the multibyte character at BUF1[POS1] with the one at
   BUF2[POS2].  Return 1 if both are characters of the same byte length
   with identical bytes, 0 otherwise.  The lengths come from
   _rl_get_char_len() using PS1 and PS2; a length <= 0 on either side
   counts as a mismatch, and the second character is not measured when
   the first one already failed.  */
int
_rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2)
     char *buf1, *buf2;
     mbstate_t *ps1, *ps2;
     int pos1, pos2;
{
  int len1, len2, k;

  len1 = _rl_get_char_len (buf1 + pos1, ps1);
  if (len1 <= 0)
    return 0;

  len2 = _rl_get_char_len (buf2 + pos2, ps2);
  if (len2 <= 0 || len2 != len1)
    return 0;

  /* Same length: the characters match only if every byte agrees.  */
  for (k = 0; k < len1; k++)
    {
      if (buf1[pos1 + k] != buf2[pos2 + k])
        return 0;
    }

  return 1;
}
246 
/* Walk STRING from its beginning, one multibyte character at a time,
   until reaching or passing byte offset POINT, updating the conversion
   state PS along the way.  The offset reached is the first character
   boundary >= POINT; the return value is the distance from POINT to that
   boundary (0 when POINT is already on a boundary).
   Returns -1 if POINT is invalid, i.e. negative or greater than the
   length of STRING.  */
int
_rl_adjust_point(string, point, ps)
     char *string;
     int point;
     mbstate_t *ps;
{
  size_t consumed;
  int total, offset;

  total = strlen (string);
  if (point < 0 || total < point)
    return -1;

  for (offset = 0; offset < point; )
    {
      consumed = mbrlen (string + offset, total - offset, ps);
      if (consumed != (size_t)-1 && consumed != (size_t)-2)
        {
          offset += consumed;
          continue;
        }

      /* The bytes are invalid or too short to compose a multibyte
         character, so assume the first byte is a single character.  */
      offset++;
      /* The conversion state is undefined after such an error, so
         clear it before continuing.  */
      memset (ps, 0, sizeof (mbstate_t));
    }

  return offset - point;
}
287 
/* Return 1 if the LENGTH bytes of MBCHAR are equal to the LENGTH bytes
   of STRING starting at byte offset SEED, and 0 otherwise.  END bounds
   the comparison: if fewer than LENGTH bytes lie between SEED and END
   the result is 0 without looking at any byte.  */
int
_rl_is_mbchar_matched (string, seed, end, mbchar, length)
     char *string;
     int seed, end;
     char *mbchar;
     int length;
{
  int offset;

  if (length > (end - seed))
    return 0;

  offset = 0;
  while (offset < length)
    {
      if (mbchar[offset] != string[seed + offset])
        return 0;
      offset++;
    }

  return 1;
}
305 #endif /* HANDLE_MULTIBYTE */
306 
/* Return the byte offset in STRING that lies COUNT characters after byte
   offset SEED.  When built with HANDLE_MULTIBYTE the work is done by
   _rl_find_next_mbchar_internal(), with FLAGS passed through as its
   find-non-zero-width switch (MB_FIND_NONZERO asks for non-zero-width
   characters).  Otherwise every byte is one character and the result is
   simply SEED + COUNT.  */
#undef _rl_find_next_mbchar
int
_rl_find_next_mbchar (string, seed, count, flags)
     char *string;
     int seed, count, flags;
{
  int next;

#if defined (HANDLE_MULTIBYTE)
  next = _rl_find_next_mbchar_internal (string, seed, count, flags);
#else
  next = seed + count;
#endif

  return next;
}
322 
/* Return the byte offset in STRING at which the character preceding byte
   offset SEED starts.  When built with HANDLE_MULTIBYTE the work is done
   by _rl_find_prev_mbchar_internal(), with FLAGS passed through as its
   find-non-zero-width switch (MB_FIND_NONZERO asks for non-zero-width
   characters).  Otherwise every byte is one character: the result is
   SEED - 1, except that a SEED of 0 is returned unchanged.  */
#undef _rl_find_prev_mbchar
int
_rl_find_prev_mbchar (string, seed, flags)
     char *string;
     int seed, flags;
{
  int prev;

#if defined (HANDLE_MULTIBYTE)
  prev = _rl_find_prev_mbchar_internal (string, seed, flags);
#else
  if (seed == 0)
    prev = seed;
  else
    prev = seed - 1;
#endif

  return prev;
}
338