xref: /openbsd/gnu/lib/libreadline/mbutil.c (revision 9704b281)
1 /* mbutil.c -- readline multibyte character utility functions */
2 
3 /* Copyright (C) 2001 Free Software Foundation, Inc.
4 
5    This file is part of the GNU Readline Library, a library for
6    reading lines of text with interactive input and history editing.
7 
8    The GNU Readline Library is free software; you can redistribute it
9    and/or modify it under the terms of the GNU General Public License
10    as published by the Free Software Foundation; either version 2, or
11    (at your option) any later version.
12 
13    The GNU Readline Library is distributed in the hope that it will be
14    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
15    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    The GNU General Public License is often shipped with GNU software, and
19    is generally kept in a file called COPYING or LICENSE.  If you do not
20    have a copy of the license, write to the Free Software Foundation,
21    59 Temple Place, Suite 330, Boston, MA 02111 USA. */
22 #define READLINE_LIBRARY
23 
24 #if defined (HAVE_CONFIG_H)
25 #  include <config.h>
26 #endif
27 
28 #include <sys/types.h>
29 #include <fcntl.h>
30 #include "posixjmp.h"
31 
32 #if defined (HAVE_UNISTD_H)
33 #  include <unistd.h>	   /* for _POSIX_VERSION */
34 #endif /* HAVE_UNISTD_H */
35 
36 #if defined (HAVE_STDLIB_H)
37 #  include <stdlib.h>
38 #else
39 #  include "ansi_stdlib.h"
40 #endif /* HAVE_STDLIB_H */
41 
42 #include <stdio.h>
43 #include <ctype.h>
44 
45 /* System-specific feature definitions and include files. */
46 #include "rldefs.h"
47 #include "rlmbutil.h"
48 
49 #if defined (TIOCSTAT_IN_SYS_IOCTL)
50 #  include <sys/ioctl.h>
51 #endif /* TIOCSTAT_IN_SYS_IOCTL */
52 
53 /* Some standard library routines. */
54 #include "readline.h"
55 
56 #include "rlprivate.h"
57 #include "xmalloc.h"
58 
/* Defined in this file so that the readline and history libraries share
   one copy.  The initial value is 0 when HANDLE_MULTIBYTE is defined at
   compile time and 1 otherwise. */
#if defined (HANDLE_MULTIBYTE)
#  define RL_BYTE_ORIENTED_INIT 0
#else
#  define RL_BYTE_ORIENTED_INIT 1
#endif
int rl_byte_oriented = RL_BYTE_ORIENTED_INIT;
#undef RL_BYTE_ORIENTED_INIT
66 
67 /* **************************************************************** */
68 /*								    */
69 /*		Multibyte Character Utility Functions		    */
70 /*								    */
71 /* **************************************************************** */
72 
73 #if defined(HANDLE_MULTIBYTE)
74 
/* Return the byte offset reached by moving COUNT characters forward in
   STRING, starting from byte offset SEED.

   A negative SEED is treated as 0, and a COUNT <= 0 returns SEED
   unchanged.  If SEED is larger than the length of STRING, SEED is
   returned unchanged.  If SEED does not fall on a character boundary,
   moving up to the next boundary uses up one unit of COUNT.  A byte
   that mbrtowc cannot decode is counted as one single-byte character.
   When FIND_NON_ZERO is non-zero, characters whose wcwidth is 0 are
   stepped over without being counted, and any zero-width characters
   that follow the final position are skipped as well.  The scan stops
   at the terminating NUL of STRING even if COUNT is not exhausted. */
static int
_rl_find_next_mbchar_internal (string, seed, count, find_non_zero)
     char *string;
     int seed, count, find_non_zero;
{
  size_t tmp, len;
  mbstate_t ps;
  int point, adjust;
  wchar_t wc;

  memset (&ps, 0, sizeof (mbstate_t));
  if (seed < 0)
    seed = 0;
  if (count <= 0)
    return seed;

  /* _rl_adjust_point returns -1 when SEED is past the end of STRING.
     There is nothing to scan then, so do not fold the -1 into the
     offset and do not read beyond the buffer. */
  adjust = _rl_adjust_point (string, seed, &ps);
  if (adjust < 0)
    return seed;

  point = seed + adjust;
  /* SEED was in the middle of a character: getting to the next
     boundary counts as one move. */
  if (seed < point)
    count--;

  while (count > 0)
    {
      /* Stop at the end of the string.  Calling mbrtowc with a length
         of 0 may yield (size_t)-2, which would otherwise be mistaken
         for an invalid byte and push POINT past the NUL. */
      len = strlen (string + point);
      if (len == 0)
        break;

      tmp = mbrtowc (&wc, string + point, len, &ps);
      if (tmp == (size_t)-1 || tmp == (size_t)-2)
        {
          /* Invalid or incomplete sequence: treat one byte as one
             character and restart from a clean conversion state. */
          point++;
          count--;
          memset (&ps, 0, sizeof (mbstate_t));
        }
      else if (tmp == (size_t)0)
        break;			/* decoded a null wide character */
      else
        {
          point += tmp;
          /* Zero-width characters are passed over without being
             counted when FIND_NON_ZERO is requested. */
          if (find_non_zero == 0 || wcwidth (wc) != 0)
            count--;
        }
    }

  if (find_non_zero)
    {
      /* Skip zero-width characters following the final position.  The
         mbrtowc result is checked before WC is examined, because WC is
         only written on a successful conversion. */
      for (;;)
        {
          len = strlen (string + point);
          if (len == 0)
            break;
          tmp = mbrtowc (&wc, string + point, len, &ps);
          if (tmp == (size_t)0 || tmp == (size_t)-1 || tmp == (size_t)-2)
            break;
          if (wcwidth (wc) != 0)
            break;
          point += tmp;
        }
    }

  return point;
}
140 
/* Return the byte offset at which the character preceding byte offset
   SEED in STRING begins.

   A negative SEED returns 0; a SEED larger than the length of STRING
   returns that length.  Otherwise STRING is decoded from its start with
   mbrtowc, and the start of the last character that begins before SEED
   is returned.  When FIND_NON_ZERO is non-zero, characters whose
   wcwidth is 0 are not remembered as candidates.  A byte that cannot
   be decoded is treated as a one-byte character and is remembered like
   any other character. */
static int
_rl_find_prev_mbchar_internal (string, seed, find_non_zero)
     char *string;
     int seed, find_non_zero;
{
  mbstate_t ps;
  int prev, point, length;
  size_t tmp;
  wchar_t wc;

  if (seed < 0)
    return 0;

  length = strlen (string);
  if (length < seed)
    return length;

  memset (&ps, 0, sizeof (mbstate_t));
  prev = point = 0;
  while (point < seed)
    {
      tmp = mbrtowc (&wc, string + point, length - point, &ps);
      if (tmp == (size_t)-1 || tmp == (size_t)-2)
        {
          /* Invalid or incomplete sequence: assume this byte is a
             character by itself. */
          tmp = 1;
          /* The conversion state is unspecified after a failure, so
             start over from the initial state. */
          memset (&ps, 0, sizeof (mbstate_t));
          /* The byte is being treated as a character, so it must also
             be remembered as the most recent character start. */
          prev = point;
        }
      else if (tmp == 0)
        break;			/* decoded a null wide character */
      else if (find_non_zero == 0 || wcwidth (wc) != 0)
        prev = point;

      point += tmp;
    }

  return prev;
}
191 
/* Measure the multibyte character at the start of SRC by calling mbrlen
   over strlen (SRC) bytes with conversion state PS.

   Returns the mbrlen result as an int: the number of bytes in the
   character, or 0 if mbrlen reported a null wide character.  Returns -1
   if mbrlen reported an invalid sequence and -2 if it reported an
   incomplete one; in both of those cases *PS is zeroed when PS is
   non-null. */
int
_rl_get_char_len (src, ps)
     char *src;
     mbstate_t *ps;
{
  size_t nbytes;

  nbytes = mbrlen ((const char *)src, strlen (src), ps);

  if (nbytes == (size_t)(-1) || nbytes == (size_t)(-2))
    {
      /* Conversion failed: put the state back to its initial value. */
      if (ps)
        memset (ps, 0, sizeof (mbstate_t));
      return (nbytes == (size_t)(-2)) ? -2 : -1;
    }

  return (int)nbytes;
}
225 
/* Compare the character at BUF1[POS1] with the character at BUF2[POS2].
   The length of each character is obtained from _rl_get_char_len using
   PS1 and PS2 respectively.  Returns 1 when both lengths are positive
   and equal and every byte matches; returns 0 otherwise. */
int
_rl_compare_chars (buf1, pos1, ps1, buf2, pos2, ps2)
     char *buf1;
     int pos1;
     mbstate_t *ps1;
     char *buf2;
     int pos2;
     mbstate_t *ps2;
{
  int len1, len2, k;

  len1 = _rl_get_char_len (&buf1[pos1], ps1);
  if (len1 <= 0)
    return 0;

  /* The second character is only measured once the first one is known
     to be usable. */
  len2 = _rl_get_char_len (&buf2[pos2], ps2);
  if (len2 <= 0 || len1 != len2)
    return 0;

  for (k = 0; k < len1; k++)
    {
      if (buf1[pos1 + k] != buf2[pos2 + k])
        return 0;
    }

  return 1;
}
251 
/* Walk STRING from its start with mbrlen, one character at a time, until
   the running byte offset is no longer less than POINT, updating the
   conversion state PS along the way.  Returns how far that offset lies
   beyond POINT (offset - POINT, which is >= 0).  Returns -1 if POINT is
   invalid, i.e. negative or greater than the length of STRING. */
int
_rl_adjust_point (string, point, ps)
     char *string;
     int point;
     mbstate_t *ps;
{
  size_t nbytes;
  int total, offset, step;

  total = strlen (string);
  if (point < 0 || total < point)
    return -1;

  for (offset = 0; offset < point; offset += step)
    {
      /* Unless mbrlen reports a longer character, advance by one byte. */
      step = 1;
      nbytes = mbrlen (string + offset, total - offset, ps);
      if (nbytes == (size_t)-1 || nbytes == (size_t)-2)
        {
          /* Invalid or incomplete sequence: the byte is treated as a
             character by itself, and the state is reset because its
             contents are unspecified after a failure. */
          if (ps)
            memset (ps, 0, sizeof (mbstate_t));
        }
      else if (nbytes != 0)
        step = nbytes;
    }

  return (offset - point);
}
295 
/* Return 1 if the LENGTH bytes of STRING starting at offset SEED are
   identical to the first LENGTH bytes of MBCHAR.  Return 0 if they
   differ or if fewer than LENGTH bytes lie between SEED and END. */
int
_rl_is_mbchar_matched (string, seed, end, mbchar, length)
     char *string;
     int seed, end;
     char *mbchar;
     int length;
{
  int k;

  if (length > (end - seed))
    return 0;

  k = 0;
  while (k < length)
    {
      if (string[seed + k] != mbchar[k])
        return 0;
      k++;
    }

  return 1;
}
313 #endif /* HANDLE_MULTIBYTE */
314 
/* Return the byte offset COUNT characters after offset SEED in STRING.
   When HANDLE_MULTIBYTE is defined the work is done by
   _rl_find_next_mbchar_internal, which receives FLAGS as its
   find_non_zero argument (callers pass MB_FIND_NONZERO to look for
   non-zero-width characters).  Otherwise every byte is one character
   and the result is simply SEED + COUNT. */
#undef _rl_find_next_mbchar
int
_rl_find_next_mbchar (string, seed, count, flags)
     char *string;
     int seed, count, flags;
{
  int next;

#if defined (HANDLE_MULTIBYTE)
  next = _rl_find_next_mbchar_internal (string, seed, count, flags);
#else
  next = seed + count;
#endif

  return next;
}
330 
/* Return the byte offset at which the character before offset SEED in
   STRING begins.  When HANDLE_MULTIBYTE is defined the work is done by
   _rl_find_prev_mbchar_internal, which receives FLAGS as its
   find_non_zero argument (callers pass MB_FIND_NONZERO to look for
   non-zero-width characters).  Otherwise every byte is one character:
   the result is SEED - 1, except that a SEED of 0 is returned as is. */
#undef _rl_find_prev_mbchar
int
_rl_find_prev_mbchar (string, seed, flags)
     char *string;
     int seed, flags;
{
#if defined (HANDLE_MULTIBYTE)
  return _rl_find_prev_mbchar_internal (string, seed, flags);
#else
  if (seed == 0)
    return seed;
  return seed - 1;
#endif
}
346