1 /* mtext.c -- M-text module.
2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3      National Institute of Advanced Industrial Science and Technology (AIST)
4      Registration Number H15PRO112
5 
6    This file is part of the m17n library.
7 
8    The m17n library is free software; you can redistribute it and/or
9    modify it under the terms of the GNU Lesser General Public License
10    as published by the Free Software Foundation; either version 2.1 of
11    the License, or (at your option) any later version.
12 
13    The m17n library is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16    Lesser General Public License for more details.
17 
18    You should have received a copy of the GNU Lesser General Public
19    License along with the m17n library; if not, write to the Free
20    Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21    Boston, MA 02110-1301 USA.  */
22 
23 /***en
24     @addtogroup m17nMtext
25     @brief M-text objects and API for them.
26 
    In the m17n library, text is represented as an object called @e
    M-text rather than as a C-string (<tt>char *</tt> or <tt>unsigned
    char *</tt>).  An M-text is a sequence of characters whose length
    is equal to or greater than 0, and can be created from various
    character sources, e.g. C-strings, files, character codes, etc.
32 
33     M-texts are more useful than C-strings in the following points.
34 
35     @li M-texts can handle mixture of characters of various scripts,
36     including all Unicode characters and more.  This is an
37     indispensable facility when handling multilingual text.
38 
    @li Each character in an M-text can have properties called @e text
    @e properties. Text properties store various kinds of information
    attached to parts of an M-text to provide application programs
    with a unified view of that information.  As rich information can
    be stored in M-texts in the form of text properties, functions in
    application programs can be simple.
45 
46     In addition, the library provides many functions to manipulate an
47     M-text just the same way as a C-string.  */
48 
49 /***ja
50     @addtogroup m17nMtext
51 
52     @brief M-text ���֥������ȤȤ���˴ؤ��� API.
53 
54     m17n �饤�֥��ϡ� C-string��<tt>char *</tt> �� <tt>unsigned
55     char *</tt>�ˤǤϤʤ� @e M-text �ȸƤ֥��֥������Ȥǥƥ����Ȥ�ɽ�����롣
56     M-text ��Ĺ�� 0 �ʾ��ʸ����Ǥ��ꡢ���ʸ���������ʤ��Ȥ���
57     C-string���ե����롢ʸ�����������ˤ�������Ǥ��롣
58 
59     M-text �ˤϡ�C-string �ˤʤ��ʲ�����ħ�����롣
60 
61     @li M-text ������¿���μ����ʸ����Ʊ���ˡ����ߤ����ơ�Ʊ���˰������Ȥ��Ǥ��롣
62     Unicode �����Ƥ�ʸ���Ϥ��������¿����ʸ���ޤǤⰷ�����Ȥ��Ǥ��롣
63     �����¿����ƥ����Ȥ�����Ǥ�ɬ�ܤε�ǽ�Ǥ��롣
64 
65     @li M-text ��γ�ʸ���ϡ�@e �ƥ����ȥץ�ѥƥ�
66     �ȸƤФ��ץ�ѥƥ��������
67     �ƥ����ȥץ�ѥƥ��ˤ�äơ��ƥ����Ȥγ����̤˴ؤ����͡��ʾ����
68     M-text ����ݻ����뤳�Ȥ���ǽ�ˤʤ롣
69     ���Τ��ᡢ�����ξ�����ץꥱ�������ץ�����������Ū�˰������Ȥ���ǽ�ˤʤ롣
70     �ޤ���M-text
71     ���Τ�˭�٤ʾ������Ĥ��ᡢ���ץꥱ�������ץ������γƴؿ�����Dz����뤳�Ȥ��Ǥ��롣
72 
73     �����m17n �饤�֥��ϡ� C-string
74     �����뤿�����������δؿ���Ʊ���Τ�Τ� M-text
75     �����뤿��˥��ݡ��Ȥ��Ƥ��롣  */
76 
77 /*=*/
78 
79 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
80 /*** @addtogroup m17nInternal
81      @{ */
82 
83 #include <config.h>
84 #include <stdio.h>
85 #include <stdlib.h>
86 #include <string.h>
87 #include <locale.h>
88 
89 #include "m17n.h"
90 #include "m17n-misc.h"
91 #include "internal.h"
92 #include "textprop.h"
93 #include "character.h"
94 #include "mtext.h"
95 #include "plist.h"
96 
97 static M17NObjectArray mtext_table;
98 
99 static MSymbol M_charbag;
100 
/** Advance character position CHAR_POS and unit position UNIT_POS of
    M-text MT by one character.  The number of units skipped is taken
    from the head unit found at UNIT_POS (one unit for the formats
    above MTEXT_FORMAT_UTF_16BE).  Neither position is range
    checked.  */

#define INC_POSITION(mt, char_pos, unit_pos)				\
  do {									\
    if ((mt)->format > MTEXT_FORMAT_UTF_16BE)				\
      (unit_pos)++;							\
    else if ((mt)->format > MTEXT_FORMAT_UTF_8)				\
      {									\
	int head_ = ((unsigned short *) ((mt)->data))[(unit_pos)];	\
									\
	/* Non-native byte order: swap before classifying.  */		\
	if ((mt)->format != MTEXT_FORMAT_UTF_16)			\
	  head_ = SWAP_16 (head_);					\
	(unit_pos) += CHAR_UNITS_BY_HEAD_UTF16 (head_);			\
      }									\
    else								\
      {									\
	int head_ = (mt)->data[(unit_pos)];				\
									\
	(unit_pos) += CHAR_UNITS_BY_HEAD_UTF8 (head_);			\
      }									\
    (char_pos)++;							\
  } while (0)
126 
127 
/** Move character position CHAR_POS and unit position UNIT_POS of
    M-text MT back by one character.  For the one-byte-unit formats
    the scan goes back to the nearest unit satisfying CHAR_HEAD_P; for
    the two-byte-unit formats a unit in the range 0xD800..0xDFFF just
    before UNIT_POS makes the step two units.  Neither position is
    range checked.  */

#define DEC_POSITION(mt, char_pos, unit_pos)				\
  do {									\
    if ((mt)->format > MTEXT_FORMAT_UTF_16BE)				\
      (unit_pos)--;							\
    else if ((mt)->format > MTEXT_FORMAT_UTF_8)				\
      {									\
	int prev_ = ((unsigned short *) ((mt)->data))[(unit_pos) - 1];	\
									\
	if ((mt)->format != MTEXT_FORMAT_UTF_16)			\
	  prev_ = SWAP_16 (prev_);					\
	if (prev_ >= 0xD800 && prev_ < 0xE000)				\
	  (unit_pos) -= 2;						\
	else								\
	  (unit_pos) -= 1;						\
      }									\
    else								\
      {									\
	unsigned char *scan_ = (mt)->data + (unit_pos) - 1;		\
									\
	while (! CHAR_HEAD_P (scan_))					\
	  scan_--;							\
	(unit_pos) = scan_ - (mt)->data;				\
      }									\
    (char_pos)--;							\
  } while (0)
154 
/* Map the M-text format FMT to a coverage value: UTF-8 and every
   format from MTEXT_FORMAT_UTF_32LE upward give MTEXT_COVERAGE_FULL,
   US-ASCII gives MTEXT_COVERAGE_ASCII, and the remaining formats give
   MTEXT_COVERAGE_UNICODE.  FMT is parenthesized at each use so that
   any expression may be passed; note that it is evaluated up to three
   times.  */

#define FORMAT_COVERAGE(fmt)					\
  ((fmt) == MTEXT_FORMAT_UTF_8 ? MTEXT_COVERAGE_FULL		\
   : (fmt) == MTEXT_FORMAT_US_ASCII ? MTEXT_COVERAGE_ASCII	\
   : (fmt) >= MTEXT_FORMAT_UTF_32LE ? MTEXT_COVERAGE_FULL	\
   : MTEXT_COVERAGE_UNICODE)
160 
161 /* Compoare sub-texts in MT1 (range FROM1 and TO1) and MT2 (range
162    FROM2 to TO2). */
163 
164 static int
compare(MText * mt1,int from1,int to1,MText * mt2,int from2,int to2)165 compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
166 {
167   if (mt1->format == mt2->format
168       && (mt1->format <= MTEXT_FORMAT_UTF_8))
169     {
170       unsigned char *p1, *pend1, *p2, *pend2;
171       int unit_bytes = UNIT_BYTES (mt1->format);
172       int nbytes;
173       int result;
174 
175       p1 = mt1->data + mtext__char_to_byte (mt1, from1) * unit_bytes;
176       pend1 = mt1->data + mtext__char_to_byte (mt1, to1) * unit_bytes;
177 
178       p2 = mt2->data + mtext__char_to_byte (mt2, from2) * unit_bytes;
179       pend2 = mt2->data + mtext__char_to_byte (mt2, to2) * unit_bytes;
180 
181       if (pend1 - p1 < pend2 - p2)
182 	nbytes = pend1 - p1;
183       else
184 	nbytes = pend2 - p2;
185       result = memcmp (p1, p2, nbytes);
186       if (result)
187 	return result;
188       return ((pend1 - p1) - (pend2 - p2));
189     }
190   for (; from1 < to1 && from2 < to2; from1++, from2++)
191     {
192       int c1 = mtext_ref_char (mt1, from1);
193       int c2 = mtext_ref_char (mt2, from2);
194 
195       if (c1 != c2)
196 	return (c1 > c2 ? 1 : -1);
197     }
198   return (from2 == to2 ? (from1 < to1) : -1);
199 }
200 
201 
202 /* Return how many units are required in UTF-8 to represent characters
203    between FROM and TO of MT.  */
204 
205 static int
count_by_utf_8(MText * mt,int from,int to)206 count_by_utf_8 (MText *mt, int from, int to)
207 {
208   int n, c;
209 
210   for (n = 0; from < to; from++)
211     {
212       c = mtext_ref_char (mt, from);
213       n += CHAR_UNITS_UTF8 (c);
214     }
215   return n;
216 }
217 
218 
219 /* Return how many units are required in UTF-16 to represent
220    characters between FROM and TO of MT.  */
221 
222 static int
count_by_utf_16(MText * mt,int from,int to)223 count_by_utf_16 (MText *mt, int from, int to)
224 {
225   int n, c;
226 
227   for (n = 0; from < to; from++)
228     {
229       c = mtext_ref_char (mt, from);
230       n += CHAR_UNITS_UTF16 (c);
231     }
232   return n;
233 }
234 
235 
236 /* Insert text between FROM and TO of MT2 at POS of MT1.  */
237 
238 static MText *
insert(MText * mt1,int pos,MText * mt2,int from,int to)239 insert (MText *mt1, int pos, MText *mt2, int from, int to)
240 {
241   int pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
242   int from_unit = POS_CHAR_TO_BYTE (mt2, from);
243   int new_units = POS_CHAR_TO_BYTE (mt2, to) - from_unit;
244   int unit_bytes;
245 
246   if (mt1->nchars == 0)
247     mt1->format = mt2->format, mt1->coverage = mt2->coverage;
248   else if (mt1->format != mt2->format)
249     {
250       /* Be sure to make mt1->format sufficient to contain all
251 	 characters in mt2.  */
252       if (mt1->format == MTEXT_FORMAT_UTF_8
253 	  || mt1->format == MTEXT_FORMAT_UTF_32
254 	  || (mt1->format == MTEXT_FORMAT_UTF_16
255 	      && mt2->format <= MTEXT_FORMAT_UTF_16BE
256 	      && mt2->format != MTEXT_FORMAT_UTF_8))
257 	;
258       else if (mt1->format == MTEXT_FORMAT_US_ASCII)
259 	{
260 	  if (mt2->format == MTEXT_FORMAT_UTF_8)
261 	    mt1->format = MTEXT_FORMAT_UTF_8, mt1->coverage = mt2->coverage;
262 	  else if (mt2->format == MTEXT_FORMAT_UTF_16
263 		   || mt2->format == MTEXT_FORMAT_UTF_32)
264 	    mtext__adjust_format (mt1, mt2->format);
265 	  else
266 	    mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
267 	}
268       else
269 	{
270 	  mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
271 	  pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
272 	}
273     }
274 
275   unit_bytes = UNIT_BYTES (mt1->format);
276 
277   if (mt1->format == mt2->format)
278     {
279       int pos_byte = pos_unit * unit_bytes;
280       int total_bytes = (mt1->nbytes + new_units) * unit_bytes;
281       int new_bytes = new_units * unit_bytes;
282 
283       if (total_bytes + unit_bytes > mt1->allocated)
284 	{
285 	  mt1->allocated = total_bytes + unit_bytes;
286 	  if (mt1->data)
287 	    MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
288 	  else
289 	    MTABLE_CALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
290 	}
291       memmove (mt1->data + pos_byte + new_bytes, mt1->data + pos_byte,
292 	       (mt1->nbytes - pos_unit + 1) * unit_bytes);
293       memcpy (mt1->data + pos_byte, mt2->data + from_unit * unit_bytes,
294 	      new_bytes);
295     }
296   else if (mt1->format == MTEXT_FORMAT_UTF_8)
297     {
298       unsigned char *p;
299       int total_bytes, i, c;
300 
301       new_units = count_by_utf_8 (mt2, from, to);
302       total_bytes = mt1->nbytes + new_units;
303 
304       if (total_bytes + 1 > mt1->allocated)
305 	{
306 	  mt1->allocated = total_bytes + 1;
307 	  MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
308 	}
309       p = mt1->data + pos_unit;
310       memmove (p + new_units, p, mt1->nbytes - pos_unit + 1);
311       for (i = from; i < to; i++)
312 	{
313 	  c = mtext_ref_char (mt2, i);
314 	  p += CHAR_STRING_UTF8 (c, p);
315 	}
316     }
317   else if (mt1->format == MTEXT_FORMAT_UTF_16)
318     {
319       unsigned short *p;
320       int total_bytes, i, c;
321 
322       new_units = count_by_utf_16 (mt2, from, to);
323       total_bytes = (mt1->nbytes + new_units) * USHORT_SIZE;
324 
325       if (total_bytes + USHORT_SIZE > mt1->allocated)
326 	{
327 	  mt1->allocated = total_bytes + USHORT_SIZE;
328 	  MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
329 	}
330       p = (unsigned short *) mt1->data + pos_unit;
331       memmove (p + new_units, p,
332 	       (mt1->nbytes - pos_unit + 1) * USHORT_SIZE);
333       for (i = from; i < to; i++)
334 	{
335 	  c = mtext_ref_char (mt2, i);
336 	  p += CHAR_STRING_UTF16 (c, p);
337 	}
338     }
339   else				/* MTEXT_FORMAT_UTF_32 */
340     {
341       unsigned int *p;
342       int total_bytes, i;
343 
344       new_units = to - from;
345       total_bytes = (mt1->nbytes + new_units) * UINT_SIZE;
346 
347       if (total_bytes + UINT_SIZE > mt1->allocated)
348 	{
349 	  mt1->allocated = total_bytes + UINT_SIZE;
350 	  MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
351 	}
352       p = (unsigned *) mt1->data + pos_unit;
353       memmove (p + new_units, p,
354 	       (mt1->nbytes - pos_unit + 1) * UINT_SIZE);
355       for (i = from; i < to; i++)
356 	*p++ = mtext_ref_char (mt2, i);
357     }
358 
359   mtext__adjust_plist_for_insert
360     (mt1, pos, to - from,
361      mtext__copy_plist (mt2->plist, from, to, mt1, pos));
362   mt1->nchars += to - from;
363   mt1->nbytes += new_units;
364   if (mt1->cache_char_pos > pos)
365     {
366       mt1->cache_char_pos += to - from;
367       mt1->cache_byte_pos += new_units;
368     }
369 
370   return mt1;
371 }
372 
373 
374 static MCharTable *
get_charbag(MText * mt)375 get_charbag (MText *mt)
376 {
377   MTextProperty *prop = mtext_get_property (mt, 0, M_charbag);
378   MCharTable *table;
379   int i;
380 
381   if (prop)
382     {
383       if (prop->end == mt->nchars)
384 	return ((MCharTable *) prop->val);
385       mtext_detach_property (prop);
386     }
387 
388   table = mchartable (Msymbol, (void *) 0);
389   for (i = mt->nchars - 1; i >= 0; i--)
390     mchartable_set (table, mtext_ref_char (mt, i), Mt);
391   prop = mtext_property (M_charbag, table, MTEXTPROP_VOLATILE_WEAK);
392   mtext_attach_property (mt, 0, mtext_nchars (mt), prop);
393   M17N_OBJECT_UNREF (prop);
394   return table;
395 }
396 
397 
398 /* span () : Number of consecutive chars starting at POS in MT1 that
399    are included (if NOT is Mnil) or not included (if NOT is Mt) in
400    MT2.  */
401 
402 static int
span(MText * mt1,MText * mt2,int pos,MSymbol not)403 span (MText *mt1, MText *mt2, int pos, MSymbol not)
404 {
405   int nchars = mtext_nchars (mt1);
406   MCharTable *table = get_charbag (mt2);
407   int i;
408 
409   for (i = pos; i < nchars; i++)
410     if ((MSymbol) mchartable_lookup (table, mtext_ref_char (mt1, i)) == not)
411       break;
412   return (i - pos);
413 }
414 
415 
/* Count the characters in the NITEMS bytes at DATA, taken as UTF-8.
   Return the count, or -1 if the data is malformed: a trailing byte
   where a head byte is expected, a sequence running past the end of
   the data, or a head byte inside a multi-byte sequence.  */

static int
count_utf_8_chars (const void *data, int nitems)
{
  const unsigned char *p = (const unsigned char *) data;
  const unsigned char *pend = p + nitems;
  int nchars = 0;

  while (p < pend)
    {
      int i, n;

      /* Fast path: a run of ASCII bytes is one character each.  */
      for (; p < pend && *p < 128; nchars++, p++);
      if (p == pend)
	return nchars;
      if (! CHAR_HEAD_P_UTF8 (p))
	return -1;
      n = CHAR_UNITS_BY_HEAD_UTF8 (*p);
      /* Compare lengths rather than forming P + N, which could point
	 beyond the end of the buffer.  A non-positive length is
	 rejected as well, since the loop would not advance.
	 NOTE(review): CHAR_UNITS_BY_HEAD_UTF8 is defined outside this
	 file; confirm whether it can yield 0 for bytes 0xFE/0xFF.  */
      if (n < 1 || n > pend - p)
	return -1;
      for (i = 1; i < n; i++)
	if (CHAR_HEAD_P_UTF8 (p + i))
	  return -1;
      p += n;
      nchars++;
    }
  return nchars;
}
443 
/* Count the characters in the NITEMS 16-bit units at DATA, taken as
   UTF-16.  If SWAP is non-zero each unit is byte-swapped with
   SWAP_16 before it is examined.

   A high surrogate (0xD800..0xDBFF) immediately followed by a low
   surrogate (0xDC00..0xDFFF) counts as one character.  Every other
   unit, including an unpaired surrogate, counts as one character by
   itself.

   The surrogate flag is cleared once the unit after a high surrogate
   has been examined.  Previously it stayed set for the rest of the
   scan, so any data containing a surrogate pair was over-counted.  */

static int
count_utf_16_chars (const void *data, int nitems, int swap)
{
  const unsigned short *p = (const unsigned short *) data;
  const unsigned short *pend = p + nitems;
  int nchars = 0;
  int prev_high = 0;

  for (; p < pend; p++)
    {
      int c = *p;

      if (swap)
	c = SWAP_16 (c);
      if (prev_high && c >= 0xDC00 && c < 0xE000)
	{
	  /* Low half of a well-formed pair; the character was
	     already counted at its high half.  */
	  prev_high = 0;
	  continue;
	}
      prev_high = (c >= 0xD800 && c < 0xDC00);
      nchars++;
    }
  return nchars;
}
475 
476 
477 static int
find_char_forward(MText * mt,int from,int to,int c)478 find_char_forward (MText *mt, int from, int to, int c)
479 {
480   int from_byte = POS_CHAR_TO_BYTE (mt, from);
481 
482   if (mt->format <= MTEXT_FORMAT_UTF_8)
483     {
484       unsigned char *p = mt->data + from_byte;
485 
486       while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++;
487     }
488   else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
489     {
490       unsigned short *p = (unsigned short *) (mt->data) + from_byte;
491 
492       if (mt->format == MTEXT_FORMAT_UTF_16)
493 	while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
494       else if (c < 0x10000)
495 	{
496 	  c = SWAP_16 (c);
497 	  while (from < to && *p != c)
498 	    {
499 	      from++;
500 	      p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
501 	    }
502 	}
503       else if (c < 0x110000)
504 	{
505 	  int c1 = (c >> 10) + 0xD800;
506 	  int c2 = (c & 0x3FF) + 0xDC00;
507 
508 	  c1 = SWAP_16 (c1);
509 	  c2 = SWAP_16 (c2);
510 	  while (from < to && (*p != c1 || p[1] != c2))
511 	    {
512 	      from++;
513 	      p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
514 	    }
515 	}
516       else
517 	from = to;
518     }
519   else
520     {
521       unsigned *p = (unsigned *) (mt->data) + from_byte;
522       unsigned c1 = c;
523 
524       if (mt->format != MTEXT_FORMAT_UTF_32)
525 	c1 = SWAP_32 (c1);
526       while (from < to && *p++ != c1) from++;
527     }
528 
529   return (from < to ? from : -1);
530 }
531 
532 
533 static int
find_char_backward(MText * mt,int from,int to,int c)534 find_char_backward (MText *mt, int from, int to, int c)
535 {
536   int to_byte = POS_CHAR_TO_BYTE (mt, to);
537 
538   if (mt->format <= MTEXT_FORMAT_UTF_8)
539     {
540       unsigned char *p = mt->data + to_byte;
541 
542       while (from < to)
543 	{
544 	  for (p--; ! CHAR_HEAD_P (p); p--);
545 	  if (c == STRING_CHAR (p))
546 	    break;
547 	  to--;
548 	}
549     }
550   else if (mt->format <= MTEXT_FORMAT_UTF_16LE)
551     {
552       unsigned short *p = (unsigned short *) (mt->data) + to_byte;
553 
554       if (mt->format == MTEXT_FORMAT_UTF_16)
555 	{
556 	  while (from < to)
557 	    {
558 	      p--;
559 	      if (*p >= 0xDC00 && *p < 0xE000)
560 		p--;
561 	      if (c == STRING_CHAR_UTF16 (p))
562 		break;
563 	      to--;
564 	    }
565 	}
566       else if (c < 0x10000)
567 	{
568 	  c = SWAP_16 (c);
569 	  while (from < to && p[-1] != c)
570 	    {
571 	      to--;
572 	      p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
573 	    }
574 	}
575       else if (c < 0x110000)
576 	{
577 	  int c1 = (c >> 10) + 0xD800;
578 	  int c2 = (c & 0x3FF) + 0xDC00;
579 
580 	  c1 = SWAP_16 (c1);
581 	  c2 = SWAP_16 (c2);
582 	  while (from < to && (p[-1] != c2 || p[-2] != c1))
583 	    {
584 	      to--;
585 	      p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
586 	    }
587 	}
588     }
589   else
590     {
591       unsigned *p = (unsigned *) (mt->data) + to_byte;
592       unsigned c1 = c;
593 
594       if (mt->format != MTEXT_FORMAT_UTF_32)
595 	c1 = SWAP_32 (c1);
596       while (from < to && p[-1] != c1) to--, p--;
597     }
598 
599   return (from < to ? to - 1 : -1);
600 }
601 
602 
603 static void
free_mtext(void * object)604 free_mtext (void *object)
605 {
606   MText *mt = (MText *) object;
607 
608   if (mt->plist)
609     mtext__free_plist (mt);
610   if (mt->data && mt->allocated >= 0)
611     free (mt->data);
612   M17N_OBJECT_UNREGISTER (mtext_table, mt);
613   free (object);
614 }
615 
616 /** Case handler (case-folding comparison and case conversion) */
617 
618 /** Structure for an iterator used in case-fold comparison.  */
619 
620 struct casecmp_iterator {
621   MText *mt;
622   int pos;
623   MText *folded;
624   unsigned char *foldedp;
625   int folded_len;
626 };
627 
628 static int
next_char_from_it(struct casecmp_iterator * it)629 next_char_from_it (struct casecmp_iterator *it)
630 {
631   int c, c1;
632 
633   if (it->folded)
634     {
635       c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
636       return c;
637     }
638 
639   c = mtext_ref_char (it->mt, it->pos);
640   c1 = (int) mchar_get_prop (c, Msimple_case_folding);
641   if (c1 == 0xFFFF)
642     {
643       it->folded
644 	= (MText *) mchar_get_prop (c, Mcomplicated_case_folding);
645       it->foldedp = it->folded->data;
646       c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
647       return c;
648     }
649 
650   if (c1 >= 0)
651     c = c1;
652   return c;
653 }
654 
655 static void
advance_it(struct casecmp_iterator * it)656 advance_it (struct casecmp_iterator *it)
657 {
658   if (it->folded)
659     {
660       it->foldedp += it->folded_len;
661       if (it->foldedp == it->folded->data + it->folded->nbytes)
662 	it->folded = NULL;
663     }
664   if (! it->folded)
665     {
666       it->pos++;
667     }
668 }
669 
670 static int
case_compare(MText * mt1,int from1,int to1,MText * mt2,int from2,int to2)671 case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
672 {
673   struct casecmp_iterator it1, it2;
674 
675   it1.mt = mt1, it1.pos = from1, it1.folded = NULL;
676   it2.mt = mt2, it2.pos = from2, it2.folded = NULL;
677 
678   while (it1.pos < to1 && it2.pos < to2)
679     {
680       int c1 = next_char_from_it (&it1);
681       int c2 = next_char_from_it (&it2);
682 
683       if (c1 != c2)
684 	return (c1 > c2 ? 1 : -1);
685       advance_it (&it1);
686       advance_it (&it2);
687     }
688   return (it2.pos == to2 ? (it1.pos < to1) : -1);
689 }
690 
691 static MCharTable *tricky_chars, *cased, *soft_dotted, *case_mapping;
692 static MCharTable *combining_class;
693 
694 /* Languages that require special handling in case-conversion.  */
695 static MSymbol Mlt, Mtr, Maz;
696 
697 static MText *gr03A3;
698 static MText *lt0049, *lt004A, *lt012E, *lt00CC, *lt00CD, *lt0128;
699 static MText *tr0130, *tr0049, *tr0069;
700 
701 static int
init_case_conversion()702 init_case_conversion ()
703 {
704   Mlt = msymbol ("lt");
705   Mtr = msymbol ("tr");
706   Maz = msymbol ("az");
707 
708   gr03A3 = mtext ();
709   mtext_cat_char (gr03A3, 0x03C2);
710 
711   lt0049 = mtext ();
712   mtext_cat_char (lt0049, 0x0069);
713   mtext_cat_char (lt0049, 0x0307);
714 
715   lt004A = mtext ();
716   mtext_cat_char (lt004A, 0x006A);
717   mtext_cat_char (lt004A, 0x0307);
718 
719   lt012E = mtext ();
720   mtext_cat_char (lt012E, 0x012F);
721   mtext_cat_char (lt012E, 0x0307);
722 
723   lt00CC = mtext ();
724   mtext_cat_char (lt00CC, 0x0069);
725   mtext_cat_char (lt00CC, 0x0307);
726   mtext_cat_char (lt00CC, 0x0300);
727 
728   lt00CD = mtext ();
729   mtext_cat_char (lt00CD, 0x0069);
730   mtext_cat_char (lt00CD, 0x0307);
731   mtext_cat_char (lt00CD, 0x0301);
732 
733   lt0128 = mtext ();
734   mtext_cat_char (lt0128, 0x0069);
735   mtext_cat_char (lt0128, 0x0307);
736   mtext_cat_char (lt0128, 0x0303);
737 
738   tr0130 = mtext ();
739   mtext_cat_char (tr0130, 0x0069);
740 
741   tr0049 = mtext ();
742   mtext_cat_char (tr0049, 0x0131);
743 
744   tr0069 = mtext ();
745   mtext_cat_char (tr0069, 0x0130);
746 
747   if (! (cased = mchar_get_prop_table (msymbol ("cased"), NULL)))
748     return -1;
749   if (! (soft_dotted = mchar_get_prop_table (msymbol ("soft-dotted"), NULL)))
750     return -1;
751   if (! (case_mapping = mchar_get_prop_table (msymbol ("case-mapping"), NULL)))
752     return -1;
753   if (! (combining_class = mchar_get_prop_table (Mcombining_class, NULL)))
754     return -1;
755 
756   tricky_chars = mchartable (Mnil, 0);
757   mchartable_set (tricky_chars, 0x0049, (void *) 1);
758   mchartable_set (tricky_chars, 0x004A, (void *) 1);
759   mchartable_set (tricky_chars, 0x00CC, (void *) 1);
760   mchartable_set (tricky_chars, 0x00CD, (void *) 1);
761   mchartable_set (tricky_chars, 0x0128, (void *) 1);
762   mchartable_set (tricky_chars, 0x012E, (void *) 1);
763   mchartable_set (tricky_chars, 0x0130, (void *) 1);
764   mchartable_set (tricky_chars, 0x0307, (void *) 1);
765   mchartable_set (tricky_chars, 0x03A3, (void *) 1);
766   return 0;
767 }
768 
/* Run init_case_conversion () unless TRICKY_CHARS is already set.  If
   the initialization fails, signal MERROR_MTEXT through MERROR with
   RET.  */

#define CASE_CONV_INIT(ret)			\
  do {						\
    if (! tricky_chars)				\
      {						\
	if (init_case_conversion () < 0)	\
	  MERROR (MERROR_MTEXT, ret);		\
      }						\
  } while (0)
775 
/* Replace the character at POS of MT with the whole M-text VAR, move
   POS past the replacement and adjust END by the change in length.
   MT, POS and END are variables of the code that uses the macro.  */

#define REPLACE(var)					\
  do {							\
    int varlen = (var)->nchars;				\
							\
    mtext_replace (mt, pos, pos + 1, (var), 0, varlen);	\
    pos += varlen;					\
    end += varlen - 1;					\
  } while (0)
786 
/* Delete the character at POS of MT and decrement END.  MT, POS and
   END are variables of the code that uses the macro; POS itself is
   left unchanged.  */

#define DELETE				\
  do {					\
    mtext_del (mt, pos, pos + 1);	\
    --end;				\
  } while (0)
794 
/* Look C up in CASE_MAPPING.  If there is a mapping whose lowercase
   M-text differs from the single character C, replace the character
   at POS of MT with that M-text, move POS past it and adjust END;
   otherwise just move POS to the next character.  C, MT, POS and END
   are variables of the code that uses the macro.  */

#define LOOKUP								\
  do {									\
    MPlist *map_ = (MPlist *) mchartable_lookup (case_mapping, c);	\
    int step_ = 1;							\
									\
    if (map_)								\
      {									\
	/* Lowercase is the 1st element. */				\
	MText *lower_ = MPLIST_VAL ((MPlist *) MPLIST_VAL (map_));	\
	int llen_ = mtext_nchars (lower_);				\
									\
	if (mtext_ref_char (lower_, 0) != c || llen_ > 1)		\
	  {								\
	    mtext_replace (mt, pos, pos + 1, lower_, 0, llen_);		\
	    end += llen_ - 1;						\
	    step_ = llen_;						\
	  }								\
      }									\
    pos += step_;							\
  } while (0)
817 
818 
819 int
uppercase_precheck(MText * mt,int pos,int end)820 uppercase_precheck (MText *mt, int pos, int end)
821 {
822   for (; pos < end; pos++)
823     if (mtext_ref_char (mt, pos) == 0x0307 &&
824 	(MSymbol) mtext_get_prop (mt, pos, Mlanguage) == Mlt)
825       return 1;
826   return 0;
827 }
828 
829 int
lowercase_precheck(MText * mt,int pos,int end)830 lowercase_precheck (MText *mt, int pos, int end)
831 {
832   int c;
833   MSymbol lang;
834 
835   for (; pos < end; pos++)
836     {
837       c = mtext_ref_char (mt, pos);
838 
839       if ((int) mchartable_lookup (tricky_chars, c) == 1)
840       {
841 	if (c == 0x03A3)
842 	  return 1;
843 
844 	lang = mtext_get_prop (mt, pos, Mlanguage);
845 
846 	if (lang == Mlt &&
847 	    (c == 0x0049 || c == 0x004A || c == 0x012E))
848 	  return 1;
849 
850 	if ((lang == Mtr || lang == Maz) &&
851 	    (c == 0x0307 || c == 0x0049))
852 	  return 1;
853       }
854     }
855   return 0;
856 }
857 
858 #define CASED 1
859 #define CASE_IGNORABLE 2
860 
861 int
final_sigma(MText * mt,int pos)862 final_sigma (MText *mt, int pos)
863 {
864   int i, len = mtext_len (mt);
865   int c;
866 
867   for (i = pos - 1; i >= 0; i--)
868     {
869       c = (int) mchartable_lookup (cased, mtext_ref_char (mt, i));
870       if (c == -1)
871 	c = 0;
872       if (c & CASED)
873 	break;
874       if (! (c & CASE_IGNORABLE))
875 	return 0;
876     }
877 
878   if (i == -1)
879     return 0;
880 
881   for (i = pos + 1; i < len; i++)
882     {
883       c = (int) mchartable_lookup (cased, mtext_ref_char (mt, i));
884       if (c == -1)
885 	c = 0;
886       if (c & CASED)
887 	return 0;
888       if (! (c & CASE_IGNORABLE))
889 	return 1;
890     }
891 
892   return 1;
893 }
894 
895 int
after_soft_dotted(MText * mt,int i)896 after_soft_dotted (MText *mt, int i)
897 {
898   int c, class;
899 
900   for (i--; i >= 0; i--)
901     {
902       c = mtext_ref_char (mt, i);
903       if ((MSymbol) mchartable_lookup (soft_dotted, c) == Mt)
904 	return 1;
905       class = (int) mchartable_lookup (combining_class, c);
906       if (class == 0 || class == 230)
907 	return 0;
908     }
909 
910   return 0;
911 }
912 
913 int
more_above(MText * mt,int i)914 more_above (MText *mt, int i)
915 {
916   int class, len = mtext_len (mt);
917 
918   for (i++; i < len; i++)
919     {
920       class = (int) mchartable_lookup (combining_class,
921 				       mtext_ref_char (mt, i));
922       if (class == 230)
923 	return 1;
924       if (class == 0)
925 	return 0;
926     }
927 
928   return 0;
929 }
930 
931 int
before_dot(MText * mt,int i)932 before_dot (MText *mt, int i)
933 {
934   int c, class, len = mtext_len (mt);
935 
936   for (i++; i < len; i++)
937     {
938       c = mtext_ref_char (mt, i);
939       if (c == 0x0307)
940 	return 1;
941       class = (int) mchartable_lookup (combining_class, c);
942       if (class == 230 || class == 0)
943 	return 0;
944     }
945 
946   return 0;
947 }
948 
949 int
after_i(MText * mt,int i)950 after_i (MText *mt, int i)
951 {
952   int c, class;
953 
954   for (i--; i >= 0; i--)
955     {
956       c = mtext_ref_char (mt, i);
957       if (c == (int) 'I')
958 	return 1;
959       class = (int) mchartable_lookup (combining_class, c);
960       if (class == 230 || class == 0)
961 	return 0;
962     }
963 
964   return 0;
965 }
966 
967 
968 /* Internal API */
969 
970 int
mtext__init()971 mtext__init ()
972 {
973   M17N_OBJECT_ADD_ARRAY (mtext_table, "M-text");
974   M_charbag = msymbol_as_managing_key ("  charbag");
975   mtext_table.count = 0;
976   Mlanguage = msymbol ("language");
977   return 0;
978 }
979 
980 
/* Finalize the M-text module.  The only work done here is the call
   to mtext__wseg_fini ().  */

void
mtext__fini (void)
{
  mtext__wseg_fini ();
}
986 
987 
988 int
mtext__char_to_byte(MText * mt,int pos)989 mtext__char_to_byte (MText *mt, int pos)
990 {
991   int char_pos, byte_pos;
992   int forward;
993 
994   if (pos < mt->cache_char_pos)
995     {
996       if (mt->cache_char_pos == mt->cache_byte_pos)
997 	return pos;
998       if (pos < mt->cache_char_pos - pos)
999 	{
1000 	  char_pos = byte_pos = 0;
1001 	  forward = 1;
1002 	}
1003       else
1004 	{
1005 	  char_pos = mt->cache_char_pos;
1006 	  byte_pos = mt->cache_byte_pos;
1007 	  forward = 0;
1008 	}
1009     }
1010   else
1011     {
1012       if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
1013 	return (mt->cache_byte_pos + (pos - mt->cache_char_pos));
1014       if (pos - mt->cache_char_pos < mt->nchars - pos)
1015 	{
1016 	  char_pos = mt->cache_char_pos;
1017 	  byte_pos = mt->cache_byte_pos;
1018 	  forward = 1;
1019 	}
1020       else
1021 	{
1022 	  char_pos = mt->nchars;
1023 	  byte_pos = mt->nbytes;
1024 	  forward = 0;
1025 	}
1026     }
1027   if (forward)
1028     while (char_pos < pos)
1029       INC_POSITION (mt, char_pos, byte_pos);
1030   else
1031     while (char_pos > pos)
1032       DEC_POSITION (mt, char_pos, byte_pos);
1033   mt->cache_char_pos = char_pos;
1034   mt->cache_byte_pos = byte_pos;
1035   return byte_pos;
1036 }
1037 
1038 /* mtext__byte_to_char () */
1039 
1040 int
mtext__byte_to_char(MText * mt,int pos_byte)1041 mtext__byte_to_char (MText *mt, int pos_byte)
1042 {
1043   int char_pos, byte_pos;
1044   int forward;
1045 
1046   if (pos_byte < mt->cache_byte_pos)
1047     {
1048       if (mt->cache_char_pos == mt->cache_byte_pos)
1049 	return pos_byte;
1050       if (pos_byte < mt->cache_byte_pos - pos_byte)
1051 	{
1052 	  char_pos = byte_pos = 0;
1053 	  forward = 1;
1054 	}
1055       else
1056 	{
1057 	  char_pos = mt->cache_char_pos;
1058 	  byte_pos = mt->cache_byte_pos;
1059 	  forward = 0;
1060 	}
1061     }
1062   else
1063     {
1064       if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
1065 	return (mt->cache_char_pos + (pos_byte - mt->cache_byte_pos));
1066       if (pos_byte - mt->cache_byte_pos < mt->nbytes - pos_byte)
1067 	{
1068 	  char_pos = mt->cache_char_pos;
1069 	  byte_pos = mt->cache_byte_pos;
1070 	  forward = 1;
1071 	}
1072       else
1073 	{
1074 	  char_pos = mt->nchars;
1075 	  byte_pos = mt->nbytes;
1076 	  forward = 0;
1077 	}
1078     }
1079   if (forward)
1080     while (byte_pos < pos_byte)
1081       INC_POSITION (mt, char_pos, byte_pos);
1082   else
1083     while (byte_pos > pos_byte)
1084       DEC_POSITION (mt, char_pos, byte_pos);
1085   mt->cache_char_pos = char_pos;
1086   mt->cache_byte_pos = byte_pos;
1087   return char_pos;
1088 }
1089 
1090 /* Estimated extra bytes that malloc will use for its own purpose on
1091    each memory allocation.  */
1092 #define MALLOC_OVERHEAD 4
1093 #define MALLOC_MININUM_BYTES 12
1094 
1095 void
mtext__enlarge(MText * mt,int nbytes)1096 mtext__enlarge (MText *mt, int nbytes)
1097 {
1098   nbytes += MAX_UTF8_CHAR_BYTES;
1099   if (mt->allocated >= nbytes)
1100     return;
1101   if (nbytes < MALLOC_MININUM_BYTES)
1102     nbytes = MALLOC_MININUM_BYTES;
1103   while (mt->allocated < nbytes)
1104     mt->allocated = mt->allocated * 2 + MALLOC_OVERHEAD;
1105   MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1106 }
1107 
1108 int
mtext__takein(MText * mt,int nchars,int nbytes)1109 mtext__takein (MText *mt, int nchars, int nbytes)
1110 {
1111   if (mt->plist)
1112     mtext__adjust_plist_for_insert (mt, mt->nchars, nchars, NULL);
1113   mt->nchars += nchars;
1114   mt->nbytes += nbytes;
1115   mt->data[mt->nbytes] = 0;
1116   return 0;
1117 }
1118 
1119 
1120 int
mtext__cat_data(MText * mt,unsigned char * p,int nbytes,enum MTextFormat format)1121 mtext__cat_data (MText *mt, unsigned char *p, int nbytes,
1122 		 enum MTextFormat format)
1123 {
1124   int nchars = -1;
1125 
1126   if (mt->format > MTEXT_FORMAT_UTF_8)
1127     MERROR (MERROR_MTEXT, -1);
1128   if (format == MTEXT_FORMAT_US_ASCII)
1129     nchars = nbytes;
1130   else if (format == MTEXT_FORMAT_UTF_8)
1131     nchars = count_utf_8_chars (p, nbytes);
1132   if (nchars < 0)
1133     MERROR (MERROR_MTEXT, -1);
1134   mtext__enlarge (mt, mtext_nbytes (mt) + nbytes + 1);
1135   memcpy (MTEXT_DATA (mt) + mtext_nbytes (mt), p, nbytes);
1136   mtext__takein (mt, nchars, nbytes);
1137   return nchars;
1138 }
1139 
1140 MText *
mtext__from_data(const void * data,int nitems,enum MTextFormat format,int need_copy)1141 mtext__from_data (const void *data, int nitems, enum MTextFormat format,
1142 		  int need_copy)
1143 {
1144   MText *mt;
1145   int nchars, nbytes, unit_bytes;
1146 
1147   if (format == MTEXT_FORMAT_US_ASCII)
1148     {
1149       const char *p = (char *) data, *pend = p + nitems;
1150 
1151       while (p < pend)
1152 	if (*p++ < 0)
1153 	  MERROR (MERROR_MTEXT, NULL);
1154       nchars = nbytes = nitems;
1155       unit_bytes = 1;
1156     }
1157   else if (format == MTEXT_FORMAT_UTF_8)
1158     {
1159       if ((nchars = count_utf_8_chars (data, nitems)) < 0)
1160 	MERROR (MERROR_MTEXT, NULL);
1161       nbytes = nitems;
1162       unit_bytes = 1;
1163     }
1164   else if (format <= MTEXT_FORMAT_UTF_16BE)
1165     {
1166       if ((nchars = count_utf_16_chars (data, nitems,
1167 					format != MTEXT_FORMAT_UTF_16)) < 0)
1168 	MERROR (MERROR_MTEXT, NULL);
1169       nbytes = USHORT_SIZE * nitems;
1170       unit_bytes = USHORT_SIZE;
1171     }
1172   else				/* MTEXT_FORMAT_UTF_32XX */
1173     {
1174       nchars = nitems;
1175       nbytes = UINT_SIZE * nitems;
1176       unit_bytes = UINT_SIZE;
1177     }
1178 
1179   mt = mtext ();
1180   mt->format = format;
1181   mt->coverage = FORMAT_COVERAGE (format);
1182   mt->allocated = need_copy ? nbytes + unit_bytes : -1;
1183   mt->nchars = nchars;
1184   mt->nbytes = nitems;
1185   if (need_copy)
1186     {
1187       MTABLE_MALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1188       memcpy (mt->data, data, nbytes);
1189       mt->data[nbytes] = 0;
1190     }
1191   else
1192     mt->data = (unsigned char *) data;
1193   return mt;
1194 }
1195 
1196 
1197 void
mtext__adjust_format(MText * mt,enum MTextFormat format)1198 mtext__adjust_format (MText *mt, enum MTextFormat format)
1199 {
1200   int i, c;
1201 
1202   if (mt->nchars > 0)
1203     switch (format)
1204       {
1205       case MTEXT_FORMAT_US_ASCII:
1206 	{
1207 	  unsigned char *p = mt->data;
1208 
1209 	  for (i = 0; i < mt->nchars; i++)
1210 	    *p++ = mtext_ref_char (mt, i);
1211 	  mt->nbytes = mt->nchars;
1212 	  mt->cache_byte_pos = mt->cache_char_pos;
1213 	  break;
1214 	}
1215 
1216       case MTEXT_FORMAT_UTF_8:
1217 	{
1218 	  unsigned char *p0, *p1;
1219 
1220 	  i = count_by_utf_8 (mt, 0, mt->nchars) + 1;
1221 	  MTABLE_MALLOC (p0, i, MERROR_MTEXT);
1222 	  mt->allocated = i;
1223 	  for (i = 0, p1 = p0; i < mt->nchars; i++)
1224 	    {
1225 	      c = mtext_ref_char (mt, i);
1226 	      p1 += CHAR_STRING_UTF8 (c, p1);
1227 	    }
1228 	  *p1 = '\0';
1229 	  free (mt->data);
1230 	  mt->data = p0;
1231 	  mt->nbytes = p1 - p0;
1232 	  mt->cache_char_pos = mt->cache_byte_pos = 0;
1233 	  break;
1234 	}
1235 
1236       default:
1237 	if (format == MTEXT_FORMAT_UTF_16)
1238 	  {
1239 	    unsigned short *p0, *p1;
1240 
1241 	    i = (count_by_utf_16 (mt, 0, mt->nchars) + 1) * USHORT_SIZE;
1242 	    MTABLE_MALLOC (p0, i, MERROR_MTEXT);
1243 	    mt->allocated = i;
1244 	    for (i = 0, p1 = p0; i < mt->nchars; i++)
1245 	      {
1246 		c = mtext_ref_char (mt, i);
1247 		p1 += CHAR_STRING_UTF16 (c, p1);
1248 	      }
1249 	    *p1 = 0;
1250 	    free (mt->data);
1251 	    mt->data = (unsigned char *) p0;
1252 	    mt->nbytes = p1 - p0;
1253 	    mt->cache_char_pos = mt->cache_byte_pos = 0;
1254 	    break;
1255 	  }
1256 	else
1257 	  {
1258 	    unsigned int *p;
1259 
1260 	    mt->allocated = (mt->nchars + 1) * UINT_SIZE;
1261 	    MTABLE_MALLOC (p, mt->allocated, MERROR_MTEXT);
1262 	    for (i = 0; i < mt->nchars; i++)
1263 	      p[i] = mtext_ref_char (mt, i);
1264 	    p[i] = 0;
1265 	    free (mt->data);
1266 	    mt->data = (unsigned char *) p;
1267 	    mt->nbytes = mt->nchars;
1268 	    mt->cache_byte_pos = mt->cache_char_pos;
1269 	  }
1270       }
1271   mt->format = format;
1272   mt->coverage = FORMAT_COVERAGE (format);
1273 }
1274 
1275 
1276 /* Find the position of a character at the beginning of a line of
1277    M-Text MT searching backward from POS.  */
1278 
1279 int
mtext__bol(MText * mt,int pos)1280 mtext__bol (MText *mt, int pos)
1281 {
1282   int byte_pos;
1283 
1284   if (pos == 0)
1285     return pos;
1286   byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1287   if (mt->format <= MTEXT_FORMAT_UTF_8)
1288     {
1289       unsigned char *p = mt->data + byte_pos;
1290 
1291       if (p[-1] == '\n')
1292 	return pos;
1293       p--;
1294       while (p > mt->data && p[-1] != '\n')
1295 	p--;
1296       if (p == mt->data)
1297 	return 0;
1298       byte_pos = p - mt->data;
1299       return POS_BYTE_TO_CHAR (mt, byte_pos);
1300     }
1301   else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1302     {
1303       unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1304       unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1305 				? 0x0A00 : 0x000A);
1306 
1307       if (p[-1] == newline)
1308 	return pos;
1309       p--;
1310       while (p > (unsigned short *) (mt->data) && p[-1] != newline)
1311 	p--;
1312       if (p == (unsigned short *) (mt->data))
1313 	return 0;
1314       byte_pos = p - (unsigned short *) (mt->data);
1315       return POS_BYTE_TO_CHAR (mt, byte_pos);;
1316     }
1317   else
1318     {
1319       unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
1320       unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1321 			  ? 0x0A000000 : 0x0000000A);
1322 
1323       if (p[-1] == newline)
1324 	return pos;
1325       p--, pos--;
1326       while (p > (unsigned *) (mt->data) && p[-1] != newline)
1327 	p--, pos--;
1328       return pos;
1329     }
1330 }
1331 
1332 
1333 /* Find the position of a character at the end of a line of M-Text MT
1334    searching forward from POS.  */
1335 
1336 int
mtext__eol(MText * mt,int pos)1337 mtext__eol (MText *mt, int pos)
1338 {
1339   int byte_pos;
1340 
1341   if (pos == mt->nchars)
1342     return pos;
1343   byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1344   if (mt->format <= MTEXT_FORMAT_UTF_8)
1345     {
1346       unsigned char *p = mt->data + byte_pos;
1347       unsigned char *endp;
1348 
1349       if (*p == '\n')
1350 	return pos + 1;
1351       p++;
1352       endp = mt->data + mt->nbytes;
1353       while (p < endp && *p != '\n')
1354 	p++;
1355       if (p == endp)
1356 	return mt->nchars;
1357       byte_pos = p + 1 - mt->data;
1358       return POS_BYTE_TO_CHAR (mt, byte_pos);
1359     }
1360   else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1361     {
1362       unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1363       unsigned short *endp;
1364       unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1365 				? 0x0A00 : 0x000A);
1366 
1367       if (*p == newline)
1368 	return pos + 1;
1369       p++;
1370       endp = (unsigned short *) (mt->data) + mt->nbytes;
1371       while (p < endp && *p != newline)
1372 	p++;
1373       if (p == endp)
1374 	return mt->nchars;
1375       byte_pos = p + 1 - (unsigned short *) (mt->data);
1376       return POS_BYTE_TO_CHAR (mt, byte_pos);
1377     }
1378   else
1379     {
1380       unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
1381       unsigned *endp;
1382       unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1383 			  ? 0x0A000000 : 0x0000000A);
1384 
1385       if (*p == newline)
1386 	return pos + 1;
1387       p++, pos++;
1388       endp = (unsigned *) (mt->data) + mt->nbytes;
1389       while (p < endp && *p != newline)
1390 	p++, pos++;
1391       return pos;
1392     }
1393 }
1394 
1395 int
mtext__lowercase(MText * mt,int pos,int end)1396 mtext__lowercase (MText *mt, int pos, int end)
1397 {
1398   int opos = pos;
1399   int c;
1400   MText *orig = NULL;
1401   MSymbol lang;
1402 
1403   if (lowercase_precheck (mt, pos, end))
1404     orig = mtext_dup (mt);
1405 
1406   for (; pos < end; opos++)
1407     {
1408       c = mtext_ref_char (mt, pos);
1409       lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
1410 
1411       if (c == 0x03A3 && final_sigma (orig, opos))
1412 	REPLACE (gr03A3);
1413 
1414       else if (lang == Mlt)
1415 	{
1416 	  if (c == 0x00CC)
1417     	    REPLACE (lt00CC);
1418 	  else if (c == 0x00CD)
1419 	    REPLACE (lt00CD);
1420 	  else if (c == 0x0128)
1421 	    REPLACE (lt0128);
1422 	  else if (orig && more_above (orig, opos))
1423 	    {
1424 	      if (c == 0x0049)
1425 		REPLACE (lt0049);
1426 	      else if (c == 0x004A)
1427 		REPLACE (lt004A);
1428 	      else if (c == 0x012E)
1429 		REPLACE (lt012E);
1430 	      else
1431 		LOOKUP;
1432 	    }
1433 	  else
1434 	    LOOKUP;
1435 	}
1436 
1437       else if (lang == Mtr || lang == Maz)
1438 	{
1439 	  if (c == 0x0130)
1440 	    REPLACE (tr0130);
1441 	  else if (c == 0x0307 && after_i (orig, opos))
1442 	    DELETE;
1443 	  else if (c == 0x0049 && ! before_dot (orig, opos))
1444 	    REPLACE (tr0049);
1445 	  else
1446 	    LOOKUP;
1447 	}
1448 
1449       else
1450 	LOOKUP;
1451     }
1452 
1453   if (orig)
1454     m17n_object_unref (orig);
1455 
1456   return end;
1457 }
1458 
1459 int
mtext__titlecase(MText * mt,int pos,int end)1460 mtext__titlecase (MText *mt, int pos, int end)
1461 {
1462   int opos = pos;
1463   int c;
1464   MText *orig = NULL;
1465   MSymbol lang;
1466   MPlist *pl;
1467 
1468   /* Precheck for titlecase is identical to that for uppercase. */
1469   if (uppercase_precheck (mt, pos, end))
1470     orig = mtext_dup (mt);
1471 
1472   for (; pos < end; opos++)
1473     {
1474       c = mtext_ref_char (mt, pos);
1475       lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
1476 
1477       if ((lang == Mtr || lang == Maz) && c == 0x0069)
1478 	REPLACE (tr0069);
1479 
1480       else if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos))
1481 	DELETE;
1482 
1483       else if ((pl = (MPlist *) mchartable_lookup (case_mapping, c)))
1484 	{
1485 	  /* Titlecase is the 2nd element. */
1486 	  MText *title
1487 	    = (MText *) mplist_value (mplist_next (mplist_value (pl)));
1488 	  int tlen = mtext_len (title);
1489 
1490 	  if (mtext_ref_char (title, 0) != c || tlen > 1)
1491 	    {
1492 	      mtext_replace (mt, pos, pos + 1, title, 0, tlen);
1493 	      pos += tlen;
1494 	      end += tlen - 1;
1495 	    }
1496 
1497 	  else
1498 	    pos++;
1499 	}
1500 
1501       else
1502 	pos++;
1503     }
1504 
1505   if (orig)
1506     m17n_object_unref (orig);
1507 
1508   return end;
1509 }
1510 
1511 int
mtext__uppercase(MText * mt,int pos,int end)1512 mtext__uppercase (MText *mt, int pos, int end)
1513 {
1514   int opos = pos;
1515   int c;
1516   MText *orig = NULL;
1517   MSymbol lang;
1518   MPlist *pl;
1519 
1520   CASE_CONV_INIT (-1);
1521 
1522   if (uppercase_precheck (mt, 0, end))
1523     orig = mtext_dup (mt);
1524 
1525   for (; pos < end; opos++)
1526     {
1527       c = mtext_ref_char (mt, pos);
1528       lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
1529 
1530       if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos))
1531 	DELETE;
1532 
1533       else if ((lang == Mtr || lang == Maz) && c == 0x0069)
1534 	REPLACE (tr0069);
1535 
1536       else
1537 	{
1538 	  if ((pl = (MPlist *) mchartable_lookup (case_mapping, c)) != NULL)
1539 	    {
1540 	      MText *upper;
1541 	      int ulen;
1542 
1543 	      /* Uppercase is the 3rd element. */
1544 	      upper = (MText *) mplist_value (mplist_next (mplist_next (mplist_value (pl))));
1545 	      ulen = mtext_len (upper);
1546 
1547 	      if (mtext_ref_char (upper, 0) != c || ulen > 1)
1548 		{
1549 		  mtext_replace (mt, pos, pos + 1, upper, 0, ulen);
1550 		  pos += ulen;
1551 		  end += ulen - 1;
1552 		}
1553 
1554 	      else
1555 		pos++;
1556 	    }
1557 
1558 	  else						 /* pl == NULL */
1559 	    pos++;
1560 	}
1561     }
1562 
1563   if (orig)
1564     m17n_object_unref (orig);
1565 
1566   return end;
1567 }
1568 
1569 /*** @} */
1570 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
1571 
1572 
1573 /* External API */
1574 
1575 #ifdef WORDS_BIGENDIAN
1576 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16BE;
1577 #else
1578 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16LE;
1579 #endif
1580 
1581 #ifdef WORDS_BIGENDIAN
1582 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32BE;
1583 #else
1584 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32LE;
1585 #endif
1586 
1587 /*** @addtogroup m17nMtext */
1588 /*** @{ */
1589 /*=*/
1590 
1591 /***en The symbol whose name is "language".  */
/***ja "language" という名前を持つシンボル.  */
1593 MSymbol Mlanguage;
1594 
1595 /*=*/
1596 
1597 /***en
1598     @brief Allocate a new M-text.
1599 
1600     The mtext () function allocates a new M-text of length 0 and
1601     returns a pointer to it.  The allocated M-text will not be freed
1602     unless the user explicitly does so with the m17n_object_unref ()
1603     function.  */
1604 
/***ja
    @brief 新しいM-textを割り当てる.

    関数 mtext () は、長さ 0 の新しい M-text
    を割り当て、それへのポインタを返す。割り当てられた M-text は、関数
    m17n_object_unref () によってユーザが明示的に行なわない限り、解放されない。

    @latexonly \IPAlabel{mtext} @endlatexonly  */
1613 
1614 /***
1615     @seealso
1616     m17n_object_unref ()  */
1617 
1618 MText *
mtext()1619 mtext ()
1620 {
1621   MText *mt;
1622 
1623   M17N_OBJECT (mt, free_mtext, MERROR_MTEXT);
1624   mt->format = MTEXT_FORMAT_US_ASCII;
1625   mt->coverage = MTEXT_COVERAGE_ASCII;
1626   M17N_OBJECT_REGISTER (mtext_table, mt);
1627   return mt;
1628 }
1629 
1630 /***en
1631     @brief Allocate a new M-text with specified data.
1632 
1633     The mtext_from_data () function allocates a new M-text whose
1634     character sequence is specified by array $DATA of $NITEMS
1635     elements.  $FORMAT specifies the format of $DATA.
1636 
1637     When $FORMAT is either #MTEXT_FORMAT_US_ASCII or
1638     #MTEXT_FORMAT_UTF_8, the contents of $DATA must be of the type @c
1639     unsigned @c char, and $NITEMS counts by byte.
1640 
1641     When $FORMAT is either #MTEXT_FORMAT_UTF_16LE or
1642     #MTEXT_FORMAT_UTF_16BE, the contents of $DATA must be of the type
1643     @c unsigned @c short, and $NITEMS counts by unsigned short.
1644 
1645     When $FORMAT is either #MTEXT_FORMAT_UTF_32LE or
1646     #MTEXT_FORMAT_UTF_32BE, the contents of $DATA must be of the type
1647     @c unsigned, and $NITEMS counts by unsigned.
1648 
1649     The character sequence of the M-text is not modifiable.
1650     The contents of $DATA must not be modified while the M-text is alive.
1651 
1652     The allocated M-text will not be freed unless the user explicitly
1653     does so with the m17n_object_unref () function.  Even in that case,
1654     $DATA is not freed.
1655 
1656     @return
1657     If the operation was successful, mtext_from_data () returns a
1658     pointer to the allocated M-text.  Otherwise it returns @c NULL and
1659     assigns an error code to the external variable #merror_code.  */
/***ja
    @brief 指定のデータを元に新しい M-text を割り当てる.

    関数 mtext_from_data () は、要素数 $NITEMS の配列 $DATA
    で指定された文字列を持つ新しい M-text を割り当てる。$FORMAT は $DATA
    のフォーマットを示す。

    $FORMAT が #MTEXT_FORMAT_US_ASCII か #MTEXT_FORMAT_UTF_8 ならば、
    $DATA の内容は @c unsigned @c char 型であり、$NITEMS
    はバイト単位で表されている。

    $FORMAT が #MTEXT_FORMAT_UTF_16LE か #MTEXT_FORMAT_UTF_16BE ならば、
    $DATA の内容は @c unsigned @c short 型であり、$NITEMS は unsigned
    short 単位である。

    $FORMAT が #MTEXT_FORMAT_UTF_32LE か #MTEXT_FORMAT_UTF_32BE ならば、
    $DATA の内容は @c unsigned 型であり、$NITEMS は unsigned 単位である。

    割り当てられた M-text の文字列は変更できない。$DATA の内容は
    M-text が有効な間は変更してはならない。

    割り当てられた M-text は、関数 m17n_object_unref ()
    によってユーザが明示的に行なわない限り、解放されない。その場合でも $DATA は解放されない。

    @return
    処理が成功すれば、mtext_from_data () は割り当てられたM-text
    へのポインタを返す。そうでなければ @c NULL を返し外部変数 #merror_code
    にエラーコードを設定する。  */
1688 
1689 /***
1690     @errors
1691     @c MERROR_MTEXT  */
1692 
1693 MText *
mtext_from_data(const void * data,int nitems,enum MTextFormat format)1694 mtext_from_data (const void *data, int nitems, enum MTextFormat format)
1695 {
1696   if (nitems < 0
1697       || format < MTEXT_FORMAT_US_ASCII || format >= MTEXT_FORMAT_MAX)
1698     MERROR (MERROR_MTEXT, NULL);
1699   return mtext__from_data (data, nitems, format, 0);
1700 }
1701 
1702 /*=*/
1703 
1704 /***en
1705     @brief Get information about the text data in M-text.
1706 
1707     The mtext_data () function returns a pointer to the text data of
1708     M-text $MT.  If $FMT is not NULL, the format of the text data is
1709     stored in it.  If $NUNITS is not NULL, the number of units of the
1710     text data is stored in it.
1711 
1712     If $POS_IDX is not NULL and it points to a non-negative number,
1713     what it points to is a character position.  In this case, the
1714     return value is a pointer to the text data of a character at that
1715     position.
1716 
1717     Otherwise, if $UNIT_IDX is not NULL, it points to a unit position.
1718     In this case, the return value is a pointer to the text data of a
1719     character containing that unit.
1720 
    The character position and unit position of the return value are
    stored in $POS_IDX and $UNIT_IDX respectively if they are not
    NULL.
1724 
1725     <ul>
1726 
1727     <li> If the format of the text data is MTEXT_FORMAT_US_ASCII or
1728     MTEXT_FORMAT_UTF_8, one unit is unsigned char.
1729 
1730     <li> If the format is MTEXT_FORMAT_UTF_16LE or
1731     MTEXT_FORMAT_UTF_16BE, one unit is unsigned short.
1732 
1733     <li> If the format is MTEXT_FORMAT_UTF_32LE or
1734     MTEXT_FORMAT_UTF_32BE, one unit is unsigned int.
1735 
1736     </ul> */
1737 
1738 void *
mtext_data(MText * mt,enum MTextFormat * fmt,int * nunits,int * pos_idx,int * unit_idx)1739 mtext_data (MText *mt, enum MTextFormat *fmt, int *nunits,
1740 	    int *pos_idx, int *unit_idx)
1741 {
1742   void *data;
1743   int pos = 0, unit_pos = 0;
1744 
1745   if (fmt)
1746     *fmt = mt->format;
1747   data = MTEXT_DATA (mt);
1748   if (pos_idx && *pos_idx >= 0)
1749     {
1750       pos = *pos_idx;
1751       if (pos > mtext_nchars (mt))
1752 	MERROR (MERROR_MTEXT, NULL);
1753       unit_pos = POS_CHAR_TO_BYTE (mt, pos);
1754     }
1755   else if (unit_idx)
1756     {
1757       unit_pos = *unit_idx;
1758 
1759       if (unit_pos < 0 || unit_pos > mtext_nbytes (mt))
1760 	MERROR (MERROR_MTEXT, NULL);
1761       pos = POS_BYTE_TO_CHAR (mt, unit_pos);
1762       unit_pos = POS_CHAR_TO_BYTE (mt, pos);
1763     }
1764   if (nunits)
1765     *nunits = mtext_nbytes (mt) - unit_pos;
1766   if (pos_idx)
1767     *pos_idx = pos;
1768   if (unit_idx)
1769     *unit_idx = unit_pos;
1770   if (unit_pos > 0)
1771     {
1772       if (mt->format <= MTEXT_FORMAT_UTF_8)
1773 	data = (unsigned char *) data + unit_pos;
1774       else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1775 	data = (unsigned short *) data + unit_pos;
1776       else
1777 	data = (unsigned int *) data + unit_pos;
1778     }
1779   return data;
1780 }
1781 
1782 /*=*/
1783 
1784 /***en
1785     @brief Number of characters in M-text.
1786 
1787     The mtext_len () function returns the number of characters in
1788     M-text $MT.  */
1789 
/***ja
    @brief M-text 中の文字の数.

    関数 mtext_len () は M-text $MT 中の文字の数を返す。

    @latexonly \IPAlabel{mtext_len} @endlatexonly  */
1796 
1797 int
mtext_len(MText * mt)1798 mtext_len (MText *mt)
1799 {
1800   return (mt->nchars);
1801 }
1802 
1803 /*=*/
1804 
1805 /***en
1806     @brief Return the character at the specified position in an M-text.
1807 
1808     The mtext_ref_char () function returns the character at $POS in
1809     M-text $MT.  If an error is detected, it returns -1 and assigns an
1810     error code to the external variable #merror_code.  */
1811 
/***ja
    @brief M-text 中の指定された位置の文字を返す.

    関数 mtext_ref_char () は、M-text $MT の位置 $POS
    の文字を返す。エラーが検出された場合は -1 を返し、外部変数 #merror_code
    にエラーコードを設定する。

    @latexonly \IPAlabel{mtext_ref_char} @endlatexonly  */
1820 
1821 /***
1822     @errors
1823     @c MERROR_RANGE  */
1824 
1825 int
mtext_ref_char(MText * mt,int pos)1826 mtext_ref_char (MText *mt, int pos)
1827 {
1828   int c;
1829 
1830   M_CHECK_POS (mt, pos, -1);
1831   if (mt->format <= MTEXT_FORMAT_UTF_8)
1832     {
1833       unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos);
1834 
1835       c = STRING_CHAR_UTF8 (p);
1836     }
1837   else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1838     {
1839       unsigned short *p
1840 	= (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
1841       unsigned short p1[2];
1842 
1843       if (mt->format != MTEXT_FORMAT_UTF_16)
1844 	{
1845 	  p1[0] = SWAP_16 (*p);
1846 	  if (p1[0] >= 0xD800 && p1[0] < 0xDC00)
1847 	    p1[1] = SWAP_16 (p[1]);
1848 	  p = p1;
1849 	}
1850       c = STRING_CHAR_UTF16 (p);
1851     }
1852   else
1853     {
1854       c = ((unsigned *) (mt->data))[pos];
1855       if (mt->format != MTEXT_FORMAT_UTF_32)
1856 	c = SWAP_32 (c);
1857     }
1858   return c;
1859 }
1860 
1861 /*=*/
1862 
1863 /***en
1864     @brief Store a character into an M-text.
1865 
1866     The mtext_set_char () function sets character $C, which has no
1867     text properties, at $POS in M-text $MT.
1868 
1869     @return
1870     If the operation was successful, mtext_set_char () returns 0.
1871     Otherwise it returns -1 and assigns an error code to the external
1872     variable #merror_code.  */
1873 
/***ja
    @brief M-text に一文字を設定する.

    関数 mtext_set_char () は、テキストプロパティ無しの文字 $C を
    M-text $MT の位置 $POS に設定する。

    @return
    処理に成功すれば mtext_set_char () は 0 を返す。失敗すれば -1
    を返し、外部変数 #merror_code にエラーコードを設定する。

    @latexonly \IPAlabel{mtext_set_char} @endlatexonly  */
1885 
1886 /***
1887     @errors
1888     @c MERROR_RANGE */
1889 
1890 int
mtext_set_char(MText * mt,int pos,int c)1891 mtext_set_char (MText *mt, int pos, int c)
1892 {
1893   int pos_unit;
1894   int old_units, new_units;
1895   int delta;
1896   unsigned char *p;
1897   int unit_bytes;
1898 
1899   M_CHECK_POS (mt, pos, -1);
1900   M_CHECK_READONLY (mt, -1);
1901 
1902   mtext__adjust_plist_for_change (mt, pos, 1, 1);
1903 
1904   if (mt->format <= MTEXT_FORMAT_UTF_8)
1905     {
1906       if (c >= 0x80)
1907 	mt->format = MTEXT_FORMAT_UTF_8, mt->coverage = MTEXT_COVERAGE_FULL;
1908     }
1909   else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1910     {
1911       if (c >= 0x110000)
1912 	mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1913       else if (mt->format != MTEXT_FORMAT_UTF_16)
1914 	mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
1915     }
1916   else if (mt->format != MTEXT_FORMAT_UTF_32)
1917     mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
1918 
1919   unit_bytes = UNIT_BYTES (mt->format);
1920   pos_unit = POS_CHAR_TO_BYTE (mt, pos);
1921   p = mt->data + pos_unit * unit_bytes;
1922   old_units = CHAR_UNITS_AT (mt, p);
1923   new_units = CHAR_UNITS (c, mt->format);
1924   delta = new_units - old_units;
1925 
1926   if (delta)
1927     {
1928       if (mt->cache_char_pos > pos)
1929 	mt->cache_byte_pos += delta;
1930 
1931       if ((mt->nbytes + delta + 1) * unit_bytes > mt->allocated)
1932 	{
1933 	  mt->allocated = (mt->nbytes + delta + 1) * unit_bytes;
1934 	  MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1935 	}
1936 
1937       memmove (mt->data + (pos_unit + new_units) * unit_bytes,
1938 	       mt->data + (pos_unit + old_units) * unit_bytes,
1939 	       (mt->nbytes - pos_unit - old_units + 1) * unit_bytes);
1940       mt->nbytes += delta;
1941       mt->data[mt->nbytes * unit_bytes] = 0;
1942     }
1943   switch (mt->format)
1944     {
1945     case MTEXT_FORMAT_US_ASCII:
1946       mt->data[pos_unit] = c;
1947       break;
1948     case MTEXT_FORMAT_UTF_8:
1949       {
1950 	unsigned char *p = mt->data + pos_unit;
1951 	CHAR_STRING_UTF8 (c, p);
1952 	break;
1953       }
1954     default:
1955       if (mt->format == MTEXT_FORMAT_UTF_16)
1956 	{
1957 	  unsigned short *p = (unsigned short *) mt->data + pos_unit;
1958 
1959 	  CHAR_STRING_UTF16 (c, p);
1960 	}
1961       else
1962 	((unsigned *) mt->data)[pos_unit] = c;
1963     }
1964   return 0;
1965 }
1966 
1967 /*=*/
1968 
1969 /***en
1970     @brief  Append a character to an M-text.
1971 
1972     The mtext_cat_char () function appends character $C, which has no
1973     text properties, to the end of M-text $MT.
1974 
1975     @return
1976     This function returns a pointer to the resulting M-text $MT.  If
1977     $C is an invalid character, it returns @c NULL.  */
1978 
/***ja
    @brief M-text に一文字追加する.

    関数 mtext_cat_char () は、テキストプロパティ無しの文字 $C を
    M-text $MT の末尾に追加する。

    @return
    この関数は変更された M-text $MT へのポインタを返す。$C
    が正しい文字でない場合には @c NULL を返す。  */
1988 
1989 /***
1990     @seealso
1991     mtext_cat (), mtext_ncat ()  */
1992 
1993 MText *
mtext_cat_char(MText * mt,int c)1994 mtext_cat_char (MText *mt, int c)
1995 {
1996   int nunits;
1997   int unit_bytes = UNIT_BYTES (mt->format);
1998 
1999   M_CHECK_READONLY (mt, NULL);
2000   if (c < 0 || c > MCHAR_MAX)
2001     return NULL;
2002   mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
2003 
2004   if (c >= 0x80
2005       && (mt->format == MTEXT_FORMAT_US_ASCII
2006 	  || (c >= 0x10000
2007 	      && (mt->format == MTEXT_FORMAT_UTF_16LE
2008 		  || mt->format == MTEXT_FORMAT_UTF_16BE))))
2009 
2010     {
2011       mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
2012       unit_bytes = 1;
2013     }
2014   else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
2015     {
2016       if (mt->format != MTEXT_FORMAT_UTF_32)
2017 	mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
2018     }
2019   else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
2020     {
2021       if (mt->format != MTEXT_FORMAT_UTF_16)
2022 	mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
2023     }
2024 
2025   nunits = CHAR_UNITS (c, mt->format);
2026   if ((mt->nbytes + nunits + 1) * unit_bytes > mt->allocated)
2027     {
2028       mt->allocated = (mt->nbytes + nunits * 16 + 1) * unit_bytes;
2029       MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
2030     }
2031 
2032   if (mt->format <= MTEXT_FORMAT_UTF_8)
2033     {
2034       unsigned char *p = mt->data + mt->nbytes;
2035       p += CHAR_STRING_UTF8 (c, p);
2036       *p = 0;
2037     }
2038   else if (mt->format == MTEXT_FORMAT_UTF_16)
2039     {
2040       unsigned short *p = (unsigned short *) mt->data + mt->nbytes;
2041       p += CHAR_STRING_UTF16 (c, p);
2042       *p = 0;
2043     }
2044   else
2045     {
2046       unsigned *p = (unsigned *) mt->data + mt->nbytes;
2047       *p++ = c;
2048       *p = 0;
2049     }
2050 
2051   mt->nchars++;
2052   mt->nbytes += nunits;
2053   return mt;
2054 }
2055 
2056 /*=*/
2057 
2058 /***en
2059     @brief  Create a copy of an M-text.
2060 
2061     The mtext_dup () function creates a copy of M-text $MT while
2062     inheriting all the text properties of $MT.
2063 
2064     @return
2065     This function returns a pointer to the created copy.  */
2066 
/***ja
    @brief M-text のコピーを作る.

    関数 mtext_dup () は、M-text $MT のコピーを作る。$MT
    のテキストプロパティはすべて継承される。

    @return
    この関数は作られたコピーへのポインタを返す。

     @latexonly \IPAlabel{mtext_dup} @endlatexonly  */
2077 
2078 /***
2079     @seealso
2080     mtext_duplicate ()  */
2081 
2082 MText *
mtext_dup(MText * mt)2083 mtext_dup (MText *mt)
2084 {
2085   return mtext_duplicate (mt, 0, mtext_nchars (mt));
2086 }
2087 
2088 /*=*/
2089 
2090 /***en
2091     @brief  Append an M-text to another.
2092 
2093     The mtext_cat () function appends M-text $MT2 to the end of M-text
2094     $MT1 while inheriting all the text properties.  $MT2 itself is not
2095     modified.
2096 
2097     @return
2098     This function returns a pointer to the resulting M-text $MT1.  */
2099 
/***ja
    @brief 2個の M-textを連結する.

    関数 mtext_cat () は、 M-text $MT2 を M-text $MT1
    の末尾に付け加える。$MT2 のテキストプロパティはすべて継承される。$MT2 は変更されない。

    @return
    この関数は変更された M-text $MT1 へのポインタを返す。

    @latexonly \IPAlabel{mtext_cat} @endlatexonly  */
2110 
2111 /***
2112     @seealso
2113     mtext_ncat (), mtext_cat_char ()  */
2114 
2115 MText *
mtext_cat(MText * mt1,MText * mt2)2116 mtext_cat (MText *mt1, MText *mt2)
2117 {
2118   M_CHECK_READONLY (mt1, NULL);
2119 
2120   if (mt2->nchars > 0)
2121     insert (mt1, mt1->nchars, mt2, 0, mt2->nchars);
2122   return mt1;
2123 }
2124 
2125 
2126 /*=*/
2127 
2128 /***en
2129     @brief Append a part of an M-text to another.
2130 
2131     The mtext_ncat () function appends the first $N characters of
2132     M-text $MT2 to the end of M-text $MT1 while inheriting all the
2133     text properties.  If the length of $MT2 is less than $N, all
2134     characters are copied.  $MT2 is not modified.
2135 
2136     @return
2137     If the operation was successful, mtext_ncat () returns a
2138     pointer to the resulting M-text $MT1.  If an error is detected, it
2139     returns @c NULL and assigns an error code to the global variable
2140     #merror_code.  */
2141 
/***ja
    @brief M-text の一部を別の M-text に付加する.

    関数 mtext_ncat () は、M-text $MT2 のはじめの $N 文字を M-text
    $MT1 の末尾に付け加える。$MT2 のテキストプロパティはすべて継承される。$MT2
    の長さが $N 以下ならば、$MT2 のすべての文字が付加される。 $MT2 は変更されない。

    @return
    処理が成功した場合、mtext_ncat () は変更された M-text $MT1
    へのポインタを返す。エラーが検出された場合は @c NULL を返し、外部変数
    #merror_code にエラーコードを設定する。

    @latexonly \IPAlabel{mtext_ncat} @endlatexonly  */
2155 
2156 /***
2157     @errors
2158     @c MERROR_RANGE
2159 
2160     @seealso
2161     mtext_cat (), mtext_cat_char ()  */
2162 
2163 MText *
mtext_ncat(MText * mt1,MText * mt2,int n)2164 mtext_ncat (MText *mt1, MText *mt2, int n)
2165 {
2166   M_CHECK_READONLY (mt1, NULL);
2167   if (n < 0)
2168     MERROR (MERROR_RANGE, NULL);
2169   if (mt2->nchars > 0)
2170     insert (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
2171   return mt1;
2172 }
2173 
2174 
2175 /*=*/
2176 
2177 /***en
2178     @brief Copy an M-text to another.
2179 
2180     The mtext_cpy () function copies M-text $MT2 to M-text $MT1 while
2181     inheriting all the text properties.  The old text in $MT1 is
2182     overwritten and the length of $MT1 is extended if necessary.  $MT2
2183     is not modified.
2184 
2185     @return
2186     This function returns a pointer to the resulting M-text $MT1.  */
2187 
2188 /***ja
2189     @brief M-text ���̤� M-text �˥��ԡ�����.
2190 
2191     �ؿ� mtext_cpy () �� M-text $MT2 �� M-text $MT1 �˾�����ԡ����롣
2192     $MT2 �Υƥ����ȥץ�ѥƥ��Ϥ��٤ƷѾ�����롣$MT1
2193     ��Ĺ����ɬ�פ˱����ƿ��Ф���롣$MT2 ���ѹ�����ʤ���
2194 
2195     @return
2196     ���δؿ����ѹ����줿 M-text $MT1 �ؤΥݥ������֤���
2197 
2198     @latexonly \IPAlabel{mtext_cpy} @endlatexonly  */
2199 
2200 /***
2201     @seealso
2202     mtext_ncpy (), mtext_copy ()  */
2203 
2204 MText *
mtext_cpy(MText * mt1,MText * mt2)2205 mtext_cpy (MText *mt1, MText *mt2)
2206 {
2207   M_CHECK_READONLY (mt1, NULL);
2208   mtext_del (mt1, 0, mt1->nchars);
2209   if (mt2->nchars > 0)
2210     insert (mt1, 0, mt2, 0, mt2->nchars);
2211   return mt1;
2212 }
2213 
2214 /*=*/
2215 
2216 /***en
    @brief Copy the first several characters of an M-text to another.
2218 
2219     The mtext_ncpy () function copies the first $N characters of
2220     M-text $MT2 to M-text $MT1 while inheriting all the text
2221     properties.  If the length of $MT2 is less than $N, all characters
2222     of $MT2 are copied.  The old text in $MT1 is overwritten and the
2223     length of $MT1 is extended if necessary.  $MT2 is not modified.
2224 
2225     @return
2226     If the operation was successful, mtext_ncpy () returns a pointer
2227     to the resulting M-text $MT1.  If an error is detected, it returns
2228     @c NULL and assigns an error code to the global variable
2229     #merror_code.  */
2230 
2231 /***ja
2232     @brief M-text �˴ޤޤ��ǽ�β�ʸ�������ԡ�����.
2233 
2234     �ؿ� mtext_ncpy () �ϡ�M-text $MT2 �κǽ�� $N ʸ���� M-text $MT1
2235     �˾�����ԡ����롣$MT2 �Υƥ����ȥץ�ѥƥ��Ϥ��٤ƷѾ�����롣�⤷ $MT2
2236     ��Ĺ���� $N ���⾮������� $MT2 �Τ��٤Ƥ�ʸ�����ԡ����롣$MT1
2237     ��Ĺ����ɬ�פ˱����ƿ��Ф���롣$MT2 ���ѹ�����ʤ���
2238 
2239     @return
2240     ����������������硢mtext_ncpy () ���ѹ����줿 M-text $MT1
2241     �ؤΥݥ������֤������顼�����Ф��줿���� @c NULL ���֤��������ѿ�
2242     #merror_code �˥��顼�����ɤ����ꤹ�롣
2243 
2244     @latexonly \IPAlabel{mtext_ncpy} @endlatexonly  */
2245 
2246 /***
2247     @errors
2248     @c MERROR_RANGE
2249 
2250     @seealso
2251     mtext_cpy (), mtext_copy ()  */
2252 
2253 MText *
mtext_ncpy(MText * mt1,MText * mt2,int n)2254 mtext_ncpy (MText *mt1, MText *mt2, int n)
2255 {
2256   M_CHECK_READONLY (mt1, NULL);
2257   if (n < 0)
2258     MERROR (MERROR_RANGE, NULL);
2259   mtext_del (mt1, 0, mt1->nchars);
2260   if (mt2->nchars > 0)
2261     insert (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
2262   return mt1;
2263 }
2264 
2265 /*=*/
2266 
2267 /***en
2268     @brief Create a new M-text from a part of an existing M-text.
2269 
2270     The mtext_duplicate () function creates a copy of sub-text of
2271     M-text $MT, starting at $FROM (inclusive) and ending at $TO
2272     (exclusive) while inheriting all the text properties of $MT.  $MT
2273     itself is not modified.
2274 
2275     @return
2276     If the operation was successful, mtext_duplicate ()
2277     returns a pointer to the created M-text.  If an error is detected,
2278     it returns NULL and assigns an error code to the external variable
2279     #merror_code.  */
2280 
2281 /***ja
2282     @brief ��¸�� M-text �ΰ������鿷���� M-text ��Ĥ���.
2283 
2284     �ؿ� mtext_duplicate () �ϡ�M-text $MT �� $FROM ��$FROM ���Τ�ޤ�ˤ���
2285     $TO ��$TO ���Τϴޤޤʤ��ˤޤǤ���ʬ�Υ��ԡ����롣���ΤȤ� $MT
2286     �Υƥ����ȥץ�ѥƥ��Ϥ��٤ƷѾ�����롣$MT ���Τ�Τ��ѹ�����ʤ���
2287 
2288     @return
2289     ��������������С�mtext_duplicate () �Ϻ��줿 M-text
2290     �ؤΥݥ������֤������顼�����Ф��줿���� @c NULL ���֤��������ѿ�
2291     #merror_code �˥��顼�����ɤ����ꤹ�롣
2292 
2293     @latexonly \IPAlabel{mtext_duplicate} @endlatexonly  */
2294 
2295 /***
2296     @errors
2297     @c MERROR_RANGE
2298 
2299     @seealso
2300     mtext_dup ()  */
2301 
2302 MText *
mtext_duplicate(MText * mt,int from,int to)2303 mtext_duplicate (MText *mt, int from, int to)
2304 {
2305   MText *new = mtext ();
2306 
2307   M_CHECK_RANGE (mt, from, to, NULL, new);
2308   new->format = mt->format;
2309   new->coverage = mt->coverage;
2310   insert (new, 0, mt, from, to);
2311   return new;
2312 }
2313 
2314 /*=*/
2315 
2316 /***en
2317     @brief Copy characters in the specified range into an M-text.
2318 
2319     The mtext_copy () function copies the text between $FROM
2320     (inclusive) and $TO (exclusive) in M-text $MT2 to the region
2321     starting at $POS in M-text $MT1 while inheriting the text
2322     properties.  The old text in $MT1 is overwritten and the length of
2323     $MT1 is extended if necessary.  $MT2 is not modified.
2324 
2325     @return
2326     If the operation was successful, mtext_copy () returns a pointer
2327     to the modified $MT1.  Otherwise, it returns @c NULL and assigns
2328     an error code to the external variable #merror_code.  */
2329 
2330 /***ja
2331     @brief M-text �˻����ϰϤ�ʸ�����ԡ�����.
2332 
2333     �ؿ� mtext_copy () �ϡ� M-text $MT2 �� $FROM ��$FROM ���Τ�ޤ�ˤ���
2334     $TO ��$TO ���Τϴޤޤʤ��ˤޤǤ��ϰϤΥƥ����Ȥ� M-text $MT1 �ΰ��� $POS
2335     ���������ԡ����롣$MT2 �Υƥ����ȥץ�ѥƥ��Ϥ��٤ƷѾ�����롣$MT1
2336     ��Ĺ����ɬ�פ˱����ƿ��Ф���롣$MT2 ���ѹ�����ʤ���
2337 
2338     @latexonly \IPAlabel{mtext_copy} @endlatexonly
2339 
2340     @return
2341     ����������������硢mtext_copy () ���ѹ����줿 $MT1
2342     �ؤΥݥ������֤��������Ǥʤ���� @c NULL ���֤��������ѿ� #merror_code
2343     �˥��顼�����ɤ����ꤹ�롣  */
2344 
2345 /***
2346     @errors
2347     @c MERROR_RANGE
2348 
2349     @seealso
2350     mtext_cpy (), mtext_ncpy ()  */
2351 
2352 MText *
mtext_copy(MText * mt1,int pos,MText * mt2,int from,int to)2353 mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to)
2354 {
2355   M_CHECK_POS_X (mt1, pos, NULL);
2356   M_CHECK_READONLY (mt1, NULL);
2357   M_CHECK_RANGE_X (mt2, from, to, NULL);
2358   mtext_del (mt1, pos, mt1->nchars);
2359   return insert (mt1, pos, mt2, from, to);
2360 }
2361 
2362 /*=*/
2363 
2364 
2365 /***en
2366     @brief Delete characters in the specified range destructively.
2367 
2368     The mtext_del () function deletes the characters in the range
2369     $FROM (inclusive) and $TO (exclusive) from M-text $MT
2370     destructively.  As a result, the length of $MT shrinks by ($TO -
2371     $FROM) characters.
2372 
2373     @return
2374     If the operation was successful, mtext_del () returns 0.
2375     Otherwise, it returns -1 and assigns an error code to the external
2376     variable #merror_code.  */
2377 
2378 /***ja
2379     @brief �����ϰϤ�ʸ�����˲�Ū�˼�����.
2380 
2381     �ؿ� mtext_del () �ϡ�M-text $MT �� $FROM ��$FROM ���Τ�ޤ�ˤ���
2382     $TO ��$TO ���Τϴޤޤʤ��ˤޤǤ�ʸ�����˲�Ū�˼����������Ū��
2383     $MT ��Ĺ���� ($TO @c - $FROM) �����̤ळ�Ȥˤʤ롣
2384 
2385     @return
2386     ��������������� mtext_del () �� 0 ���֤��������Ǥʤ���� -1
2387     ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣  */
2388 
2389 /***
2390     @errors
2391     @c MERROR_RANGE
2392 
2393     @seealso
2394     mtext_ins ()  */
2395 
2396 int
mtext_del(MText * mt,int from,int to)2397 mtext_del (MText *mt, int from, int to)
2398 {
2399   int from_byte, to_byte;
2400   int unit_bytes = UNIT_BYTES (mt->format);
2401 
2402   M_CHECK_READONLY (mt, -1);
2403   M_CHECK_RANGE (mt, from, to, -1, 0);
2404 
2405   from_byte = POS_CHAR_TO_BYTE (mt, from);
2406   to_byte = POS_CHAR_TO_BYTE (mt, to);
2407 
2408   if (mt->cache_char_pos >= to)
2409     {
2410       mt->cache_char_pos -= to - from;
2411       mt->cache_byte_pos -= to_byte - from_byte;
2412     }
2413   else if (mt->cache_char_pos > from)
2414     {
2415       mt->cache_char_pos -= from;
2416       mt->cache_byte_pos -= from_byte;
2417     }
2418 
2419   mtext__adjust_plist_for_delete (mt, from, to - from);
2420   memmove (mt->data + from_byte * unit_bytes,
2421 	   mt->data + to_byte * unit_bytes,
2422 	   (mt->nbytes - to_byte + 1) * unit_bytes);
2423   mt->nchars -= (to - from);
2424   mt->nbytes -= (to_byte - from_byte);
2425   mt->cache_char_pos = from;
2426   mt->cache_byte_pos = from_byte;
2427   return 0;
2428 }
2429 
2430 
2431 /*=*/
2432 
2433 /***en
2434     @brief Insert an M-text into another M-text.
2435 
    The mtext_ins () function inserts M-text $MT2 into M-text $MT1, at
    position $POS.  As a result, $MT1 is lengthened by the length of
    $MT2.  On insertion, all the text properties of $MT2 are
    inherited.  The original $MT2 is not modified.
2440 
2441     @return
2442     If the operation was successful, mtext_ins () returns 0.
2443     Otherwise, it returns -1 and assigns an error code to the external
2444     variable #merror_code.  */
2445 
2446 /***ja
2447     @brief M-text ���̤� M-text ����������.
2448 
2449     �ؿ� mtext_ins () �� M-text $MT1 �� $POS �ΰ��֤��̤� M-text $MT2
2450     ���������롣���η�� $MT1 ��Ĺ���� $MT2 ��Ĺ��ʬ���������롣�����κݡ�$MT2
2451     �Υƥ����ȥץ�ѥƥ��Ϥ��٤ƷѾ�����롣$MT2 ���Τ�Τ��ѹ�����ʤ���
2452 
2453     @return
2454     ��������������� mtext_ins () �� 0 ���֤��������Ǥʤ���� -1
2455     ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣  */
2456 
2457 /***
2458     @errors
2459     @c MERROR_RANGE , @c MERROR_MTEXT
2460 
2461     @seealso
2462     mtext_del () , mtext_insert ()  */
2463 
2464 int
mtext_ins(MText * mt1,int pos,MText * mt2)2465 mtext_ins (MText *mt1, int pos, MText *mt2)
2466 {
2467   M_CHECK_READONLY (mt1, -1);
2468   M_CHECK_POS_X (mt1, pos, -1);
2469 
2470   if (mt2->nchars == 0)
2471     return 0;
2472   insert (mt1, pos, mt2, 0, mt2->nchars);
2473   return 0;
2474 }
2475 
2476 /*=*/
2477 
2478 /***en
2479     @brief Insert sub-text of an M-text into another M-text.
2480 
    The mtext_insert () function inserts sub-text of M-text $MT2
    between $FROM (inclusive) and $TO (exclusive) into M-text $MT1, at
    position $POS.  As a result, $MT1 is lengthened by ($TO - $FROM).
    On insertion, all the text properties of the sub-text of $MT2 are
    inherited.
2486 
2487     @return
2488     If the operation was successful, mtext_insert () returns
2489     0.  Otherwise, it returns -1 and assigns an error code to the
2490     external variable #merror_code.  */
2491 
2492 /***ja
2493     @brief M-text �ΰ������̤� M-text ����������.
2494 
2495     �ؿ� mtext_insert () �� M-text $MT1 ��� $POS �ΰ��֤ˡ��̤�
2496     M-text $MT2 �� $FROM ��$FROM ���Τ�ޤ�ˤ��� $TO ��$TO ���Τϴޤ�
2497     �ʤ��ˤޤǤ�ʸ�����������롣���Ū�� $MT1 ��Ĺ���� ($TO - $FROM)
2498     �������Ӥ롣�����κݡ� $MT2 ��Υƥ����ȥץ�ѥƥ��Ϥ��٤ƷѾ�����
2499     �롣
2500 
2501     @return
2502     ��������������С�mtext_insert () �� 0 ���֤��������Ǥʤ���� -1
2503     ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣  */
2504 
2505 /***
2506     @errors
2507     @c MERROR_MTEXT , @c MERROR_RANGE
2508 
2509     @seealso
2510     mtext_ins ()  */
2511 
2512 int
mtext_insert(MText * mt1,int pos,MText * mt2,int from,int to)2513 mtext_insert (MText *mt1, int pos, MText *mt2, int from, int to)
2514 {
2515   M_CHECK_READONLY (mt1, -1);
2516   M_CHECK_POS_X (mt1, pos, -1);
2517   M_CHECK_RANGE (mt2, from, to, -1, 0);
2518 
2519   insert (mt1, pos, mt2, from, to);
2520   return 0;
2521 }
2522 
2523 /*=*/
2524 
2525 /***en
2526     @brief Insert a character into an M-text.
2527 
    The mtext_ins_char () function inserts $N copies of character $C
    into M-text $MT at position $POS.  As a result, $MT is lengthened
    by $N.
2531 
2532     @return
    If the operation was successful, mtext_ins_char () returns 0.
    Otherwise, it returns -1 and assigns an error code to the external
    variable #merror_code.  */
2536 
2537 /***ja
2538     @brief M-text ��ʸ������������.
2539 
2540     �ؿ� mtext_ins_char () �� M-text $MT �� $POS �ΰ��֤�ʸ�� $C �Υ��ԡ��� $N
2541     ���������롣���η�� $MT1 ��Ĺ���� $N ���������롣
2542 
2543     @return
2544     ��������������� mtext_ins_char () �� 0 ���֤��������Ǥʤ���� -1
2545     ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣  */
2546 
2547 /***
    @errors
    @c MERROR_RANGE , @c MERROR_MTEXT

    @seealso
    mtext_ins (), mtext_del ()  */
2553 
2554 int
mtext_ins_char(MText * mt,int pos,int c,int n)2555 mtext_ins_char (MText *mt, int pos, int c, int n)
2556 {
2557   int nunits;
2558   int unit_bytes = UNIT_BYTES (mt->format);
2559   int pos_unit;
2560   int i;
2561 
2562   M_CHECK_READONLY (mt, -1);
2563   M_CHECK_POS_X (mt, pos, -1);
2564   if (c < 0 || c > MCHAR_MAX)
2565     MERROR (MERROR_MTEXT, -1);
2566   if (n <= 0)
2567     return 0;
2568   mtext__adjust_plist_for_insert (mt, pos, n, NULL);
2569 
2570   if (c >= 0x80
2571       && (mt->format == MTEXT_FORMAT_US_ASCII
2572 	  || (c >= 0x10000 && (mt->format == MTEXT_FORMAT_UTF_16LE
2573 			       || mt->format == MTEXT_FORMAT_UTF_16BE))))
2574     {
2575       mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
2576       unit_bytes = 1;
2577     }
2578   else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
2579     {
2580       if (mt->format != MTEXT_FORMAT_UTF_32)
2581 	mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
2582     }
2583   else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
2584     {
2585       if (mt->format != MTEXT_FORMAT_UTF_16)
2586 	mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
2587     }
2588 
2589   nunits = CHAR_UNITS (c, mt->format);
2590   if ((mt->nbytes + nunits * n + 1) * unit_bytes > mt->allocated)
2591     {
2592       mt->allocated = (mt->nbytes + nunits * n + 1) * unit_bytes;
2593       MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
2594     }
2595   pos_unit = POS_CHAR_TO_BYTE (mt, pos);
2596   if (mt->cache_char_pos > pos)
2597     {
2598       mt->cache_char_pos += n;
2599       mt->cache_byte_pos += nunits * n;
2600     }
2601   memmove (mt->data + (pos_unit + nunits * n) * unit_bytes,
2602 	   mt->data + pos_unit * unit_bytes,
2603 	   (mt->nbytes - pos_unit + 1) * unit_bytes);
2604   if (mt->format <= MTEXT_FORMAT_UTF_8)
2605     {
2606       unsigned char *p = mt->data + pos_unit;
2607 
2608       for (i = 0; i < n; i++)
2609 	p += CHAR_STRING_UTF8 (c, p);
2610     }
2611   else if (mt->format == MTEXT_FORMAT_UTF_16)
2612     {
2613       unsigned short *p = (unsigned short *) mt->data + pos_unit;
2614 
2615       for (i = 0; i < n; i++)
2616 	p += CHAR_STRING_UTF16 (c, p);
2617     }
2618   else
2619     {
2620       unsigned *p = (unsigned *) mt->data + pos_unit;
2621 
2622       for (i = 0; i < n; i++)
2623 	*p++ = c;
2624     }
2625   mt->nchars += n;
2626   mt->nbytes += nunits * n;
2627   return 0;
2628 }
2629 
2630 /*=*/
2631 
2632 /***en
2633     @brief Replace sub-text of M-text with another.
2634 
2635     The mtext_replace () function replaces sub-text of M-text $MT1
2636     between $FROM1 (inclusive) and $TO1 (exclusive) with the sub-text
2637     of M-text $MT2 between $FROM2 (inclusive) and $TO2 (exclusive).
2638     The new sub-text inherits text properties of the old sub-text.
2639 
2640     @return
2641     If the operation was successful, mtext_replace () returns
2642     0.  Otherwise, it returns -1 and assigns an error code to the
2643     external variable #merror_code.  */
2644 
2645 /***ja
2646     @brief M-text �ΰ������̤� M-text �ΰ������ִ�����.
2647 
2648     �ؿ� mtext_replace () �ϡ� M-text $MT1 �� $FROM1 ��$FROM1 ���Τ��
2649     ��ˤ��� $TO1 ��$TO1 ���Τϴޤޤʤ��ˤޤǤ� M-text $MT2 ��
2650     $FROM2 ��$FROM2 ���Τ�ޤ�ˤ��� $TO2 ��$TO2 ���Τϴޤޤʤ��ˤ���
2651     �������롣�������������줿��ʬ�ϡ��֤����������Υƥ����ȥץ�ѥƥ�
2652     ���٤Ƥ�Ѿ����롣
2653 
2654     @return
2655     ��������������С� mtext_replace () �� 0 ���֤��������Ǥ�
2656     ����� -1 ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣  */
2657 
2658 /***
2659     @errors
2660     @c MERROR_MTEXT , @c MERROR_RANGE
2661 
2662     @seealso
2663     mtext_insert ()  */
2664 
2665 int
mtext_replace(MText * mt1,int from1,int to1,MText * mt2,int from2,int to2)2666 mtext_replace (MText *mt1, int from1, int to1,
2667 	       MText *mt2, int from2, int to2)
2668 {
2669   int len1, len2;
2670   int from1_byte, from2_byte, old_bytes, new_bytes;
2671   int unit_bytes, total_bytes;
2672   unsigned char *p;
2673   int free_mt2 = 0;
2674 
2675   M_CHECK_READONLY (mt1, -1);
2676   M_CHECK_RANGE_X (mt1, from1, to1, -1);
2677   M_CHECK_RANGE_X (mt2, from2, to2, -1);
2678 
2679   if (from1 == to1)
2680     {
2681       struct MTextPlist *saved = mt2->plist;
2682 
2683       mt2->plist = NULL;
2684       insert (mt1, from1, mt2, from2, to2);
2685       mt2->plist = saved;
2686       return 0;
2687     }
2688 
2689   if (from2 == to2)
2690     {
2691       return mtext_del (mt1, from1, to1);
2692     }
2693 
2694   if (mt1 == mt2)
2695     {
2696       mt2 = mtext_duplicate (mt2, from2, to2);
2697       to2 -= from2;
2698       from2 = 0;
2699       free_mt2 = 1;
2700     }
2701 
2702   if (mt1->format != mt2->format
2703       && mt1->format == MTEXT_FORMAT_US_ASCII)
2704     mt1->format = MTEXT_FORMAT_UTF_8;
2705   if (mt1->format != mt2->format
2706       && mt1->coverage < mt2->coverage)
2707     mtext__adjust_format (mt1, mt2->format);
2708   if (mt1->format != mt2->format)
2709     {
2710       mt2 = mtext_duplicate (mt2, from2, to2);
2711       mtext__adjust_format (mt2, mt1->format);
2712       to2 -= from2;
2713       from2 = 0;
2714       free_mt2 = 1;
2715     }
2716 
2717   len1 = to1 - from1;
2718   len2 = to2 - from2;
2719   mtext__adjust_plist_for_change (mt1, from1, len1, len2);
2720 
2721   unit_bytes = UNIT_BYTES (mt1->format);
2722   from1_byte = POS_CHAR_TO_BYTE (mt1, from1) * unit_bytes;
2723   from2_byte = POS_CHAR_TO_BYTE (mt2, from2) * unit_bytes;
2724   old_bytes = POS_CHAR_TO_BYTE (mt1, to1) * unit_bytes - from1_byte;
2725   new_bytes = POS_CHAR_TO_BYTE (mt2, to2) * unit_bytes - from2_byte;
2726   total_bytes = mt1->nbytes * unit_bytes + (new_bytes - old_bytes);
2727   if (total_bytes + unit_bytes > mt1->allocated)
2728     {
2729       mt1->allocated = total_bytes + unit_bytes;
2730       MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
2731     }
2732   p = mt1->data + from1_byte;
2733   if (to1 < mt1->nchars
2734       && old_bytes != new_bytes)
2735     memmove (p + new_bytes, p + old_bytes,
2736 	     (mt1->nbytes + 1) * unit_bytes - (from1_byte + old_bytes));
2737   memcpy (p, mt2->data + from2_byte, new_bytes);
2738   mt1->nchars += len2 - len1;
2739   mt1->nbytes += (new_bytes - old_bytes) / unit_bytes;
2740   if (mt1->cache_char_pos >= to1)
2741     {
2742       mt1->cache_char_pos += len2 - len1;
2743       mt1->cache_byte_pos += new_bytes - old_bytes;
2744     }
2745   else if (mt1->cache_char_pos > from1)
2746     {
2747       mt1->cache_char_pos = from1;
2748       mt1->cache_byte_pos = from1_byte;
2749     }
2750 
2751   if (free_mt2)
2752     M17N_OBJECT_UNREF (mt2);
2753   return 0;
2754 }
2755 
2756 /*=*/
2757 
2758 /***en
2759     @brief Search a character in an M-text.
2760 
2761     The mtext_character () function searches M-text $MT for character
2762     $C.  If $FROM is less than $TO, the search begins at position $FROM
2763     and goes forward but does not exceed ($TO - 1).  Otherwise, the search
2764     begins at position ($FROM - 1) and goes backward but does not
2765     exceed $TO.  An invalid position specification is regarded as both
2766     $FROM and $TO being 0.
2767 
2768     @return
2769     If $C is found, mtext_character () returns the position of its
2770     first occurrence.  Otherwise it returns -1 without changing the
2771     external variable #merror_code.  If an error is detected, it returns -1 and
2772     assigns an error code to the external variable #merror_code.  */
2773 
2774 /***ja
2775     @brief M-text ���ʸ����õ��.
2776 
2777     �ؿ� mtext_character () �� M-text $MT ���ʸ�� $C ��õ�����⤷
2778     $FROM �� $TO ��꾮������С�õ���ϰ��� $FROM �������������ء�����
2779     ($TO - 1) �ޤǿʤࡣ�����Ǥʤ���а��� ($FROM - 1) ������Ƭ�����ء�����
2780     $TO �ޤǿʤࡣ���֤λ���˸�꤬������ϡ�$FROM �� $TO
2781     ��ξ���� 0 �����ꤵ�줿��ΤȤߤʤ���
2782 
2783     @return
2784     �⤷ $C �����Ĥ���С�mtext_character ()
2785     �Ϥ��κǽ�νи����֤��֤������Ĥ���ʤ��ä����ϳ����ѿ� #merror_code
2786     ���ѹ������� -1 ���֤������顼�����Ф��줿���� -1 ���֤��������ѿ�
2787     #merror_code �˥��顼�����ɤ����ꤹ�롣  */
2788 
2789 /***
2790     @seealso
2791     mtext_chr(), mtext_rchr ()  */
2792 
2793 int
mtext_character(MText * mt,int from,int to,int c)2794 mtext_character (MText *mt, int from, int to, int c)
2795 {
2796   if (from < to)
2797     {
2798       /* We do not use M_CHECK_RANGE () because this function should
2799 	 not set merror_code.  */
2800       if (from < 0 || to > mt->nchars)
2801 	return -1;
2802       return find_char_forward (mt, from, to, c);
2803     }
2804   else
2805     {
2806       /* ditto */
2807       if (to < 0 || from > mt->nchars)
2808 	return -1;
2809       return find_char_backward (mt, to, from, c);
2810     }
2811 }
2812 
2813 
2814 /*=*/
2815 
2816 /***en
2817     @brief Return the position of the first occurrence of a character in an M-text.
2818 
2819     The mtext_chr () function searches M-text $MT for character $C.
2820     The search starts from the beginning of $MT and goes toward the end.
2821 
2822     @return
2823     If $C is found, mtext_chr () returns its position; otherwise it
2824     returns -1.  */
2825 
2826 /***ja
2827     @brief M-text ��ǻ��ꤵ�줿ʸ�����ǽ�˸������֤��֤�.
2828 
2829     �ؿ� mtext_chr () �� M-text $MT ���ʸ�� $C ��õ����õ���� $MT
2830     ����Ƭ�������������˿ʤࡣ
2831 
2832     @return
2833     �⤷ $C �����Ĥ���С�mtext_chr ()
2834     �Ϥ��νи����֤��֤������Ĥ���ʤ��ä����� -1 ���֤���
2835 
2836     @latexonly \IPAlabel{mtext_chr} @endlatexonly  */
2837 
2838 /***
2839     @errors
2840     @c MERROR_RANGE
2841 
2842     @seealso
2843     mtext_rchr (), mtext_character ()  */
2844 
2845 int
mtext_chr(MText * mt,int c)2846 mtext_chr (MText *mt, int c)
2847 {
2848   return find_char_forward (mt, 0, mt->nchars, c);
2849 }
2850 
2851 /*=*/
2852 
2853 /***en
2854     @brief Return the position of the last occurrence of a character in an M-text.
2855 
2856     The mtext_rchr () function searches M-text $MT for character $C.
2857     The search starts from the end of $MT and goes backwardly toward the
2858     beginning.
2859 
2860     @return
2861     If $C is found, mtext_rchr () returns its position; otherwise it
2862     returns -1.  */
2863 
2864 /***ja
2865     @brief M-text ��ǻ��ꤵ�줿ʸ�����Ǹ�˸������֤��֤�.
2866 
2867     �ؿ� mtext_rchr () �� M-text $MT ���ʸ�� $C ��õ����õ���� $MT
2868     �κǸ夫����Ƭ�����ؤȸ�����˿ʤࡣ
2869 
2870     @return
2871     �⤷ $C �����Ĥ���С�mtext_rchr ()
2872     �Ϥ��νи����֤��֤������Ĥ���ʤ��ä����� -1 ���֤���
2873 
2874     @latexonly \IPAlabel{mtext_rchr} @endlatexonly  */
2875 
2876 /***
2877     @errors
2878     @c MERROR_RANGE
2879 
2880     @seealso
2881     mtext_chr (), mtext_character ()  */
2882 
2883 int
mtext_rchr(MText * mt,int c)2884 mtext_rchr (MText *mt, int c)
2885 {
2886   return find_char_backward (mt, mt->nchars, 0, c);
2887 }
2888 
2889 
2890 /*=*/
2891 
2892 /***en
2893     @brief Compare two M-texts character-by-character.
2894 
2895     The mtext_cmp () function compares M-texts $MT1 and $MT2 character
2896     by character.
2897 
2898     @return
2899     This function returns 1, 0, or -1 if $MT1 is found greater than,
2900     equal to, or less than $MT2, respectively.  Comparison is based on
2901     character codes.  */
2902 
2903 /***ja
2904     @brief ��Ĥ� M-text ��ʸ��ñ�̤���Ӥ���.
2905 
2906     �ؿ� mtext_cmp () �ϡ� M-text $MT1 �� $MT2 ��ʸ��ñ�̤���Ӥ��롣
2907 
2908     @return
2909     ���δؿ��ϡ�$MT1 �� $MT2 ����������� 0��$MT1 �� $MT2 ����礭�����
2910     1��$MT1 �� $MT2 ��꾮������� -1 ���֤�����Ӥ�ʸ�������ɤ˴�Ť���
2911 
2912     @latexonly \IPAlabel{mtext_cmp} @endlatexonly  */
2913 
2914 /***
2915     @seealso
2916     mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2917     mtext_compare (), mtext_case_compare ()  */
2918 
2919 int
mtext_cmp(MText * mt1,MText * mt2)2920 mtext_cmp (MText *mt1, MText *mt2)
2921 {
2922   return compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2923 }
2924 
2925 
2926 /*=*/
2927 
2928 /***en
2929     @brief Compare initial parts of two M-texts character-by-character.
2930 
2931     The mtext_ncmp () function is similar to mtext_cmp (), but
2932     compares at most $N characters from the beginning.
2933 
2934     @return
2935     This function returns 1, 0, or -1 if $MT1 is found greater than,
2936     equal to, or less than $MT2, respectively.  */
2937 
2938 /***ja
2939     @brief ��Ĥ� M-text ����Ƭ��ʬ��ʸ��ñ�̤���Ӥ���.
2940 
2941     �ؿ� mtext_ncmp () �ϡ��ؿ� mtext_cmp () Ʊ�ͤ� M-text
2942     Ʊ�Τ���Ӥ���Ƭ������� $N ʸ���ޤǤ˴ؤ��ƹԤʤ���
2943 
2944     @return
2945     ���δؿ��ϡ�$MT1 �� $MT2 ����������� 0��$MT1 �� $MT2 ����礭�����
2946     1��$MT1 �� $MT2 ��꾮������� -1 ���֤���
2947 
2948     @latexonly \IPAlabel{mtext_ncmp} @endlatexonly  */
2949 
2950 /***
2951     @seealso
2952     mtext_cmp (), mtext_casecmp (), mtext_ncasecmp ()
2953     mtext_compare (), mtext_case_compare ()  */
2954 
2955 int
mtext_ncmp(MText * mt1,MText * mt2,int n)2956 mtext_ncmp (MText *mt1, MText *mt2, int n)
2957 {
2958   if (n < 0)
2959     return 0;
2960   return compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2961 		  mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2962 }
2963 
2964 /*=*/
2965 
2966 /***en
2967     @brief Compare specified regions of two M-texts.
2968 
2969     The mtext_compare () function compares two M-texts $MT1 and $MT2,
2970     character-by-character.  The compared regions are between $FROM1
2971     and $TO1 in $MT1 and $FROM2 to $TO2 in MT2.  $FROM1 and $FROM2 are
2972     inclusive, $TO1 and $TO2 are exclusive.  $FROM1 being equal to
2973     $TO1 (or $FROM2 being equal to $TO2) means an M-text of length
2974     zero.  An invalid region specification is regarded as both $FROM1
2975     and $TO1 (or $FROM2 and $TO2) being 0.
2976 
2977     @return
2978     This function returns 1, 0, or -1 if $MT1 is found greater than,
2979     equal to, or less than $MT2, respectively.  Comparison is based on
2980     character codes.  */
2981 
2982 /***ja
2983     @brief ��Ĥ� M-text �λ��ꤷ���ΰ�Ʊ�Τ���Ӥ���.
2984 
2985     �ؿ� mtext_compare () ����Ĥ� M-text $MT1 �� $MT2
2986     ��ʸ��ñ�̤���Ӥ��롣��Ӥ��оݤ� $MT1 �Τ��� $FROM1 ���� $TO1 �ޤǤȡ�$MT2
2987     �Τ��� $FROM2 ���� $TO2 �ޤǤǤ��롣$FROM1 �� $FROM2 �ϴޤޤ졢$TO1
2988     �� $TO2 �ϴޤޤ�ʤ���$FROM1 �� $TO1 �ʤ��뤤�� $FROM2 �� $TO2
2989     �ˤ�����������Ĺ������� M-text ���̣���롣�ϰϻ���˸�꤬������ϡ�
2990     $FROM1 �� $TO1 �ʤ��뤤�� $FROM2 �� $TO2 �� ξ���� 0 �����ꤵ�줿��ΤȤߤʤ���
2991 
2992     @return
2993     ���δؿ��ϡ�$MT1 �� $MT2 ����������� 0��$MT1 �� $MT2 ����礭�����
2994     1 ��$MT1 �� $MT2 ��꾮������� -1 ���֤�����Ӥ�ʸ�������ɤ˴�Ť���  */
2995 
2996 /***
2997     @seealso
2998     mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2999     mtext_case_compare ()  */
3000 
3001 int
mtext_compare(MText * mt1,int from1,int to1,MText * mt2,int from2,int to2)3002 mtext_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
3003 {
3004   if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
3005     from1 = to1 = 0;
3006 
3007   if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
3008     from2 = to2 = 0;
3009 
3010   return compare (mt1, from1, to1, mt2, from2, to2);
3011 }
3012 
3013 /*=*/
3014 
3015 /***en
3016     @brief Search an M-text for a set of characters.
3017 
3018     The mtext_spn () function returns the length of the initial
3019     segment of M-text $MT1 that consists entirely of characters in
3020     M-text $MT2.  */
3021 
3022 /***ja
3023     @brief ���뽸���ʸ���� M-text �����õ��.
3024 
3025     �ؿ� mtext_spn () �ϡ�M-text $MT1 ����Ƭ���� M-text $MT2
3026     �˴ޤޤ��ʸ�������ǤǤ��Ƥ�����ʬ��Ĺ�����֤���
3027 
3028     @latexonly \IPAlabel{mtext_spn} @endlatexonly  */
3029 
3030 /***
3031     @seealso
3032     mtext_cspn ()  */
3033 
3034 int
mtext_spn(MText * mt,MText * accept)3035 mtext_spn (MText *mt, MText *accept)
3036 {
3037   return span (mt, accept, 0, Mnil);
3038 }
3039 
3040 /*=*/
3041 
3042 /***en
3043     @brief Search an M-text for the complement of a set of characters.
3044 
3045     The mtext_cspn () returns the length of the initial segment of
3046     M-text $MT1 that consists entirely of characters not in M-text $MT2.  */
3047 
3048 /***ja
3049     @brief ���뽸���°���ʤ�ʸ���� M-text �����õ��.
3050 
3051     �ؿ� mtext_cspn () �ϡ�M-text $MT1 ����Ƭ��ʬ�� M-text $MT2
3052     �˴ޤޤ�ʤ�ʸ�������ǤǤ��Ƥ�����ʬ��Ĺ�����֤���
3053 
3054     @latexonly \IPAlabel{mtext_cspn} @endlatexonly  */
3055 
3056 /***
3057     @seealso
3058     mtext_spn ()  */
3059 
3060 int
mtext_cspn(MText * mt,MText * reject)3061 mtext_cspn (MText *mt, MText *reject)
3062 {
3063   return span (mt, reject, 0, Mt);
3064 }
3065 
3066 /*=*/
3067 
3068 /***en
3069     @brief Search an M-text for any of a set of characters.
3070 
3071     The mtext_pbrk () function locates the first occurrence in M-text
3072     $MT1 of any of the characters in M-text $MT2.
3073 
3074     @return
3075     This function returns the position in $MT1 of the found character.
3076     If no such character is found, it returns -1. */
3077 
3078 /***ja
3079     @brief ���뽸���°��ʸ���� M-text ���椫��õ��.
3080 
3081     �ؿ� mtext_pbrk () �ϡ�M-text $MT1 ��� M-text $MT2
3082     ��ʸ���Τɤ줫���ǽ�˸������֤�Ĵ�٤롣
3083 
3084     @return
3085     ���Ĥ��ä�ʸ���Ρ�$MT1
3086     ��ˤ�����и����֤��֤����⤷���Τ褦��ʸ�����ʤ���� -1 ���֤���
3087 
3088     @latexonly \IPAlabel{mtext_pbrk} @endlatexonly  */
3089 
3090 int
mtext_pbrk(MText * mt,MText * accept)3091 mtext_pbrk (MText *mt, MText *accept)
3092 {
3093   int nchars = mtext_nchars (mt);
3094   int len = span (mt, accept, 0, Mt);
3095 
3096   return (len == nchars ? -1 : len);
3097 }
3098 
3099 /*=*/
3100 
3101 /***en
3102     @brief Look for a token in an M-text.
3103 
    The mtext_tok () function searches for the first token that occurs
    at or after position $POS in M-text $MT.  Here, a token means a
    substring none of whose characters appears in M-text $DELIM.  Note
    that the type of $POS is not @c int but pointer to @c int.
3108 
3109     @return
3110     If a token is found, mtext_tok () copies the corresponding part of
3111     $MT and returns a pointer to the copy.  In this case, $POS is set
3112     to the end of the found token.  If no token is found, it returns
3113     @c NULL without changing the external variable #merror_code.  If an
3114     error is detected, it returns @c NULL and assigns an error code
3115     to the external variable #merror_code. */
3116 
3117 /***ja
3118     @brief M-text ��Υȡ������õ��.
3119 
3120     �ؿ� mtext_tok () �ϡ�M-text $MT ����ǰ��� $POS
3121     �ʹߺǽ�˸����ȡ������õ���������ǥȡ�����Ȥ� M-text $DELIM
3122     ����˸����ʤ�ʸ����������ʤ���ʬʸ����Ǥ��롣$POS �η��� @c int �ǤϤʤ��� @c
3123     int �ؤΥݥ����Ǥ��뤳�Ȥ���ա�
3124 
3125     @return
3126     �⤷�ȡ��������Ĥ���� mtext_tok ()�Ϥ��Υȡ����������������ʬ��
3127     $MT ���ԡ��������Υ��ԡ��ؤΥݥ������֤������ξ�硢$POS
3128     �ϸ��Ĥ��ä��ȡ�����ν�ü�˥��åȤ���롣�ȡ��������Ĥ���ʤ��ä����ϳ����ѿ�
3129     #merror_code ���Ѥ����� @c NULL ���֤������顼�����Ф��줿����
3130     @c NULL ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣
3131 
3132     @latexonly \IPAlabel{mtext_tok} @endlatexonly  */
3133 
3134 /***
3135     @errors
3136     @c MERROR_RANGE  */
3137 
3138 MText *
mtext_tok(MText * mt,MText * delim,int * pos)3139 mtext_tok (MText *mt, MText *delim, int *pos)
3140 {
3141   int nchars = mtext_nchars (mt);
3142   int pos2;
3143 
3144   M_CHECK_POS (mt, *pos, NULL);
3145 
3146   /*
3147     Skip delimiters starting at POS in MT.
3148     Never do *pos += span(...), or you will change *pos
3149     even though no token is found.
3150    */
3151   pos2 = *pos + span (mt, delim, *pos, Mnil);
3152 
3153   if (pos2 == nchars)
3154     return NULL;
3155 
3156   *pos = pos2 + span (mt, delim, pos2, Mt);
3157   return (insert (mtext (), 0, mt, pos2, *pos));
3158 }
3159 
3160 /*=*/
3161 
3162 /***en
3163     @brief Locate an M-text in another.
3164 
3165     The mtext_text () function finds the first occurrence of M-text
3166     $MT2 in M-text $MT1 after the position $POS while ignoring
3167     difference of the text properties.
3168 
3169     @return
    If $MT2 is found in $MT1, mtext_text () returns the position of
    its first occurrence.  Otherwise it returns -1.  If $MT2 is
    empty, it returns 0.  */
3173 
3174 /***ja
3175     @brief M-text ����̤� M-text ��õ��.
3176 
3177     �ؿ� mtext_text () �ϡ�M-text $MT1 ��ǰ��� $POS �ʹߤ˸�����
3178     M-text $MT2 �κǽ�ΰ��֤�Ĵ�٤롣�ƥ����ȥץ�ѥƥ��ΰ㤤��̵�뤵��롣
3179 
3180     @return
3181     $MT1 ��� $MT2 �����Ĥ���С�mtext_text()
3182     �Ϥ��κǽ�νи����֤��֤������Ĥ���ʤ����� -1 ���֤����⤷ $MT2 �����ʤ�� 0 ���֤���
3183 
3184     @latexonly \IPAlabel{mtext_text} @endlatexonly  */
3185 
3186 int
mtext_text(MText * mt1,int pos,MText * mt2)3187 mtext_text (MText *mt1, int pos, MText *mt2)
3188 {
3189   int from = pos;
3190   int c = mtext_ref_char (mt2, 0);
3191   int nbytes2 = mtext_nbytes (mt2);
3192   int limit;
3193   int use_memcmp = (mt1->format == mt2->format
3194 		    || (mt1->format < MTEXT_FORMAT_UTF_8
3195 			&& mt2->format == MTEXT_FORMAT_UTF_8));
3196   int unit_bytes = UNIT_BYTES (mt1->format);
3197 
3198   if (from + mtext_nchars (mt2) > mtext_nchars (mt1))
3199     return -1;
3200   limit = mtext_nchars (mt1) - mtext_nchars (mt2) + 1;
3201 
3202   while (1)
3203     {
3204       int pos_byte;
3205 
3206       if ((pos = mtext_character (mt1, from, limit, c)) < 0)
3207 	return -1;
3208       pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
3209       if (use_memcmp
3210 	  ? ! memcmp (mt1->data + pos_byte * unit_bytes,
3211 		      mt2->data, nbytes2 * unit_bytes)
3212 	  : ! compare (mt1, pos, mt2->nchars, mt2, 0, mt2->nchars))
3213 	break;
3214       from = pos + 1;
3215     }
3216   return pos;
3217 }
3218 
3219 /***en
3220     @brief Locate an M-text in a specific range of another.
3221 
    The mtext_search () function searches for the first occurrence of
    M-text $MT2 in M-text $MT1 in the region between $FROM and $TO
    while ignoring difference of the text properties.  If $FROM is
    less than $TO, the forward search starts from $FROM, otherwise
    the backward search starts from $TO.
3227 
3228     @return
3229     If $MT2 is found in $MT1, mtext_search () returns the position of the
3230     first occurrence.  Otherwise it returns -1.  If $MT2 is empty, it
3231     returns 0.  */
3232 
3233 /***ja
3234     @brief M-text ���������ΰ���̤� M-text ��õ��.
3235 
3236     �ؿ� mtext_search () �ϡ�M-text $MT1 ��� $FROM ���� $TO
3237     �ޤǤδ֤��ΰ��M-text $MT2
3238     ���ǽ�˸�������֤�Ĵ�٤롣�ƥ����ȥץ�ѥƥ��ΰ㤤��̵�뤵��롣�⤷
3239     $FROM �� $TO ��꾮�������õ���ϰ��� $FROM �������������ء������Ǥʤ����
3240     $TO ������Ƭ�����ؿʤࡣ
3241 
3242     @return
3243     $MT1 ��� $MT2 �����Ĥ���С�mtext_search()
3244     �Ϥ��κǽ�νи����֤��֤������Ĥ���ʤ����� -1 ���֤����⤷ $MT2 �����ʤ�� 0 ���֤���
3245     */
3246 
3247 int
mtext_search(MText * mt1,int from,int to,MText * mt2)3248 mtext_search (MText *mt1, int from, int to, MText *mt2)
3249 {
3250   int c = mtext_ref_char (mt2, 0);
3251   int from_byte;
3252   int nbytes2 = mtext_nbytes (mt2);
3253 
3254   if (mt1->format > MTEXT_FORMAT_UTF_8
3255       || mt2->format > MTEXT_FORMAT_UTF_8)
3256     MERROR (MERROR_MTEXT, -1);
3257 
3258   if (from < to)
3259     {
3260       to -= mtext_nchars (mt2);
3261       if (from > to)
3262 	return -1;
3263       while (1)
3264 	{
3265 	  if ((from = find_char_forward (mt1, from, to, c)) < 0)
3266 	    return -1;
3267 	  from_byte = POS_CHAR_TO_BYTE (mt1, from);
3268 	  if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
3269 	    break;
3270 	  from++;
3271 	}
3272     }
3273   else if (from > to)
3274     {
3275       from -= mtext_nchars (mt2);
3276       if (from < to)
3277 	return -1;
3278       while (1)
3279 	{
3280 	  if ((from = find_char_backward (mt1, to, from + 1, c)) < 0)
3281 	    return -1;
3282 	  from_byte = POS_CHAR_TO_BYTE (mt1, from);
3283 	  if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
3284 	    break;
3285 	  from--;
3286 	}
3287     }
3288 
3289   return from;
3290 }
3291 
3292 /*=*/
3293 
3294 /***en
3295     @brief Compare two M-texts ignoring cases.
3296 
3297     The mtext_casecmp () function is similar to mtext_cmp (), but
3298     ignores cases on comparison.
3299 
3300     @return
3301     This function returns 1, 0, or -1 if $MT1 is found greater than,
3302     equal to, or less than $MT2, respectively.  */
3303 
3304 /***ja
3305     @brief ��Ĥ� M-text ����ʸ������ʸ���ζ��̤�̵�뤷����Ӥ���.
3306 
3307     �ؿ� mtext_casecmp () �ϡ��ؿ� mtext_cmp () Ʊ�ͤ� M-text
3308     Ʊ�Τ���Ӥ���ʸ������ʸ���ζ��̤�̵�뤷�ƹԤʤ���
3309 
3310     @return
3311     ���δؿ��ϡ�$MT1 �� $MT2 ����������� 0��$MT1 �� $MT2
3312     ����礭����� 1��$MT1 �� $MT2 ��꾮������� -1 ���֤���
3313 
3314     @latexonly \IPAlabel{mtext_casecmp} @endlatexonly  */
3315 
3316 /***
3317     @seealso
3318     mtext_cmp (), mtext_ncmp (), mtext_ncasecmp ()
3319     mtext_compare (), mtext_case_compare ()  */
3320 
3321 int
mtext_casecmp(MText * mt1,MText * mt2)3322 mtext_casecmp (MText *mt1, MText *mt2)
3323 {
3324   return case_compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
3325 }
3326 
3327 /*=*/
3328 
3329 /***en
3330     @brief Compare initial parts of two M-texts ignoring cases.
3331 
3332     The mtext_ncasecmp () function is similar to mtext_casecmp (), but
3333     compares at most $N characters from the beginning.
3334 
3335     @return
3336     This function returns 1, 0, or -1 if $MT1 is found greater than,
3337     equal to, or less than $MT2, respectively.  */
3338 
3339 /***ja
3340     @brief ��Ĥ� M-text ����Ƭ��ʬ����ʸ������ʸ���ζ��̤�̵�뤷����Ӥ���.
3341 
3342     �ؿ� mtext_ncasecmp () �ϡ��ؿ� mtext_casecmp () Ʊ�ͤ� M-text
3343     Ʊ�Τ���Ӥ���Ƭ������� $N ʸ���ޤǤ˴ؤ��ƹԤʤ���
3344 
3345     @return
3346     ���δؿ��ϡ�$MT1 �� $MT2 ����������� 0��$MT1 �� $MT2
3347     ����礭����� 1��$MT1 �� $MT2 ��꾮������� -1 ���֤���
3348 
3349     @latexonly \IPAlabel{mtext_ncasecmp} @endlatexonly  */
3350 
3351 /***
3352     @seealso
    mtext_cmp (), mtext_ncmp (), mtext_casecmp ()
3354     mtext_compare (), mtext_case_compare ()  */
3355 
3356 int
mtext_ncasecmp(MText * mt1,MText * mt2,int n)3357 mtext_ncasecmp (MText *mt1, MText *mt2, int n)
3358 {
3359   if (n < 0)
3360     return 0;
3361   return case_compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
3362 		       mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
3363 }
3364 
3365 /*=*/
3366 
3367 /***en
3368     @brief Compare specified regions of two M-texts ignoring cases.
3369 
    The mtext_case_compare () function compares two M-texts $MT1 and
    $MT2, character-by-character, ignoring cases.  The compared
    regions are between $FROM1 and $TO1 in $MT1 and between $FROM2
    and $TO2 in $MT2.  $FROM1 and $FROM2 are inclusive, $TO1 and $TO2
    are exclusive.  $FROM1 being equal to $TO1 (or $FROM2 being equal
    to $TO2) means an M-text of length zero.  An invalid region
    specification is regarded as both $FROM1 and $TO1 (or $FROM2 and
    $TO2) being 0.
3378 
3379     @return
3380     This function returns 1, 0, or -1 if $MT1 is found greater than,
3381     equal to, or less than $MT2, respectively.  Comparison is based on
3382     character codes.  */
3383 
3384 /***ja
3385     @brief ��Ĥ� M-text �λ��ꤷ���ΰ����ʸ������ʸ���ζ��̤�̵�뤷����Ӥ���.
3386 
3387     �ؿ� mtext_compare () ����Ĥ� M-text $MT1 �� $MT2
3388     ����ʸ������ʸ���ζ��̤�̵�뤷��ʸ��ñ�̤���Ӥ��롣��Ӥ��оݤ� $MT1
3389     �� $FROM1 ���� $TO1 �ޤǡ�$MT2 �� $FROM2 ���� $TO2 �ޤǤǤ��롣
3390     $FROM1 �� $FROM2 �ϴޤޤ졢$TO1 �� $TO2 �ϴޤޤ�ʤ���$FROM1 �� $TO1
3391     �ʤ��뤤�� $FROM2 �� $TO2 �ˤ�����������Ĺ������� M-text
3392     ���̣���롣�ϰϻ���˸�꤬������ϡ�$FROM1 �� $TO1 �ʤ��뤤��
3393     $FROM2 �� $TO2 ��ξ���� 0 �����ꤵ�줿��Τȸ��ʤ���
3394 
3395     @return
3396     ���δؿ��ϡ�$MT1 �� $MT2 ����������� 0��$MT1 �� $MT2 ����礭�����
3397     1��$MT1 �� $MT2 ��꾮������� -1���֤�����Ӥ�ʸ�������ɤ˴�Ť���
3398 
3399   @latexonly \IPAlabel{mtext_case_compare} @endlatexonly
3400 */
3401 
3402 /***
3403     @seealso
3404     mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
3405     mtext_compare ()  */
3406 
3407 int
mtext_case_compare(MText * mt1,int from1,int to1,MText * mt2,int from2,int to2)3408 mtext_case_compare (MText *mt1, int from1, int to1,
3409 		    MText *mt2, int from2, int to2)
3410 {
3411   if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
3412     from1 = to1 = 0;
3413 
3414   if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
3415     from2 = to2 = 0;
3416 
3417   return case_compare (mt1, from1, to1, mt2, from2, to2);
3418 }
3419 
3420 /*=*/
3421 
3422 /***en
3423     @brief Lowercase an M-text.
3424 
3425     The mtext_lowercase () function destructively converts each
3426     character in M-text $MT to lowercase.  Adjacent characters in $MT
3427     may affect the case conversion.  If the Mlanguage text property is
3428     attached to $MT, it may also affect the conversion.  The length of
    $MT may change.  Characters that cannot be converted to lowercase
    are left unchanged.  All the text properties are inherited.
3431 
3432     @return
3433     This function returns the length of the updated $MT.
3434 */
3435 
3436 /***ja
3437     @brief M-text ��ʸ���ˤ���.
3438 
3439     �ؿ� mtext_lowercase () �� M-text $MT ��γ�ʸ�����˲�Ū�˾�ʸ������
3440     �����롣�Ѵ��˺ݤ������ܤ���ʸ���αƶ�������뤳�Ȥ����롣$MT �˥�
3441     �����ȥץ�ѥƥ� Mlanguage ���դ��Ƥ�����ϡ�������Ѵ��˱ƶ���
3442     Ϳ�����롣$MT ��Ĺ�����Ѥ�뤳�Ȥ����롣��ʸ�����Ѵ��Ǥ��ʤ��ä�ʸ
3443     ���Ϥ��Τޤ޻Ĥ롣�ƥ����ȥץ�ѥƥ��Ϥ��٤ƷѾ�����롣
3444 
3445     @return
3446     ���δؿ��Ϲ������ $MT ��Ĺ�����֤���
3447 */
3448 
3449 /***
3450     @seealso
3451      mtext_titlecase (), mtext_uppercase ()
3452 */
3453 
3454 int
mtext_lowercase(MText * mt)3455 mtext_lowercase (MText *mt)
3456 
3457 {
3458   CASE_CONV_INIT (-1);
3459 
3460   return mtext__lowercase (mt, 0, mtext_len (mt));
3461 }
3462 
3463 /*=*/
3464 
3465 /***en
3466     @brief Titlecase an M-text.
3467 
3468     The mtext_titlecase () function destructively converts the first
3469     character with the cased property in M-text $MT to titlecase and
3470     the others to lowercase.  The length of $MT may change.  If the
3471     character cannot be converted to titlecase, it is left unchanged.
3472     All the text properties are inherited.
3473 
3474     @return
3475     This function returns the length of the updated $MT.
3476 */
3477 
3478 /***ja
3479     @brief M-text �����ȥ륱�����ˤ���.
3480 
3481     �ؿ� mtext_titlecase () �� M-text $MT ��� cased �ץ�ѥƥ������
3482     �ǽ��ʸ�������ȥ륱�����ˡ������Ƥ���ʹߤ�ʸ����ʸ�����˲�Ū
3483     ���Ѵ����롣$MT ��Ĺ�����Ѥ�뤳�Ȥ����롣�����ȥ륱�����ˤ��Ѵ���
3484     ���ʤ��ä����Ϥ��Τޤޤ��Ѥ��ʤ����ƥ����ȥץ�ѥƥ��Ϥ��٤Ʒ�
3485     ������롣
3486 
3487     @return
3488     ���δؿ��Ϲ������ $MT ��Ĺ�����֤���
3489 */
3490 
3491 /***
3492     @seealso
3493      mtext_lowercase (), mtext_uppercase ()
3494 */
3495 
3496 int
mtext_titlecase(MText * mt)3497 mtext_titlecase (MText *mt)
3498 {
3499   int len = mtext_len (mt), from, to;
3500 
3501   CASE_CONV_INIT (-1);
3502 
3503   /* Find 1st cased character. */
3504   for (from = 0; from < len; from++)
3505     {
3506       int csd = (int) mchartable_lookup (cased, mtext_ref_char (mt, from));
3507 
3508       if (csd > 0 && csd & CASED)
3509 	break;
3510     }
3511 
3512   if (from == len)
3513     return len;
3514 
3515   if (from == len - 1)
3516     return (mtext__titlecase (mt, from, len));
3517 
3518   /* Go through following combining characters. */
3519   for (to = from + 1;
3520        (to < len
3521 	&& ((int) mchartable_lookup (combining_class, mtext_ref_char (mt, to))
3522 	    > 0));
3523        to++);
3524 
3525   /* Titlecase the region and prepare for next lowercase operation.
3526      MT may be shortened or lengthened. */
3527   from = mtext__titlecase (mt, from, to);
3528 
3529   return (mtext__lowercase (mt, from, mtext_len (mt)));
3530 }
3531 
3532 /*=*/
3533 
3534 /***en
3535     @brief Uppercase an M-text.
3536 
3537 
3538     The mtext_uppercase () function destructively converts each
3539     character in M-text $MT to uppercase.  Adjacent characters in $MT
3540     may affect the case conversion.  If the Mlanguage text property is
3541     attached to $MT, it may also affect the conversion.  The length of
    $MT may change.  Characters that cannot be converted to uppercase
    are left unchanged.  All the text properties are inherited.
3544 
3545     @return
3546     This function returns the length of the updated $MT.
3547 */
3548 
3549 /***ja
3550     @brief M-text ����ʸ���ˤ���.
3551 
3552     �ؿ� mtext_uppercase () �� M-text $MT ��γ�ʸ�����˲�Ū����ʸ������
3553     �����롣�Ѵ��˺ݤ������ܤ���ʸ���αƶ�������뤳�Ȥ����롣$MT �˥�
3554     �����ȥץ�ѥƥ� Mlanguage ���դ��Ƥ�����ϡ�������Ѵ��˱ƶ���
3555     Ϳ�����롣$MT ��Ĺ�����Ѥ�뤳�Ȥ����롣��ʸ�����Ѵ��Ǥ��ʤ��ä�ʸ
3556     ���Ϥ��Τޤ޻Ĥ롣�ƥ����ȥץ�ѥƥ��Ϥ��٤ƷѾ�����롣
3557 
3558     @return
3559     ���δؿ��Ϲ������ $MT ��Ĺ�����֤���
3560 */
3561 
3562 /***
3563     @seealso
3564      mtext_lowercase (), mtext_titlecase ()
3565 */
3566 
3567 int
mtext_uppercase(MText * mt)3568 mtext_uppercase (MText *mt)
3569 {
3570   CASE_CONV_INIT (-1);
3571 
3572   return (mtext__uppercase (mt, 0, mtext_len (mt)));
3573 }
3574 
3575 /*** @} */
3576 
3577 #include <stdio.h>
3578 
3579 /*** @addtogroup m17nDebug */
3580 /*=*/
3581 /*** @{  */
3582 
3583 /***en
3584     @brief Dump an M-text.
3585 
    The mdebug_dump_mtext () function prints the M-text $MT in a human
    readable way to stderr or to the file specified by the environment
    variable MDEBUG_OUTPUT_FILE.  $INDENT specifies how many columns
    to indent all lines except the first one.  If $FULLP is zero, this
    function prints only a character code sequence.  Otherwise, it
    prints the internal byte sequence and text properties as well.
3592 
3593     @return
3594     This function returns $MT.  */
3595 /***ja
3596     @brief M-text �����פ���.
3597 
3598     �ؿ� mdebug_dump_mtext () �� M-text $MT ��ɸ�२�顼���Ϥ⤷���ϴ�
3599     ���ѿ� MDEBUG_DUMP_FONT �ǻ��ꤵ�줿�ե�����˿ʹ֤˲��ɤʷ��ǰ���
3600     ���롣 $INDENT �ϣ����ܰʹߤΥ���ǥ�Ȥ���ꤹ�롣$FULLP �� 0 �ʤ�
3601     �С�ʸ���������������������롣�����Ǥʤ���С������Х�����ȥƥ�
3602     ���ȥץ�ѥƥ���������롣
3603 
3604     @return
3605     ���δؿ��� $MT ���֤���  */
3606 
3607 MText *
mdebug_dump_mtext(MText * mt,int indent,int fullp)3608 mdebug_dump_mtext (MText *mt, int indent, int fullp)
3609 {
3610   int i;
3611 
3612   if (! fullp)
3613     {
3614       fprintf (mdebug__output, "\"");
3615       for (i = 0; i < mt->nchars; i++)
3616 	{
3617 	  int c = mtext_ref_char (mt, i);
3618 
3619 	  if (c == '"' || c == '\\')
3620 	    fprintf (mdebug__output, "\\%c", c);
3621 	  else if ((c >= ' ' && c < 127) || c == '\n')
3622 	    fprintf (mdebug__output, "%c", c);
3623 	  else
3624 	    fprintf (mdebug__output, "\\x%02X", c);
3625 	}
3626       fprintf (mdebug__output, "\"");
3627       return mt;
3628     }
3629 
3630   fprintf (mdebug__output,
3631 	   "(mtext (size %d %d %d) (cache %d %d)",
3632 	   mt->nchars, mt->nbytes, mt->allocated,
3633 	   mt->cache_char_pos, mt->cache_byte_pos);
3634 
3635   if (mt->nchars > 0)
3636     {
3637       char *prefix = (char *) alloca (indent + 1);
3638       unsigned char *p;
3639 
3640       memset (prefix, 32, indent);
3641       prefix[indent] = 0;
3642 
3643       fprintf (mdebug__output, "\n%s (bytes \"", prefix);
3644       for (i = 0; i < mt->nbytes; i++)
3645 	fprintf (mdebug__output, "\\x%02x", mt->data[i]);
3646       fprintf (mdebug__output, "\")\n");
3647       fprintf (mdebug__output, "%s (chars \"", prefix);
3648       p = mt->data;
3649       for (i = 0; i < mt->nchars; i++)
3650 	{
3651 	  int len;
3652 	  int c = STRING_CHAR_AND_BYTES (p, len);
3653 
3654 	  if (c == '"' || c == '\\')
3655 	    fprintf (mdebug__output, "\\%c", c);
3656 	  else if (c >= ' ' && c < 127)
3657 	    fputc (c, mdebug__output);
3658 	  else
3659 	    fprintf (mdebug__output, "\\x%X", c);
3660 	  p += len;
3661 	}
3662       fprintf (mdebug__output, "\")");
3663       if (mt->plist)
3664 	{
3665 	  fprintf (mdebug__output, "\n%s ", prefix);
3666 	  dump_textplist (mt->plist, indent + 1);
3667 	}
3668     }
3669   fprintf (mdebug__output, ")");
3670   return mt;
3671 }
3672 
3673 /*** @} */
3674 
3675 /*
3676   Local Variables:
3677   coding: euc-japan
3678   End:
3679 */
3680