1 /* mtext.c -- M-text module.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
5
6 This file is part of the m17n library.
7
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
12
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301 USA. */
22
23 /***en
24 @addtogroup m17nMtext
25 @brief M-text objects and API for them.
26
27 In the m17n library, text is represented as an object called @e
28 M-text rather than as a C-string (<tt>char *</tt> or <tt>unsigned
29 char *</tt>). An M-text is a sequence of characters whose length
is equal to or greater than 0, and can be coined from various
31 character sources, e.g. C-strings, files, character codes, etc.
32
33 M-texts are more useful than C-strings in the following points.
34
35 @li M-texts can handle mixture of characters of various scripts,
36 including all Unicode characters and more. This is an
37 indispensable facility when handling multilingual text.
38
39 @li Each character in an M-text can have properties called @e text
40 @e properties. Text properties store various kinds of information
41 attached to parts of an M-text to provide application programs
with a unified view of that information.  As rich information can
43 be stored in M-texts in the form of text properties, functions in
44 application programs can be simple.
45
46 In addition, the library provides many functions to manipulate an
47 M-text just the same way as a C-string. */
48
49 /***ja
50 @addtogroup m17nMtext
51
52 @brief M-text ���֥������ȤȤ���˴ؤ��� API.
53
54 m17n �饤�֥��ϡ� C-string��<tt>char *</tt> �� <tt>unsigned
55 char *</tt>�ˤǤϤʤ� @e M-text �ȸƤ֥��֥������Ȥǥƥ����Ȥ�ɽ�����롣
56 M-text ��Ĺ�� 0 �ʾ��ʸ����Ǥ��ꡢ���ʸ���������ʤ��Ȥ���
57 C-string���ե����롢ʸ�����������ˤ�������Ǥ��롣
58
59 M-text �ˤϡ�C-string �ˤʤ��ʲ�����ħ�����롣
60
61 @li M-text ������¿���μ����ʸ����Ʊ���ˡ����ߤ����ơ�Ʊ���˰������Ȥ��Ǥ��롣
62 Unicode �����Ƥ�ʸ���Ϥ������¿����ʸ���ޤǤⰷ�����Ȥ��Ǥ��롣
63 �����¿����ƥ����Ȥ���Ǥ�ɬ�ܤε�ǽ�Ǥ��롣
64
65 @li M-text ��γ�ʸ���ϡ�@e �ƥ����ȥץ�ѥƥ�
66 �ȸƤФ��ץ�ѥƥ��������
67 �ƥ����ȥץ�ѥƥ��ˤ�äơ��ƥ����Ȥγ����̤˴ؤ����͡��ʾ����
68 M-text ����ݻ����뤳�Ȥ���ǽ�ˤʤ롣
69 ���Τ��ᡢ�����ξ���ץꥱ�������ץ�����������Ū�˰������Ȥ���ǽ�ˤʤ롣
70 �ޤ���M-text
71 ���Τ�˭�٤ʾ������Ĥ��ᡢ���ץꥱ�������ץ������γƴؿ�����Dz����뤳�Ȥ��Ǥ��롣
72
73 �����m17n �饤�֥��ϡ� C-string
74 �����뤿���������δؿ���Ʊ���Τ�Τ� M-text
75 �����뤿��˥��ݡ��Ȥ��Ƥ��롣 */
76
77 /*=*/
78
79 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
80 /*** @addtogroup m17nInternal
81 @{ */
82
83 #include <config.h>
84 #include <stdio.h>
85 #include <stdlib.h>
86 #include <string.h>
87 #include <locale.h>
88
89 #include "m17n.h"
90 #include "m17n-misc.h"
91 #include "internal.h"
92 #include "textprop.h"
93 #include "character.h"
94 #include "mtext.h"
95 #include "plist.h"
96
97 static M17NObjectArray mtext_table;
98
99 static MSymbol M_charbag;
100
/** Step forward by one character in M-text MT: CHAR_POS grows by one
    and UNIT_POS grows by the number of code units occupied by the
    character that starts at UNIT_POS.  Both CHAR_POS and UNIT_POS
    must be lvalues.  Neither position is range-checked.  */

#define INC_POSITION(mt, char_pos, unit_pos)                            \
  do {                                                                  \
    if ((mt)->format > MTEXT_FORMAT_UTF_16BE)                           \
      /* Remaining formats: one unit per character.  */                 \
      (unit_pos)++;                                                     \
    else if ((mt)->format > MTEXT_FORMAT_UTF_8)                         \
      {                                                                 \
        int head_ = ((unsigned short *) ((mt)->data))[(unit_pos)];      \
                                                                        \
        /* Non-native byte order: swap before inspecting the head.  */  \
        if ((mt)->format != MTEXT_FORMAT_UTF_16)                        \
          head_ = SWAP_16 (head_);                                      \
        (unit_pos) += CHAR_UNITS_BY_HEAD_UTF16 (head_);                 \
      }                                                                 \
    else                                                                \
      {                                                                 \
        int head_ = (mt)->data[(unit_pos)];                             \
                                                                        \
        (unit_pos) += CHAR_UNITS_BY_HEAD_UTF8 (head_);                  \
      }                                                                 \
    (char_pos)++;                                                       \
  } while (0)
126
127
/** Step backward by one character in M-text MT: CHAR_POS shrinks by
    one and UNIT_POS shrinks by the number of code units occupied by
    the character that ends just before UNIT_POS.  Both CHAR_POS and
    UNIT_POS must be lvalues.  Neither position is range-checked.  */

#define DEC_POSITION(mt, char_pos, unit_pos)                            \
  do {                                                                  \
    if ((mt)->format > MTEXT_FORMAT_UTF_16BE)                           \
      /* Remaining formats: one unit per character.  */                 \
      (unit_pos)--;                                                     \
    else if ((mt)->format > MTEXT_FORMAT_UTF_8)                         \
      {                                                                 \
        int prev_ = ((unsigned short *) ((mt)->data))[(unit_pos) - 1];  \
                                                                        \
        if ((mt)->format != MTEXT_FORMAT_UTF_16)                        \
          prev_ = SWAP_16 (prev_);                                      \
        /* A surrogate just before UNIT_POS means a two-unit step.  */  \
        if (prev_ >= 0xD800 && prev_ < 0xE000)                          \
          (unit_pos) -= 2;                                              \
        else                                                            \
          (unit_pos) -= 1;                                              \
      }                                                                 \
    else                                                                \
      {                                                                 \
        unsigned char *end_ = (mt)->data + (unit_pos);                  \
        unsigned char *head_ = end_ - 1;                                \
                                                                        \
        /* Walk back to the head byte of the previous character.  */    \
        while (! CHAR_HEAD_P (head_))                                   \
          head_--;                                                      \
        (unit_pos) -= (end_ - head_);                                   \
      }                                                                 \
    (char_pos)--;                                                       \
  } while (0)
154
/* Map the M-text format FMT to the coverage value recorded for a
   text stored in that format.  FMT is evaluated more than once, so
   it must be free of side effects; it is parenthesized so that any
   expression (e.g. a conditional) can be passed safely.  */
#define FORMAT_COVERAGE(fmt)                                    \
  ((fmt) == MTEXT_FORMAT_UTF_8 ? MTEXT_COVERAGE_FULL            \
   : (fmt) == MTEXT_FORMAT_US_ASCII ? MTEXT_COVERAGE_ASCII      \
   : (fmt) >= MTEXT_FORMAT_UTF_32LE ? MTEXT_COVERAGE_FULL       \
   : MTEXT_COVERAGE_UNICODE)
160
161 /* Compoare sub-texts in MT1 (range FROM1 and TO1) and MT2 (range
162 FROM2 to TO2). */
163
164 static int
compare(MText * mt1,int from1,int to1,MText * mt2,int from2,int to2)165 compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
166 {
167 if (mt1->format == mt2->format
168 && (mt1->format <= MTEXT_FORMAT_UTF_8))
169 {
170 unsigned char *p1, *pend1, *p2, *pend2;
171 int unit_bytes = UNIT_BYTES (mt1->format);
172 int nbytes;
173 int result;
174
175 p1 = mt1->data + mtext__char_to_byte (mt1, from1) * unit_bytes;
176 pend1 = mt1->data + mtext__char_to_byte (mt1, to1) * unit_bytes;
177
178 p2 = mt2->data + mtext__char_to_byte (mt2, from2) * unit_bytes;
179 pend2 = mt2->data + mtext__char_to_byte (mt2, to2) * unit_bytes;
180
181 if (pend1 - p1 < pend2 - p2)
182 nbytes = pend1 - p1;
183 else
184 nbytes = pend2 - p2;
185 result = memcmp (p1, p2, nbytes);
186 if (result)
187 return result;
188 return ((pend1 - p1) - (pend2 - p2));
189 }
190 for (; from1 < to1 && from2 < to2; from1++, from2++)
191 {
192 int c1 = mtext_ref_char (mt1, from1);
193 int c2 = mtext_ref_char (mt2, from2);
194
195 if (c1 != c2)
196 return (c1 > c2 ? 1 : -1);
197 }
198 return (from2 == to2 ? (from1 < to1) : -1);
199 }
200
201
202 /* Return how many units are required in UTF-8 to represent characters
203 between FROM and TO of MT. */
204
205 static int
count_by_utf_8(MText * mt,int from,int to)206 count_by_utf_8 (MText *mt, int from, int to)
207 {
208 int n, c;
209
210 for (n = 0; from < to; from++)
211 {
212 c = mtext_ref_char (mt, from);
213 n += CHAR_UNITS_UTF8 (c);
214 }
215 return n;
216 }
217
218
219 /* Return how many units are required in UTF-16 to represent
220 characters between FROM and TO of MT. */
221
222 static int
count_by_utf_16(MText * mt,int from,int to)223 count_by_utf_16 (MText *mt, int from, int to)
224 {
225 int n, c;
226
227 for (n = 0; from < to; from++)
228 {
229 c = mtext_ref_char (mt, from);
230 n += CHAR_UNITS_UTF16 (c);
231 }
232 return n;
233 }
234
235
236 /* Insert text between FROM and TO of MT2 at POS of MT1. */
237
238 static MText *
insert(MText * mt1,int pos,MText * mt2,int from,int to)239 insert (MText *mt1, int pos, MText *mt2, int from, int to)
240 {
241 int pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
242 int from_unit = POS_CHAR_TO_BYTE (mt2, from);
243 int new_units = POS_CHAR_TO_BYTE (mt2, to) - from_unit;
244 int unit_bytes;
245
246 if (mt1->nchars == 0)
247 mt1->format = mt2->format, mt1->coverage = mt2->coverage;
248 else if (mt1->format != mt2->format)
249 {
250 /* Be sure to make mt1->format sufficient to contain all
251 characters in mt2. */
252 if (mt1->format == MTEXT_FORMAT_UTF_8
253 || mt1->format == MTEXT_FORMAT_UTF_32
254 || (mt1->format == MTEXT_FORMAT_UTF_16
255 && mt2->format <= MTEXT_FORMAT_UTF_16BE
256 && mt2->format != MTEXT_FORMAT_UTF_8))
257 ;
258 else if (mt1->format == MTEXT_FORMAT_US_ASCII)
259 {
260 if (mt2->format == MTEXT_FORMAT_UTF_8)
261 mt1->format = MTEXT_FORMAT_UTF_8, mt1->coverage = mt2->coverage;
262 else if (mt2->format == MTEXT_FORMAT_UTF_16
263 || mt2->format == MTEXT_FORMAT_UTF_32)
264 mtext__adjust_format (mt1, mt2->format);
265 else
266 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
267 }
268 else
269 {
270 mtext__adjust_format (mt1, MTEXT_FORMAT_UTF_8);
271 pos_unit = POS_CHAR_TO_BYTE (mt1, pos);
272 }
273 }
274
275 unit_bytes = UNIT_BYTES (mt1->format);
276
277 if (mt1->format == mt2->format)
278 {
279 int pos_byte = pos_unit * unit_bytes;
280 int total_bytes = (mt1->nbytes + new_units) * unit_bytes;
281 int new_bytes = new_units * unit_bytes;
282
283 if (total_bytes + unit_bytes > mt1->allocated)
284 {
285 mt1->allocated = total_bytes + unit_bytes;
286 if (mt1->data)
287 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
288 else
289 MTABLE_CALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
290 }
291 memmove (mt1->data + pos_byte + new_bytes, mt1->data + pos_byte,
292 (mt1->nbytes - pos_unit + 1) * unit_bytes);
293 memcpy (mt1->data + pos_byte, mt2->data + from_unit * unit_bytes,
294 new_bytes);
295 }
296 else if (mt1->format == MTEXT_FORMAT_UTF_8)
297 {
298 unsigned char *p;
299 int total_bytes, i, c;
300
301 new_units = count_by_utf_8 (mt2, from, to);
302 total_bytes = mt1->nbytes + new_units;
303
304 if (total_bytes + 1 > mt1->allocated)
305 {
306 mt1->allocated = total_bytes + 1;
307 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
308 }
309 p = mt1->data + pos_unit;
310 memmove (p + new_units, p, mt1->nbytes - pos_unit + 1);
311 for (i = from; i < to; i++)
312 {
313 c = mtext_ref_char (mt2, i);
314 p += CHAR_STRING_UTF8 (c, p);
315 }
316 }
317 else if (mt1->format == MTEXT_FORMAT_UTF_16)
318 {
319 unsigned short *p;
320 int total_bytes, i, c;
321
322 new_units = count_by_utf_16 (mt2, from, to);
323 total_bytes = (mt1->nbytes + new_units) * USHORT_SIZE;
324
325 if (total_bytes + USHORT_SIZE > mt1->allocated)
326 {
327 mt1->allocated = total_bytes + USHORT_SIZE;
328 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
329 }
330 p = (unsigned short *) mt1->data + pos_unit;
331 memmove (p + new_units, p,
332 (mt1->nbytes - pos_unit + 1) * USHORT_SIZE);
333 for (i = from; i < to; i++)
334 {
335 c = mtext_ref_char (mt2, i);
336 p += CHAR_STRING_UTF16 (c, p);
337 }
338 }
339 else /* MTEXT_FORMAT_UTF_32 */
340 {
341 unsigned int *p;
342 int total_bytes, i;
343
344 new_units = to - from;
345 total_bytes = (mt1->nbytes + new_units) * UINT_SIZE;
346
347 if (total_bytes + UINT_SIZE > mt1->allocated)
348 {
349 mt1->allocated = total_bytes + UINT_SIZE;
350 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
351 }
352 p = (unsigned *) mt1->data + pos_unit;
353 memmove (p + new_units, p,
354 (mt1->nbytes - pos_unit + 1) * UINT_SIZE);
355 for (i = from; i < to; i++)
356 *p++ = mtext_ref_char (mt2, i);
357 }
358
359 mtext__adjust_plist_for_insert
360 (mt1, pos, to - from,
361 mtext__copy_plist (mt2->plist, from, to, mt1, pos));
362 mt1->nchars += to - from;
363 mt1->nbytes += new_units;
364 if (mt1->cache_char_pos > pos)
365 {
366 mt1->cache_char_pos += to - from;
367 mt1->cache_byte_pos += new_units;
368 }
369
370 return mt1;
371 }
372
373
374 static MCharTable *
get_charbag(MText * mt)375 get_charbag (MText *mt)
376 {
377 MTextProperty *prop = mtext_get_property (mt, 0, M_charbag);
378 MCharTable *table;
379 int i;
380
381 if (prop)
382 {
383 if (prop->end == mt->nchars)
384 return ((MCharTable *) prop->val);
385 mtext_detach_property (prop);
386 }
387
388 table = mchartable (Msymbol, (void *) 0);
389 for (i = mt->nchars - 1; i >= 0; i--)
390 mchartable_set (table, mtext_ref_char (mt, i), Mt);
391 prop = mtext_property (M_charbag, table, MTEXTPROP_VOLATILE_WEAK);
392 mtext_attach_property (mt, 0, mtext_nchars (mt), prop);
393 M17N_OBJECT_UNREF (prop);
394 return table;
395 }
396
397
398 /* span () : Number of consecutive chars starting at POS in MT1 that
399 are included (if NOT is Mnil) or not included (if NOT is Mt) in
400 MT2. */
401
402 static int
span(MText * mt1,MText * mt2,int pos,MSymbol not)403 span (MText *mt1, MText *mt2, int pos, MSymbol not)
404 {
405 int nchars = mtext_nchars (mt1);
406 MCharTable *table = get_charbag (mt2);
407 int i;
408
409 for (i = pos; i < nchars; i++)
410 if ((MSymbol) mchartable_lookup (table, mtext_ref_char (mt1, i)) == not)
411 break;
412 return (i - pos);
413 }
414
415
/* Return the number of characters encoded by the NITEMS bytes of
   UTF-8 text at DATA, or -1 if the text is malformed: a
   non-ASCII byte that is not a head byte where a character should
   start, a sequence running past the end of the data, or a head
   byte inside a multi-byte sequence.  */

static int
count_utf_8_chars (const void *data, int nitems)
{
  unsigned char *cur = (unsigned char *) data;
  unsigned char *limit = cur + nitems;
  int count = 0;

  while (cur < limit)
    {
      int len, k;

      if (*cur < 128)
        {
          /* ASCII byte: one character, one unit.  */
          cur++;
          count++;
          continue;
        }
      if (! CHAR_HEAD_P_UTF8 (cur))
        return -1;
      len = CHAR_UNITS_BY_HEAD_UTF8 (*cur);
      if (cur + len > limit)
        return -1;
      for (k = 1; k < len; k++)
        if (CHAR_HEAD_P_UTF8 (cur + k))
          return -1;
      cur += len;
      count++;
    }
  return count;
}
443
/* Return the number of characters encoded by the NITEMS 16-bit units
   of UTF-16 text at DATA.  If SWAP is nonzero, the two bytes of each
   unit are exchanged before the unit is examined.

   A high surrogate immediately followed by a low surrogate forms one
   character.  Any other unit, including an unpaired surrogate,
   counts as one character by itself.  */

static int
count_utf_16_chars (const void *data, int nitems, int swap)
{
  const unsigned short *p = (const unsigned short *) data;
  const unsigned short *pend = p + nitems;
  int nchars = 0;
  /* Nonzero while the previous unit was a high surrogate that is
     still waiting for its low surrogate.  */
  int pending_high = 0;

  for (; p < pend; p++)
    {
      int c = *p;

      if (swap)
        c = ((c >> 8) | ((c & 0xFF) << 8));
      if (pending_high && c >= 0xDC00 && c < 0xE000)
        {
          /* This unit completes the pair whose head was already
             counted; the state must be cleared so that later units
             are judged on their own.  */
          pending_high = 0;
          continue;
        }
      nchars++;
      pending_high = (c >= 0xD800 && c < 0xDC00);
    }
  return nchars;
}
475
476
477 static int
find_char_forward(MText * mt,int from,int to,int c)478 find_char_forward (MText *mt, int from, int to, int c)
479 {
480 int from_byte = POS_CHAR_TO_BYTE (mt, from);
481
482 if (mt->format <= MTEXT_FORMAT_UTF_8)
483 {
484 unsigned char *p = mt->data + from_byte;
485
486 while (from < to && STRING_CHAR_ADVANCE_UTF8 (p) != c) from++;
487 }
488 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
489 {
490 unsigned short *p = (unsigned short *) (mt->data) + from_byte;
491
492 if (mt->format == MTEXT_FORMAT_UTF_16)
493 while (from < to && STRING_CHAR_ADVANCE_UTF16 (p) != c) from++;
494 else if (c < 0x10000)
495 {
496 c = SWAP_16 (c);
497 while (from < to && *p != c)
498 {
499 from++;
500 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
501 }
502 }
503 else if (c < 0x110000)
504 {
505 int c1 = (c >> 10) + 0xD800;
506 int c2 = (c & 0x3FF) + 0xDC00;
507
508 c1 = SWAP_16 (c1);
509 c2 = SWAP_16 (c2);
510 while (from < to && (*p != c1 || p[1] != c2))
511 {
512 from++;
513 p += ((*p & 0xFF) < 0xD8 || (*p & 0xFF) >= 0xE0) ? 1 : 2;
514 }
515 }
516 else
517 from = to;
518 }
519 else
520 {
521 unsigned *p = (unsigned *) (mt->data) + from_byte;
522 unsigned c1 = c;
523
524 if (mt->format != MTEXT_FORMAT_UTF_32)
525 c1 = SWAP_32 (c1);
526 while (from < to && *p++ != c1) from++;
527 }
528
529 return (from < to ? from : -1);
530 }
531
532
533 static int
find_char_backward(MText * mt,int from,int to,int c)534 find_char_backward (MText *mt, int from, int to, int c)
535 {
536 int to_byte = POS_CHAR_TO_BYTE (mt, to);
537
538 if (mt->format <= MTEXT_FORMAT_UTF_8)
539 {
540 unsigned char *p = mt->data + to_byte;
541
542 while (from < to)
543 {
544 for (p--; ! CHAR_HEAD_P (p); p--);
545 if (c == STRING_CHAR (p))
546 break;
547 to--;
548 }
549 }
550 else if (mt->format <= MTEXT_FORMAT_UTF_16LE)
551 {
552 unsigned short *p = (unsigned short *) (mt->data) + to_byte;
553
554 if (mt->format == MTEXT_FORMAT_UTF_16)
555 {
556 while (from < to)
557 {
558 p--;
559 if (*p >= 0xDC00 && *p < 0xE000)
560 p--;
561 if (c == STRING_CHAR_UTF16 (p))
562 break;
563 to--;
564 }
565 }
566 else if (c < 0x10000)
567 {
568 c = SWAP_16 (c);
569 while (from < to && p[-1] != c)
570 {
571 to--;
572 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
573 }
574 }
575 else if (c < 0x110000)
576 {
577 int c1 = (c >> 10) + 0xD800;
578 int c2 = (c & 0x3FF) + 0xDC00;
579
580 c1 = SWAP_16 (c1);
581 c2 = SWAP_16 (c2);
582 while (from < to && (p[-1] != c2 || p[-2] != c1))
583 {
584 to--;
585 p -= ((p[-1] & 0xFF) < 0xD8 || (p[-1] & 0xFF) >= 0xE0) ? 1 : 2;
586 }
587 }
588 }
589 else
590 {
591 unsigned *p = (unsigned *) (mt->data) + to_byte;
592 unsigned c1 = c;
593
594 if (mt->format != MTEXT_FORMAT_UTF_32)
595 c1 = SWAP_32 (c1);
596 while (from < to && p[-1] != c1) to--, p--;
597 }
598
599 return (from < to ? to - 1 : -1);
600 }
601
602
603 static void
free_mtext(void * object)604 free_mtext (void *object)
605 {
606 MText *mt = (MText *) object;
607
608 if (mt->plist)
609 mtext__free_plist (mt);
610 if (mt->data && mt->allocated >= 0)
611 free (mt->data);
612 M17N_OBJECT_UNREGISTER (mtext_table, mt);
613 free (object);
614 }
615
616 /** Case handler (case-folding comparison and case conversion) */
617
618 /** Structure for an iterator used in case-fold comparison. */
619
620 struct casecmp_iterator {
621 MText *mt;
622 int pos;
623 MText *folded;
624 unsigned char *foldedp;
625 int folded_len;
626 };
627
628 static int
next_char_from_it(struct casecmp_iterator * it)629 next_char_from_it (struct casecmp_iterator *it)
630 {
631 int c, c1;
632
633 if (it->folded)
634 {
635 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
636 return c;
637 }
638
639 c = mtext_ref_char (it->mt, it->pos);
640 c1 = (int) mchar_get_prop (c, Msimple_case_folding);
641 if (c1 == 0xFFFF)
642 {
643 it->folded
644 = (MText *) mchar_get_prop (c, Mcomplicated_case_folding);
645 it->foldedp = it->folded->data;
646 c = STRING_CHAR_AND_BYTES (it->foldedp, it->folded_len);
647 return c;
648 }
649
650 if (c1 >= 0)
651 c = c1;
652 return c;
653 }
654
655 static void
advance_it(struct casecmp_iterator * it)656 advance_it (struct casecmp_iterator *it)
657 {
658 if (it->folded)
659 {
660 it->foldedp += it->folded_len;
661 if (it->foldedp == it->folded->data + it->folded->nbytes)
662 it->folded = NULL;
663 }
664 if (! it->folded)
665 {
666 it->pos++;
667 }
668 }
669
670 static int
case_compare(MText * mt1,int from1,int to1,MText * mt2,int from2,int to2)671 case_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
672 {
673 struct casecmp_iterator it1, it2;
674
675 it1.mt = mt1, it1.pos = from1, it1.folded = NULL;
676 it2.mt = mt2, it2.pos = from2, it2.folded = NULL;
677
678 while (it1.pos < to1 && it2.pos < to2)
679 {
680 int c1 = next_char_from_it (&it1);
681 int c2 = next_char_from_it (&it2);
682
683 if (c1 != c2)
684 return (c1 > c2 ? 1 : -1);
685 advance_it (&it1);
686 advance_it (&it2);
687 }
688 return (it2.pos == to2 ? (it1.pos < to1) : -1);
689 }
690
691 static MCharTable *tricky_chars, *cased, *soft_dotted, *case_mapping;
692 static MCharTable *combining_class;
693
694 /* Languages that require special handling in case-conversion. */
695 static MSymbol Mlt, Mtr, Maz;
696
697 static MText *gr03A3;
698 static MText *lt0049, *lt004A, *lt012E, *lt00CC, *lt00CD, *lt0128;
699 static MText *tr0130, *tr0049, *tr0069;
700
701 static int
init_case_conversion()702 init_case_conversion ()
703 {
704 Mlt = msymbol ("lt");
705 Mtr = msymbol ("tr");
706 Maz = msymbol ("az");
707
708 gr03A3 = mtext ();
709 mtext_cat_char (gr03A3, 0x03C2);
710
711 lt0049 = mtext ();
712 mtext_cat_char (lt0049, 0x0069);
713 mtext_cat_char (lt0049, 0x0307);
714
715 lt004A = mtext ();
716 mtext_cat_char (lt004A, 0x006A);
717 mtext_cat_char (lt004A, 0x0307);
718
719 lt012E = mtext ();
720 mtext_cat_char (lt012E, 0x012F);
721 mtext_cat_char (lt012E, 0x0307);
722
723 lt00CC = mtext ();
724 mtext_cat_char (lt00CC, 0x0069);
725 mtext_cat_char (lt00CC, 0x0307);
726 mtext_cat_char (lt00CC, 0x0300);
727
728 lt00CD = mtext ();
729 mtext_cat_char (lt00CD, 0x0069);
730 mtext_cat_char (lt00CD, 0x0307);
731 mtext_cat_char (lt00CD, 0x0301);
732
733 lt0128 = mtext ();
734 mtext_cat_char (lt0128, 0x0069);
735 mtext_cat_char (lt0128, 0x0307);
736 mtext_cat_char (lt0128, 0x0303);
737
738 tr0130 = mtext ();
739 mtext_cat_char (tr0130, 0x0069);
740
741 tr0049 = mtext ();
742 mtext_cat_char (tr0049, 0x0131);
743
744 tr0069 = mtext ();
745 mtext_cat_char (tr0069, 0x0130);
746
747 if (! (cased = mchar_get_prop_table (msymbol ("cased"), NULL)))
748 return -1;
749 if (! (soft_dotted = mchar_get_prop_table (msymbol ("soft-dotted"), NULL)))
750 return -1;
751 if (! (case_mapping = mchar_get_prop_table (msymbol ("case-mapping"), NULL)))
752 return -1;
753 if (! (combining_class = mchar_get_prop_table (Mcombining_class, NULL)))
754 return -1;
755
756 tricky_chars = mchartable (Mnil, 0);
757 mchartable_set (tricky_chars, 0x0049, (void *) 1);
758 mchartable_set (tricky_chars, 0x004A, (void *) 1);
759 mchartable_set (tricky_chars, 0x00CC, (void *) 1);
760 mchartable_set (tricky_chars, 0x00CD, (void *) 1);
761 mchartable_set (tricky_chars, 0x0128, (void *) 1);
762 mchartable_set (tricky_chars, 0x012E, (void *) 1);
763 mchartable_set (tricky_chars, 0x0130, (void *) 1);
764 mchartable_set (tricky_chars, 0x0307, (void *) 1);
765 mchartable_set (tricky_chars, 0x03A3, (void *) 1);
766 return 0;
767 }
768
/* Make sure the case-conversion data is initialized.  If it is not
   and init_case_conversion () fails, signal MERROR_MTEXT through
   MERROR with RET.  */
#define CASE_CONV_INIT(ret)                     \
  do {                                          \
    if (! tricky_chars)                         \
      {                                         \
        if (init_case_conversion () < 0)        \
          MERROR (MERROR_MTEXT, ret);           \
      }                                         \
  } while (0)
775
/* Replace the character at POS of MT with the M-text VAR, then move
   POS past the inserted text and adjust END by the change in length.
   MT, POS and END are variables of the code using this macro.  */

#define REPLACE(var)                                            \
  do {                                                          \
    int varlen = (var)->nchars;                                 \
                                                                \
    mtext_replace (mt, pos, pos + 1, (var), 0, varlen);         \
    pos += varlen;                                              \
    end += varlen - 1;                                          \
  } while (0)
786
/* Delete the character at POS of MT and decrement END.  POS itself
   is left alone.  MT, POS and END are variables of the code using
   this macro.  */

#define DELETE                          \
  do {                                  \
    mtext_del (mt, pos, pos + 1);       \
    end -= 1;                           \
  } while (0)
794
/* Lowercase the character C found at POS of MT by means of the
   CASE_MAPPING table.  If the table gives a lowercase text that
   differs from C itself, the character is replaced by it, POS moves
   past the replacement and END is adjusted by the change in length;
   otherwise POS simply moves to the next character.  C, MT, POS and
   END are variables of the code using this macro.  */

#define LOOKUP                                                          \
  do {                                                                  \
    MPlist *map_ = (MPlist *) mchartable_lookup (case_mapping, c);      \
    int advance_ = 1;                                                   \
                                                                        \
    if (map_)                                                           \
      {                                                                 \
        /* Lowercase is the 1st element.  */                            \
        MText *lower_ = MPLIST_VAL ((MPlist *) MPLIST_VAL (map_));      \
        int llen_ = mtext_nchars (lower_);                              \
                                                                        \
        if (mtext_ref_char (lower_, 0) != c || llen_ > 1)               \
          {                                                             \
            mtext_replace (mt, pos, pos + 1, lower_, 0, llen_);         \
            advance_ = llen_;                                           \
            end += llen_ - 1;                                           \
          }                                                             \
      }                                                                 \
    pos += advance_;                                                    \
  } while (0)
817
818
819 int
uppercase_precheck(MText * mt,int pos,int end)820 uppercase_precheck (MText *mt, int pos, int end)
821 {
822 for (; pos < end; pos++)
823 if (mtext_ref_char (mt, pos) == 0x0307 &&
824 (MSymbol) mtext_get_prop (mt, pos, Mlanguage) == Mlt)
825 return 1;
826 return 0;
827 }
828
829 int
lowercase_precheck(MText * mt,int pos,int end)830 lowercase_precheck (MText *mt, int pos, int end)
831 {
832 int c;
833 MSymbol lang;
834
835 for (; pos < end; pos++)
836 {
837 c = mtext_ref_char (mt, pos);
838
839 if ((int) mchartable_lookup (tricky_chars, c) == 1)
840 {
841 if (c == 0x03A3)
842 return 1;
843
844 lang = mtext_get_prop (mt, pos, Mlanguage);
845
846 if (lang == Mlt &&
847 (c == 0x0049 || c == 0x004A || c == 0x012E))
848 return 1;
849
850 if ((lang == Mtr || lang == Maz) &&
851 (c == 0x0307 || c == 0x0049))
852 return 1;
853 }
854 }
855 return 0;
856 }
857
/* Bit flags tested by final_sigma () on the values of the CASED
   table.  */
#define CASED 1
#define CASE_IGNORABLE 2
860
861 int
final_sigma(MText * mt,int pos)862 final_sigma (MText *mt, int pos)
863 {
864 int i, len = mtext_len (mt);
865 int c;
866
867 for (i = pos - 1; i >= 0; i--)
868 {
869 c = (int) mchartable_lookup (cased, mtext_ref_char (mt, i));
870 if (c == -1)
871 c = 0;
872 if (c & CASED)
873 break;
874 if (! (c & CASE_IGNORABLE))
875 return 0;
876 }
877
878 if (i == -1)
879 return 0;
880
881 for (i = pos + 1; i < len; i++)
882 {
883 c = (int) mchartable_lookup (cased, mtext_ref_char (mt, i));
884 if (c == -1)
885 c = 0;
886 if (c & CASED)
887 return 0;
888 if (! (c & CASE_IGNORABLE))
889 return 1;
890 }
891
892 return 1;
893 }
894
895 int
after_soft_dotted(MText * mt,int i)896 after_soft_dotted (MText *mt, int i)
897 {
898 int c, class;
899
900 for (i--; i >= 0; i--)
901 {
902 c = mtext_ref_char (mt, i);
903 if ((MSymbol) mchartable_lookup (soft_dotted, c) == Mt)
904 return 1;
905 class = (int) mchartable_lookup (combining_class, c);
906 if (class == 0 || class == 230)
907 return 0;
908 }
909
910 return 0;
911 }
912
913 int
more_above(MText * mt,int i)914 more_above (MText *mt, int i)
915 {
916 int class, len = mtext_len (mt);
917
918 for (i++; i < len; i++)
919 {
920 class = (int) mchartable_lookup (combining_class,
921 mtext_ref_char (mt, i));
922 if (class == 230)
923 return 1;
924 if (class == 0)
925 return 0;
926 }
927
928 return 0;
929 }
930
931 int
before_dot(MText * mt,int i)932 before_dot (MText *mt, int i)
933 {
934 int c, class, len = mtext_len (mt);
935
936 for (i++; i < len; i++)
937 {
938 c = mtext_ref_char (mt, i);
939 if (c == 0x0307)
940 return 1;
941 class = (int) mchartable_lookup (combining_class, c);
942 if (class == 230 || class == 0)
943 return 0;
944 }
945
946 return 0;
947 }
948
949 int
after_i(MText * mt,int i)950 after_i (MText *mt, int i)
951 {
952 int c, class;
953
954 for (i--; i >= 0; i--)
955 {
956 c = mtext_ref_char (mt, i);
957 if (c == (int) 'I')
958 return 1;
959 class = (int) mchartable_lookup (combining_class, c);
960 if (class == 230 || class == 0)
961 return 0;
962 }
963
964 return 0;
965 }
966
967
968 /* Internal API */
969
970 int
mtext__init()971 mtext__init ()
972 {
973 M17N_OBJECT_ADD_ARRAY (mtext_table, "M-text");
974 M_charbag = msymbol_as_managing_key (" charbag");
975 mtext_table.count = 0;
976 Mlanguage = msymbol ("language");
977 return 0;
978 }
979
980
/* Finalize the M-text module.  The only work done here is the call
   to mtext__wseg_fini ().  */

void
mtext__fini (void)
{
  mtext__wseg_fini ();
}
986
987
988 int
mtext__char_to_byte(MText * mt,int pos)989 mtext__char_to_byte (MText *mt, int pos)
990 {
991 int char_pos, byte_pos;
992 int forward;
993
994 if (pos < mt->cache_char_pos)
995 {
996 if (mt->cache_char_pos == mt->cache_byte_pos)
997 return pos;
998 if (pos < mt->cache_char_pos - pos)
999 {
1000 char_pos = byte_pos = 0;
1001 forward = 1;
1002 }
1003 else
1004 {
1005 char_pos = mt->cache_char_pos;
1006 byte_pos = mt->cache_byte_pos;
1007 forward = 0;
1008 }
1009 }
1010 else
1011 {
1012 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
1013 return (mt->cache_byte_pos + (pos - mt->cache_char_pos));
1014 if (pos - mt->cache_char_pos < mt->nchars - pos)
1015 {
1016 char_pos = mt->cache_char_pos;
1017 byte_pos = mt->cache_byte_pos;
1018 forward = 1;
1019 }
1020 else
1021 {
1022 char_pos = mt->nchars;
1023 byte_pos = mt->nbytes;
1024 forward = 0;
1025 }
1026 }
1027 if (forward)
1028 while (char_pos < pos)
1029 INC_POSITION (mt, char_pos, byte_pos);
1030 else
1031 while (char_pos > pos)
1032 DEC_POSITION (mt, char_pos, byte_pos);
1033 mt->cache_char_pos = char_pos;
1034 mt->cache_byte_pos = byte_pos;
1035 return byte_pos;
1036 }
1037
1038 /* mtext__byte_to_char () */
1039
1040 int
mtext__byte_to_char(MText * mt,int pos_byte)1041 mtext__byte_to_char (MText *mt, int pos_byte)
1042 {
1043 int char_pos, byte_pos;
1044 int forward;
1045
1046 if (pos_byte < mt->cache_byte_pos)
1047 {
1048 if (mt->cache_char_pos == mt->cache_byte_pos)
1049 return pos_byte;
1050 if (pos_byte < mt->cache_byte_pos - pos_byte)
1051 {
1052 char_pos = byte_pos = 0;
1053 forward = 1;
1054 }
1055 else
1056 {
1057 char_pos = mt->cache_char_pos;
1058 byte_pos = mt->cache_byte_pos;
1059 forward = 0;
1060 }
1061 }
1062 else
1063 {
1064 if (mt->nchars - mt->cache_char_pos == mt->nbytes - mt->cache_byte_pos)
1065 return (mt->cache_char_pos + (pos_byte - mt->cache_byte_pos));
1066 if (pos_byte - mt->cache_byte_pos < mt->nbytes - pos_byte)
1067 {
1068 char_pos = mt->cache_char_pos;
1069 byte_pos = mt->cache_byte_pos;
1070 forward = 1;
1071 }
1072 else
1073 {
1074 char_pos = mt->nchars;
1075 byte_pos = mt->nbytes;
1076 forward = 0;
1077 }
1078 }
1079 if (forward)
1080 while (byte_pos < pos_byte)
1081 INC_POSITION (mt, char_pos, byte_pos);
1082 else
1083 while (byte_pos > pos_byte)
1084 DEC_POSITION (mt, char_pos, byte_pos);
1085 mt->cache_char_pos = char_pos;
1086 mt->cache_byte_pos = byte_pos;
1087 return char_pos;
1088 }
1089
/* Estimated number of extra bytes that malloc uses for its own
   purposes on each memory allocation.  */
#define MALLOC_OVERHEAD 4
/* Smallest request size mtext__enlarge () works with.  */
#define MALLOC_MININUM_BYTES 12
1094
1095 void
mtext__enlarge(MText * mt,int nbytes)1096 mtext__enlarge (MText *mt, int nbytes)
1097 {
1098 nbytes += MAX_UTF8_CHAR_BYTES;
1099 if (mt->allocated >= nbytes)
1100 return;
1101 if (nbytes < MALLOC_MININUM_BYTES)
1102 nbytes = MALLOC_MININUM_BYTES;
1103 while (mt->allocated < nbytes)
1104 mt->allocated = mt->allocated * 2 + MALLOC_OVERHEAD;
1105 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1106 }
1107
1108 int
mtext__takein(MText * mt,int nchars,int nbytes)1109 mtext__takein (MText *mt, int nchars, int nbytes)
1110 {
1111 if (mt->plist)
1112 mtext__adjust_plist_for_insert (mt, mt->nchars, nchars, NULL);
1113 mt->nchars += nchars;
1114 mt->nbytes += nbytes;
1115 mt->data[mt->nbytes] = 0;
1116 return 0;
1117 }
1118
1119
1120 int
mtext__cat_data(MText * mt,unsigned char * p,int nbytes,enum MTextFormat format)1121 mtext__cat_data (MText *mt, unsigned char *p, int nbytes,
1122 enum MTextFormat format)
1123 {
1124 int nchars = -1;
1125
1126 if (mt->format > MTEXT_FORMAT_UTF_8)
1127 MERROR (MERROR_MTEXT, -1);
1128 if (format == MTEXT_FORMAT_US_ASCII)
1129 nchars = nbytes;
1130 else if (format == MTEXT_FORMAT_UTF_8)
1131 nchars = count_utf_8_chars (p, nbytes);
1132 if (nchars < 0)
1133 MERROR (MERROR_MTEXT, -1);
1134 mtext__enlarge (mt, mtext_nbytes (mt) + nbytes + 1);
1135 memcpy (MTEXT_DATA (mt) + mtext_nbytes (mt), p, nbytes);
1136 mtext__takein (mt, nchars, nbytes);
1137 return nchars;
1138 }
1139
1140 MText *
mtext__from_data(const void * data,int nitems,enum MTextFormat format,int need_copy)1141 mtext__from_data (const void *data, int nitems, enum MTextFormat format,
1142 int need_copy)
1143 {
1144 MText *mt;
1145 int nchars, nbytes, unit_bytes;
1146
1147 if (format == MTEXT_FORMAT_US_ASCII)
1148 {
1149 const char *p = (char *) data, *pend = p + nitems;
1150
1151 while (p < pend)
1152 if (*p++ < 0)
1153 MERROR (MERROR_MTEXT, NULL);
1154 nchars = nbytes = nitems;
1155 unit_bytes = 1;
1156 }
1157 else if (format == MTEXT_FORMAT_UTF_8)
1158 {
1159 if ((nchars = count_utf_8_chars (data, nitems)) < 0)
1160 MERROR (MERROR_MTEXT, NULL);
1161 nbytes = nitems;
1162 unit_bytes = 1;
1163 }
1164 else if (format <= MTEXT_FORMAT_UTF_16BE)
1165 {
1166 if ((nchars = count_utf_16_chars (data, nitems,
1167 format != MTEXT_FORMAT_UTF_16)) < 0)
1168 MERROR (MERROR_MTEXT, NULL);
1169 nbytes = USHORT_SIZE * nitems;
1170 unit_bytes = USHORT_SIZE;
1171 }
1172 else /* MTEXT_FORMAT_UTF_32XX */
1173 {
1174 nchars = nitems;
1175 nbytes = UINT_SIZE * nitems;
1176 unit_bytes = UINT_SIZE;
1177 }
1178
1179 mt = mtext ();
1180 mt->format = format;
1181 mt->coverage = FORMAT_COVERAGE (format);
1182 mt->allocated = need_copy ? nbytes + unit_bytes : -1;
1183 mt->nchars = nchars;
1184 mt->nbytes = nitems;
1185 if (need_copy)
1186 {
1187 MTABLE_MALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1188 memcpy (mt->data, data, nbytes);
1189 mt->data[nbytes] = 0;
1190 }
1191 else
1192 mt->data = (unsigned char *) data;
1193 return mt;
1194 }
1195
1196
1197 void
mtext__adjust_format(MText * mt,enum MTextFormat format)1198 mtext__adjust_format (MText *mt, enum MTextFormat format)
1199 {
1200 int i, c;
1201
1202 if (mt->nchars > 0)
1203 switch (format)
1204 {
1205 case MTEXT_FORMAT_US_ASCII:
1206 {
1207 unsigned char *p = mt->data;
1208
1209 for (i = 0; i < mt->nchars; i++)
1210 *p++ = mtext_ref_char (mt, i);
1211 mt->nbytes = mt->nchars;
1212 mt->cache_byte_pos = mt->cache_char_pos;
1213 break;
1214 }
1215
1216 case MTEXT_FORMAT_UTF_8:
1217 {
1218 unsigned char *p0, *p1;
1219
1220 i = count_by_utf_8 (mt, 0, mt->nchars) + 1;
1221 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
1222 mt->allocated = i;
1223 for (i = 0, p1 = p0; i < mt->nchars; i++)
1224 {
1225 c = mtext_ref_char (mt, i);
1226 p1 += CHAR_STRING_UTF8 (c, p1);
1227 }
1228 *p1 = '\0';
1229 free (mt->data);
1230 mt->data = p0;
1231 mt->nbytes = p1 - p0;
1232 mt->cache_char_pos = mt->cache_byte_pos = 0;
1233 break;
1234 }
1235
1236 default:
1237 if (format == MTEXT_FORMAT_UTF_16)
1238 {
1239 unsigned short *p0, *p1;
1240
1241 i = (count_by_utf_16 (mt, 0, mt->nchars) + 1) * USHORT_SIZE;
1242 MTABLE_MALLOC (p0, i, MERROR_MTEXT);
1243 mt->allocated = i;
1244 for (i = 0, p1 = p0; i < mt->nchars; i++)
1245 {
1246 c = mtext_ref_char (mt, i);
1247 p1 += CHAR_STRING_UTF16 (c, p1);
1248 }
1249 *p1 = 0;
1250 free (mt->data);
1251 mt->data = (unsigned char *) p0;
1252 mt->nbytes = p1 - p0;
1253 mt->cache_char_pos = mt->cache_byte_pos = 0;
1254 break;
1255 }
1256 else
1257 {
1258 unsigned int *p;
1259
1260 mt->allocated = (mt->nchars + 1) * UINT_SIZE;
1261 MTABLE_MALLOC (p, mt->allocated, MERROR_MTEXT);
1262 for (i = 0; i < mt->nchars; i++)
1263 p[i] = mtext_ref_char (mt, i);
1264 p[i] = 0;
1265 free (mt->data);
1266 mt->data = (unsigned char *) p;
1267 mt->nbytes = mt->nchars;
1268 mt->cache_byte_pos = mt->cache_char_pos;
1269 }
1270 }
1271 mt->format = format;
1272 mt->coverage = FORMAT_COVERAGE (format);
1273 }
1274
1275
1276 /* Find the position of a character at the beginning of a line of
1277 M-Text MT searching backward from POS. */
1278
1279 int
mtext__bol(MText * mt,int pos)1280 mtext__bol (MText *mt, int pos)
1281 {
1282 int byte_pos;
1283
1284 if (pos == 0)
1285 return pos;
1286 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1287 if (mt->format <= MTEXT_FORMAT_UTF_8)
1288 {
1289 unsigned char *p = mt->data + byte_pos;
1290
1291 if (p[-1] == '\n')
1292 return pos;
1293 p--;
1294 while (p > mt->data && p[-1] != '\n')
1295 p--;
1296 if (p == mt->data)
1297 return 0;
1298 byte_pos = p - mt->data;
1299 return POS_BYTE_TO_CHAR (mt, byte_pos);
1300 }
1301 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1302 {
1303 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1304 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1305 ? 0x0A00 : 0x000A);
1306
1307 if (p[-1] == newline)
1308 return pos;
1309 p--;
1310 while (p > (unsigned short *) (mt->data) && p[-1] != newline)
1311 p--;
1312 if (p == (unsigned short *) (mt->data))
1313 return 0;
1314 byte_pos = p - (unsigned short *) (mt->data);
1315 return POS_BYTE_TO_CHAR (mt, byte_pos);;
1316 }
1317 else
1318 {
1319 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
1320 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1321 ? 0x0A000000 : 0x0000000A);
1322
1323 if (p[-1] == newline)
1324 return pos;
1325 p--, pos--;
1326 while (p > (unsigned *) (mt->data) && p[-1] != newline)
1327 p--, pos--;
1328 return pos;
1329 }
1330 }
1331
1332
1333 /* Find the position of a character at the end of a line of M-Text MT
1334 searching forward from POS. */
1335
1336 int
mtext__eol(MText * mt,int pos)1337 mtext__eol (MText *mt, int pos)
1338 {
1339 int byte_pos;
1340
1341 if (pos == mt->nchars)
1342 return pos;
1343 byte_pos = POS_CHAR_TO_BYTE (mt, pos);
1344 if (mt->format <= MTEXT_FORMAT_UTF_8)
1345 {
1346 unsigned char *p = mt->data + byte_pos;
1347 unsigned char *endp;
1348
1349 if (*p == '\n')
1350 return pos + 1;
1351 p++;
1352 endp = mt->data + mt->nbytes;
1353 while (p < endp && *p != '\n')
1354 p++;
1355 if (p == endp)
1356 return mt->nchars;
1357 byte_pos = p + 1 - mt->data;
1358 return POS_BYTE_TO_CHAR (mt, byte_pos);
1359 }
1360 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1361 {
1362 unsigned short *p = ((unsigned short *) (mt->data)) + byte_pos;
1363 unsigned short *endp;
1364 unsigned short newline = (mt->format == MTEXT_FORMAT_UTF_16
1365 ? 0x0A00 : 0x000A);
1366
1367 if (*p == newline)
1368 return pos + 1;
1369 p++;
1370 endp = (unsigned short *) (mt->data) + mt->nbytes;
1371 while (p < endp && *p != newline)
1372 p++;
1373 if (p == endp)
1374 return mt->nchars;
1375 byte_pos = p + 1 - (unsigned short *) (mt->data);
1376 return POS_BYTE_TO_CHAR (mt, byte_pos);
1377 }
1378 else
1379 {
1380 unsigned *p = ((unsigned *) (mt->data)) + byte_pos;
1381 unsigned *endp;
1382 unsigned newline = (mt->format == MTEXT_FORMAT_UTF_32
1383 ? 0x0A000000 : 0x0000000A);
1384
1385 if (*p == newline)
1386 return pos + 1;
1387 p++, pos++;
1388 endp = (unsigned *) (mt->data) + mt->nbytes;
1389 while (p < endp && *p != newline)
1390 p++, pos++;
1391 return pos;
1392 }
1393 }
1394
1395 int
mtext__lowercase(MText * mt,int pos,int end)1396 mtext__lowercase (MText *mt, int pos, int end)
1397 {
1398 int opos = pos;
1399 int c;
1400 MText *orig = NULL;
1401 MSymbol lang;
1402
1403 if (lowercase_precheck (mt, pos, end))
1404 orig = mtext_dup (mt);
1405
1406 for (; pos < end; opos++)
1407 {
1408 c = mtext_ref_char (mt, pos);
1409 lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
1410
1411 if (c == 0x03A3 && final_sigma (orig, opos))
1412 REPLACE (gr03A3);
1413
1414 else if (lang == Mlt)
1415 {
1416 if (c == 0x00CC)
1417 REPLACE (lt00CC);
1418 else if (c == 0x00CD)
1419 REPLACE (lt00CD);
1420 else if (c == 0x0128)
1421 REPLACE (lt0128);
1422 else if (orig && more_above (orig, opos))
1423 {
1424 if (c == 0x0049)
1425 REPLACE (lt0049);
1426 else if (c == 0x004A)
1427 REPLACE (lt004A);
1428 else if (c == 0x012E)
1429 REPLACE (lt012E);
1430 else
1431 LOOKUP;
1432 }
1433 else
1434 LOOKUP;
1435 }
1436
1437 else if (lang == Mtr || lang == Maz)
1438 {
1439 if (c == 0x0130)
1440 REPLACE (tr0130);
1441 else if (c == 0x0307 && after_i (orig, opos))
1442 DELETE;
1443 else if (c == 0x0049 && ! before_dot (orig, opos))
1444 REPLACE (tr0049);
1445 else
1446 LOOKUP;
1447 }
1448
1449 else
1450 LOOKUP;
1451 }
1452
1453 if (orig)
1454 m17n_object_unref (orig);
1455
1456 return end;
1457 }
1458
1459 int
mtext__titlecase(MText * mt,int pos,int end)1460 mtext__titlecase (MText *mt, int pos, int end)
1461 {
1462 int opos = pos;
1463 int c;
1464 MText *orig = NULL;
1465 MSymbol lang;
1466 MPlist *pl;
1467
1468 /* Precheck for titlecase is identical to that for uppercase. */
1469 if (uppercase_precheck (mt, pos, end))
1470 orig = mtext_dup (mt);
1471
1472 for (; pos < end; opos++)
1473 {
1474 c = mtext_ref_char (mt, pos);
1475 lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
1476
1477 if ((lang == Mtr || lang == Maz) && c == 0x0069)
1478 REPLACE (tr0069);
1479
1480 else if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos))
1481 DELETE;
1482
1483 else if ((pl = (MPlist *) mchartable_lookup (case_mapping, c)))
1484 {
1485 /* Titlecase is the 2nd element. */
1486 MText *title
1487 = (MText *) mplist_value (mplist_next (mplist_value (pl)));
1488 int tlen = mtext_len (title);
1489
1490 if (mtext_ref_char (title, 0) != c || tlen > 1)
1491 {
1492 mtext_replace (mt, pos, pos + 1, title, 0, tlen);
1493 pos += tlen;
1494 end += tlen - 1;
1495 }
1496
1497 else
1498 pos++;
1499 }
1500
1501 else
1502 pos++;
1503 }
1504
1505 if (orig)
1506 m17n_object_unref (orig);
1507
1508 return end;
1509 }
1510
1511 int
mtext__uppercase(MText * mt,int pos,int end)1512 mtext__uppercase (MText *mt, int pos, int end)
1513 {
1514 int opos = pos;
1515 int c;
1516 MText *orig = NULL;
1517 MSymbol lang;
1518 MPlist *pl;
1519
1520 CASE_CONV_INIT (-1);
1521
1522 if (uppercase_precheck (mt, 0, end))
1523 orig = mtext_dup (mt);
1524
1525 for (; pos < end; opos++)
1526 {
1527 c = mtext_ref_char (mt, pos);
1528 lang = (MSymbol) mtext_get_prop (mt, pos, Mlanguage);
1529
1530 if (lang == Mlt && c == 0x0307 && after_soft_dotted (orig, opos))
1531 DELETE;
1532
1533 else if ((lang == Mtr || lang == Maz) && c == 0x0069)
1534 REPLACE (tr0069);
1535
1536 else
1537 {
1538 if ((pl = (MPlist *) mchartable_lookup (case_mapping, c)) != NULL)
1539 {
1540 MText *upper;
1541 int ulen;
1542
1543 /* Uppercase is the 3rd element. */
1544 upper = (MText *) mplist_value (mplist_next (mplist_next (mplist_value (pl))));
1545 ulen = mtext_len (upper);
1546
1547 if (mtext_ref_char (upper, 0) != c || ulen > 1)
1548 {
1549 mtext_replace (mt, pos, pos + 1, upper, 0, ulen);
1550 pos += ulen;
1551 end += ulen - 1;
1552 }
1553
1554 else
1555 pos++;
1556 }
1557
1558 else /* pl == NULL */
1559 pos++;
1560 }
1561 }
1562
1563 if (orig)
1564 m17n_object_unref (orig);
1565
1566 return end;
1567 }
1568
1569 /*** @} */
1570 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
1571
1572
1573 /* External API */
1574
1575 #ifdef WORDS_BIGENDIAN
1576 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16BE;
1577 #else
1578 const enum MTextFormat MTEXT_FORMAT_UTF_16 = MTEXT_FORMAT_UTF_16LE;
1579 #endif
1580
1581 #ifdef WORDS_BIGENDIAN
1582 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32BE;
1583 #else
1584 const int MTEXT_FORMAT_UTF_32 = MTEXT_FORMAT_UTF_32LE;
1585 #endif
1586
1587 /*** @addtogroup m17nMtext */
1588 /*** @{ */
1589 /*=*/
1590
1591 /***en The symbol whose name is "language". */
/***ja "language" という名前を持つシンボル. */
1593 MSymbol Mlanguage;
1594
1595 /*=*/
1596
1597 /***en
1598 @brief Allocate a new M-text.
1599
1600 The mtext () function allocates a new M-text of length 0 and
1601 returns a pointer to it. The allocated M-text will not be freed
1602 unless the user explicitly does so with the m17n_object_unref ()
1603 function. */
1604
1605 /***ja
1606 @brief ������M-text�������Ƥ�.
1607
1608 �ؿ� mtext () �ϡ�Ĺ�� 0 �ο����� M-text
1609 �������ơ�����ؤΥݥ����֤���������Ƥ�줿 M-text �ϡ��ؿ�
1610 m17n_object_unref () �ˤ�äƥ桼��������Ū�˹Ԥʤ�ʤ��¤ꡢ��������ʤ���
1611
1612 @latexonly \IPAlabel{mtext} @endlatexonly */
1613
1614 /***
1615 @seealso
1616 m17n_object_unref () */
1617
1618 MText *
mtext()1619 mtext ()
1620 {
1621 MText *mt;
1622
1623 M17N_OBJECT (mt, free_mtext, MERROR_MTEXT);
1624 mt->format = MTEXT_FORMAT_US_ASCII;
1625 mt->coverage = MTEXT_COVERAGE_ASCII;
1626 M17N_OBJECT_REGISTER (mtext_table, mt);
1627 return mt;
1628 }
1629
1630 /***en
1631 @brief Allocate a new M-text with specified data.
1632
1633 The mtext_from_data () function allocates a new M-text whose
1634 character sequence is specified by array $DATA of $NITEMS
1635 elements. $FORMAT specifies the format of $DATA.
1636
1637 When $FORMAT is either #MTEXT_FORMAT_US_ASCII or
1638 #MTEXT_FORMAT_UTF_8, the contents of $DATA must be of the type @c
1639 unsigned @c char, and $NITEMS counts by byte.
1640
1641 When $FORMAT is either #MTEXT_FORMAT_UTF_16LE or
1642 #MTEXT_FORMAT_UTF_16BE, the contents of $DATA must be of the type
1643 @c unsigned @c short, and $NITEMS counts by unsigned short.
1644
1645 When $FORMAT is either #MTEXT_FORMAT_UTF_32LE or
1646 #MTEXT_FORMAT_UTF_32BE, the contents of $DATA must be of the type
1647 @c unsigned, and $NITEMS counts by unsigned.
1648
1649 The character sequence of the M-text is not modifiable.
1650 The contents of $DATA must not be modified while the M-text is alive.
1651
1652 The allocated M-text will not be freed unless the user explicitly
1653 does so with the m17n_object_unref () function. Even in that case,
1654 $DATA is not freed.
1655
1656 @return
1657 If the operation was successful, mtext_from_data () returns a
1658 pointer to the allocated M-text. Otherwise it returns @c NULL and
1659 assigns an error code to the external variable #merror_code. */
1660 /***ja
1661 @brief ����Υǡ����˿����� M-text �������Ƥ�.
1662
1663 �ؿ� mtext_from_data () �ϡ����ǿ� $NITEMS ������ $DATA
1664 �ǻ��ꤵ�줿ʸ�������Ŀ����� M-text �������Ƥ롣$FORMAT �� $DATA
1665 �Υե����ޥåȤ���
1666
1667 $FORMAT �� #MTEXT_FORMAT_US_ASCII �� #MTEXT_FORMAT_UTF_8 �ʤ�С�
1668 $DATA �����Ƥ� @c unsigned @c char ���Ǥ��ꡢ$NITEMS
1669 �ϥХ���ñ�̤�ɽ����Ƥ��롣
1670
1671 $FORMAT �� #MTEXT_FORMAT_UTF_16LE �� #MTEXT_FORMAT_UTF_16BE �ʤ�С�
1672 $DATA �����Ƥ� @c unsigned @c short ���Ǥ��ꡢ$NITEMS �� unsigned
1673 short ñ�̤Ǥ��롣
1674
1675 $FORMAT �� #MTEXT_FORMAT_UTF_32LE �� #MTEXT_FORMAT_UTF_32BE �ʤ�С�
1676 $DATA �����Ƥ� @c unsigned ���Ǥ��ꡢ$NITEMS �� unsigned ñ�̤Ǥ��롣
1677
1678 ������Ƥ�줿 M-text ��ʸ������ѹ��Ǥ��ʤ���$DATA �����Ƥ�
1679 M-text ��ͭ���ʴ֤��ѹ����ƤϤʤ�ʤ���
1680
1681 ������Ƥ�줿 M-text �ϡ��ؿ� m17n_object_unref ()
1682 �ˤ�äƥ桼��������Ū�˹Ԥʤ�ʤ��¤ꡢ��������ʤ������ξ��Ǥ� $DATA �ϲ�������ʤ���
1683
1684 @return
1685 ��������������С�mtext_from_data () �ϳ�����Ƥ�줿M-text
1686 �ؤΥݥ����֤��������Ǥʤ���� @c NULL ���֤������ѿ� #merror_code
1687 �˥��顼�����ɤ����ꤹ�롣 */
1688
1689 /***
1690 @errors
1691 @c MERROR_MTEXT */
1692
1693 MText *
mtext_from_data(const void * data,int nitems,enum MTextFormat format)1694 mtext_from_data (const void *data, int nitems, enum MTextFormat format)
1695 {
1696 if (nitems < 0
1697 || format < MTEXT_FORMAT_US_ASCII || format >= MTEXT_FORMAT_MAX)
1698 MERROR (MERROR_MTEXT, NULL);
1699 return mtext__from_data (data, nitems, format, 0);
1700 }
1701
1702 /*=*/
1703
1704 /***en
1705 @brief Get information about the text data in M-text.
1706
1707 The mtext_data () function returns a pointer to the text data of
1708 M-text $MT. If $FMT is not NULL, the format of the text data is
1709 stored in it. If $NUNITS is not NULL, the number of units of the
1710 text data is stored in it.
1711
1712 If $POS_IDX is not NULL and it points to a non-negative number,
1713 what it points to is a character position. In this case, the
1714 return value is a pointer to the text data of a character at that
1715 position.
1716
1717 Otherwise, if $UNIT_IDX is not NULL, it points to a unit position.
1718 In this case, the return value is a pointer to the text data of a
1719 character containing that unit.
1720
    The character position and unit position of the return value are
    stored in $POS_IDX and $UNIT_IDX respectively if they are not
    NULL.
1724
1725 <ul>
1726
1727 <li> If the format of the text data is MTEXT_FORMAT_US_ASCII or
1728 MTEXT_FORMAT_UTF_8, one unit is unsigned char.
1729
1730 <li> If the format is MTEXT_FORMAT_UTF_16LE or
1731 MTEXT_FORMAT_UTF_16BE, one unit is unsigned short.
1732
1733 <li> If the format is MTEXT_FORMAT_UTF_32LE or
1734 MTEXT_FORMAT_UTF_32BE, one unit is unsigned int.
1735
1736 </ul> */
1737
1738 void *
mtext_data(MText * mt,enum MTextFormat * fmt,int * nunits,int * pos_idx,int * unit_idx)1739 mtext_data (MText *mt, enum MTextFormat *fmt, int *nunits,
1740 int *pos_idx, int *unit_idx)
1741 {
1742 void *data;
1743 int pos = 0, unit_pos = 0;
1744
1745 if (fmt)
1746 *fmt = mt->format;
1747 data = MTEXT_DATA (mt);
1748 if (pos_idx && *pos_idx >= 0)
1749 {
1750 pos = *pos_idx;
1751 if (pos > mtext_nchars (mt))
1752 MERROR (MERROR_MTEXT, NULL);
1753 unit_pos = POS_CHAR_TO_BYTE (mt, pos);
1754 }
1755 else if (unit_idx)
1756 {
1757 unit_pos = *unit_idx;
1758
1759 if (unit_pos < 0 || unit_pos > mtext_nbytes (mt))
1760 MERROR (MERROR_MTEXT, NULL);
1761 pos = POS_BYTE_TO_CHAR (mt, unit_pos);
1762 unit_pos = POS_CHAR_TO_BYTE (mt, pos);
1763 }
1764 if (nunits)
1765 *nunits = mtext_nbytes (mt) - unit_pos;
1766 if (pos_idx)
1767 *pos_idx = pos;
1768 if (unit_idx)
1769 *unit_idx = unit_pos;
1770 if (unit_pos > 0)
1771 {
1772 if (mt->format <= MTEXT_FORMAT_UTF_8)
1773 data = (unsigned char *) data + unit_pos;
1774 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1775 data = (unsigned short *) data + unit_pos;
1776 else
1777 data = (unsigned int *) data + unit_pos;
1778 }
1779 return data;
1780 }
1781
1782 /*=*/
1783
1784 /***en
1785 @brief Number of characters in M-text.
1786
1787 The mtext_len () function returns the number of characters in
1788 M-text $MT. */
1789
/***ja
    @brief M-text 中の文字の数.

    関数 mtext_len () は M-text $MT 中の文字の数を返す。

    @latexonly \IPAlabel{mtext_len} @endlatexonly */
1796
1797 int
mtext_len(MText * mt)1798 mtext_len (MText *mt)
1799 {
1800 return (mt->nchars);
1801 }
1802
1803 /*=*/
1804
1805 /***en
1806 @brief Return the character at the specified position in an M-text.
1807
1808 The mtext_ref_char () function returns the character at $POS in
1809 M-text $MT. If an error is detected, it returns -1 and assigns an
1810 error code to the external variable #merror_code. */
1811
1812 /***ja
1813 @brief M-text ��λ��ꤵ�줿���֤�ʸ�����֤�.
1814
1815 �ؿ� mtext_ref_char () �ϡ�M-text $MT �ΰ��� $POS
1816 ��ʸ�����֤������顼�����Ф��줿���� -1 ���֤��������ѿ� #merror_code
1817 �˥��顼�����ɤ����ꤹ�롣
1818
1819 @latexonly \IPAlabel{mtext_ref_char} @endlatexonly */
1820
1821 /***
1822 @errors
1823 @c MERROR_RANGE */
1824
1825 int
mtext_ref_char(MText * mt,int pos)1826 mtext_ref_char (MText *mt, int pos)
1827 {
1828 int c;
1829
1830 M_CHECK_POS (mt, pos, -1);
1831 if (mt->format <= MTEXT_FORMAT_UTF_8)
1832 {
1833 unsigned char *p = mt->data + POS_CHAR_TO_BYTE (mt, pos);
1834
1835 c = STRING_CHAR_UTF8 (p);
1836 }
1837 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1838 {
1839 unsigned short *p
1840 = (unsigned short *) (mt->data) + POS_CHAR_TO_BYTE (mt, pos);
1841 unsigned short p1[2];
1842
1843 if (mt->format != MTEXT_FORMAT_UTF_16)
1844 {
1845 p1[0] = SWAP_16 (*p);
1846 if (p1[0] >= 0xD800 && p1[0] < 0xDC00)
1847 p1[1] = SWAP_16 (p[1]);
1848 p = p1;
1849 }
1850 c = STRING_CHAR_UTF16 (p);
1851 }
1852 else
1853 {
1854 c = ((unsigned *) (mt->data))[pos];
1855 if (mt->format != MTEXT_FORMAT_UTF_32)
1856 c = SWAP_32 (c);
1857 }
1858 return c;
1859 }
1860
1861 /*=*/
1862
1863 /***en
1864 @brief Store a character into an M-text.
1865
1866 The mtext_set_char () function sets character $C, which has no
1867 text properties, at $POS in M-text $MT.
1868
1869 @return
1870 If the operation was successful, mtext_set_char () returns 0.
1871 Otherwise it returns -1 and assigns an error code to the external
1872 variable #merror_code. */
1873
1874 /***ja
1875 @brief M-text �˰�ʸ�������ꤹ��.
1876
1877 �ؿ� mtext_set_char () �ϡ��ƥ����ȥץ�ѥƥ�̵����ʸ�� $C ��
1878 M-text $MT �ΰ��� $POS �����ꤹ�롣
1879
1880 @return
1881 ��������������� mtext_set_char () �� 0 ���֤������Ԥ���� -1
1882 ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣
1883
1884 @latexonly \IPAlabel{mtext_set_char} @endlatexonly */
1885
1886 /***
1887 @errors
1888 @c MERROR_RANGE */
1889
1890 int
mtext_set_char(MText * mt,int pos,int c)1891 mtext_set_char (MText *mt, int pos, int c)
1892 {
1893 int pos_unit;
1894 int old_units, new_units;
1895 int delta;
1896 unsigned char *p;
1897 int unit_bytes;
1898
1899 M_CHECK_POS (mt, pos, -1);
1900 M_CHECK_READONLY (mt, -1);
1901
1902 mtext__adjust_plist_for_change (mt, pos, 1, 1);
1903
1904 if (mt->format <= MTEXT_FORMAT_UTF_8)
1905 {
1906 if (c >= 0x80)
1907 mt->format = MTEXT_FORMAT_UTF_8, mt->coverage = MTEXT_COVERAGE_FULL;
1908 }
1909 else if (mt->format <= MTEXT_FORMAT_UTF_16BE)
1910 {
1911 if (c >= 0x110000)
1912 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
1913 else if (mt->format != MTEXT_FORMAT_UTF_16)
1914 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
1915 }
1916 else if (mt->format != MTEXT_FORMAT_UTF_32)
1917 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
1918
1919 unit_bytes = UNIT_BYTES (mt->format);
1920 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
1921 p = mt->data + pos_unit * unit_bytes;
1922 old_units = CHAR_UNITS_AT (mt, p);
1923 new_units = CHAR_UNITS (c, mt->format);
1924 delta = new_units - old_units;
1925
1926 if (delta)
1927 {
1928 if (mt->cache_char_pos > pos)
1929 mt->cache_byte_pos += delta;
1930
1931 if ((mt->nbytes + delta + 1) * unit_bytes > mt->allocated)
1932 {
1933 mt->allocated = (mt->nbytes + delta + 1) * unit_bytes;
1934 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
1935 }
1936
1937 memmove (mt->data + (pos_unit + new_units) * unit_bytes,
1938 mt->data + (pos_unit + old_units) * unit_bytes,
1939 (mt->nbytes - pos_unit - old_units + 1) * unit_bytes);
1940 mt->nbytes += delta;
1941 mt->data[mt->nbytes * unit_bytes] = 0;
1942 }
1943 switch (mt->format)
1944 {
1945 case MTEXT_FORMAT_US_ASCII:
1946 mt->data[pos_unit] = c;
1947 break;
1948 case MTEXT_FORMAT_UTF_8:
1949 {
1950 unsigned char *p = mt->data + pos_unit;
1951 CHAR_STRING_UTF8 (c, p);
1952 break;
1953 }
1954 default:
1955 if (mt->format == MTEXT_FORMAT_UTF_16)
1956 {
1957 unsigned short *p = (unsigned short *) mt->data + pos_unit;
1958
1959 CHAR_STRING_UTF16 (c, p);
1960 }
1961 else
1962 ((unsigned *) mt->data)[pos_unit] = c;
1963 }
1964 return 0;
1965 }
1966
1967 /*=*/
1968
1969 /***en
1970 @brief Append a character to an M-text.
1971
1972 The mtext_cat_char () function appends character $C, which has no
1973 text properties, to the end of M-text $MT.
1974
1975 @return
1976 This function returns a pointer to the resulting M-text $MT. If
1977 $C is an invalid character, it returns @c NULL. */
1978
1979 /***ja
1980 @brief M-text �˰�ʸ���ɲä���.
1981
1982 �ؿ� mtext_cat_char () �ϡ��ƥ����ȥץ�ѥƥ�̵����ʸ�� $C ��
1983 M-text $MT ���������ɲä��롣
1984
1985 @return
1986 ���δؿ����ѹ����줿 M-text $MT �ؤΥݥ����֤���$C
1987 ��������ʸ���Ǥʤ����ˤ� @c NULL ���֤��� */
1988
1989 /***
1990 @seealso
1991 mtext_cat (), mtext_ncat () */
1992
1993 MText *
mtext_cat_char(MText * mt,int c)1994 mtext_cat_char (MText *mt, int c)
1995 {
1996 int nunits;
1997 int unit_bytes = UNIT_BYTES (mt->format);
1998
1999 M_CHECK_READONLY (mt, NULL);
2000 if (c < 0 || c > MCHAR_MAX)
2001 return NULL;
2002 mtext__adjust_plist_for_insert (mt, mt->nchars, 1, NULL);
2003
2004 if (c >= 0x80
2005 && (mt->format == MTEXT_FORMAT_US_ASCII
2006 || (c >= 0x10000
2007 && (mt->format == MTEXT_FORMAT_UTF_16LE
2008 || mt->format == MTEXT_FORMAT_UTF_16BE))))
2009
2010 {
2011 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
2012 unit_bytes = 1;
2013 }
2014 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
2015 {
2016 if (mt->format != MTEXT_FORMAT_UTF_32)
2017 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
2018 }
2019 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
2020 {
2021 if (mt->format != MTEXT_FORMAT_UTF_16)
2022 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
2023 }
2024
2025 nunits = CHAR_UNITS (c, mt->format);
2026 if ((mt->nbytes + nunits + 1) * unit_bytes > mt->allocated)
2027 {
2028 mt->allocated = (mt->nbytes + nunits * 16 + 1) * unit_bytes;
2029 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
2030 }
2031
2032 if (mt->format <= MTEXT_FORMAT_UTF_8)
2033 {
2034 unsigned char *p = mt->data + mt->nbytes;
2035 p += CHAR_STRING_UTF8 (c, p);
2036 *p = 0;
2037 }
2038 else if (mt->format == MTEXT_FORMAT_UTF_16)
2039 {
2040 unsigned short *p = (unsigned short *) mt->data + mt->nbytes;
2041 p += CHAR_STRING_UTF16 (c, p);
2042 *p = 0;
2043 }
2044 else
2045 {
2046 unsigned *p = (unsigned *) mt->data + mt->nbytes;
2047 *p++ = c;
2048 *p = 0;
2049 }
2050
2051 mt->nchars++;
2052 mt->nbytes += nunits;
2053 return mt;
2054 }
2055
2056 /*=*/
2057
2058 /***en
2059 @brief Create a copy of an M-text.
2060
2061 The mtext_dup () function creates a copy of M-text $MT while
2062 inheriting all the text properties of $MT.
2063
2064 @return
2065 This function returns a pointer to the created copy. */
2066
2067 /***ja
2068 @brief M-text �Υ��ԡ�����.
2069
2070 �ؿ� mtext_dup () �ϡ�M-text $MT �Υ��ԡ����롣$MT
2071 �Υƥ����ȥץ�ѥƥ��Ϥ��٤ƷѾ�����롣
2072
2073 @return
2074 ���δؿ��Ϻ��줿���ԡ��ؤΥݥ����֤���
2075
2076 @latexonly \IPAlabel{mtext_dup} @endlatexonly */
2077
2078 /***
2079 @seealso
2080 mtext_duplicate () */
2081
2082 MText *
mtext_dup(MText * mt)2083 mtext_dup (MText *mt)
2084 {
2085 return mtext_duplicate (mt, 0, mtext_nchars (mt));
2086 }
2087
2088 /*=*/
2089
2090 /***en
2091 @brief Append an M-text to another.
2092
2093 The mtext_cat () function appends M-text $MT2 to the end of M-text
2094 $MT1 while inheriting all the text properties. $MT2 itself is not
2095 modified.
2096
2097 @return
2098 This function returns a pointer to the resulting M-text $MT1. */
2099
2100 /***ja
2101 @brief 2�Ĥ� M-text��Ϣ�뤹��.
2102
2103 �ؿ� mtext_cat () �ϡ� M-text $MT2 �� M-text $MT1
2104 ���������դ��ä��롣$MT2 �Υƥ����ȥץ�ѥƥ��Ϥ��٤ƷѾ�����롣$MT2 ���ѹ�����ʤ���
2105
2106 @return
2107 ���δؿ����ѹ����줿 M-text $MT1 �ؤΥݥ����֤���
2108
2109 @latexonly \IPAlabel{mtext_cat} @endlatexonly */
2110
2111 /***
2112 @seealso
2113 mtext_ncat (), mtext_cat_char () */
2114
2115 MText *
mtext_cat(MText * mt1,MText * mt2)2116 mtext_cat (MText *mt1, MText *mt2)
2117 {
2118 M_CHECK_READONLY (mt1, NULL);
2119
2120 if (mt2->nchars > 0)
2121 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars);
2122 return mt1;
2123 }
2124
2125
2126 /*=*/
2127
2128 /***en
2129 @brief Append a part of an M-text to another.
2130
2131 The mtext_ncat () function appends the first $N characters of
2132 M-text $MT2 to the end of M-text $MT1 while inheriting all the
2133 text properties. If the length of $MT2 is less than $N, all
2134 characters are copied. $MT2 is not modified.
2135
2136 @return
2137 If the operation was successful, mtext_ncat () returns a
2138 pointer to the resulting M-text $MT1. If an error is detected, it
2139 returns @c NULL and assigns an error code to the global variable
2140 #merror_code. */
2141
2142 /***ja
2143 @brief M-text �ΰ������̤� M-text ���ղä���.
2144
2145 �ؿ� mtext_ncat () �ϡ�M-text $MT2 �ΤϤ���� $N ʸ���� M-text
2146 $MT1 ���������դ��ä��롣$MT2 �Υƥ����ȥץ�ѥƥ��Ϥ��٤ƷѾ�����롣$MT2
2147 ��Ĺ���� $N �ʲ��ʤ�С�$MT2 �Τ��٤Ƥ�ʸ�����ղä���롣 $MT2 ���ѹ�����ʤ���
2148
2149 @return
2150 ����������������硢mtext_ncat () ���ѹ����줿 M-text $MT1
2151 �ؤΥݥ����֤������顼�����Ф��줿���� @c NULL ���֤��������ѿ�
2152 #merror_code �˥��顼�����ɤ����ꤹ�롣
2153
2154 @latexonly \IPAlabel{mtext_ncat} @endlatexonly */
2155
2156 /***
2157 @errors
2158 @c MERROR_RANGE
2159
2160 @seealso
2161 mtext_cat (), mtext_cat_char () */
2162
2163 MText *
mtext_ncat(MText * mt1,MText * mt2,int n)2164 mtext_ncat (MText *mt1, MText *mt2, int n)
2165 {
2166 M_CHECK_READONLY (mt1, NULL);
2167 if (n < 0)
2168 MERROR (MERROR_RANGE, NULL);
2169 if (mt2->nchars > 0)
2170 insert (mt1, mt1->nchars, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
2171 return mt1;
2172 }
2173
2174
2175 /*=*/
2176
2177 /***en
2178 @brief Copy an M-text to another.
2179
2180 The mtext_cpy () function copies M-text $MT2 to M-text $MT1 while
2181 inheriting all the text properties. The old text in $MT1 is
2182 overwritten and the length of $MT1 is extended if necessary. $MT2
2183 is not modified.
2184
2185 @return
2186 This function returns a pointer to the resulting M-text $MT1. */
2187
2188 /***ja
2189 @brief M-text ���̤� M-text �˥��ԡ�����.
2190
2191 �ؿ� mtext_cpy () �� M-text $MT2 �� M-text $MT1 �˾���ԡ����롣
2192 $MT2 �Υƥ����ȥץ�ѥƥ��Ϥ��٤ƷѾ�����롣$MT1
2193 ��Ĺ����ɬ�פ˱����ƿ��Ф���롣$MT2 ���ѹ�����ʤ���
2194
2195 @return
2196 ���δؿ����ѹ����줿 M-text $MT1 �ؤΥݥ����֤���
2197
2198 @latexonly \IPAlabel{mtext_cpy} @endlatexonly */
2199
2200 /***
2201 @seealso
2202 mtext_ncpy (), mtext_copy () */
2203
2204 MText *
mtext_cpy(MText * mt1,MText * mt2)2205 mtext_cpy (MText *mt1, MText *mt2)
2206 {
2207 M_CHECK_READONLY (mt1, NULL);
2208 mtext_del (mt1, 0, mt1->nchars);
2209 if (mt2->nchars > 0)
2210 insert (mt1, 0, mt2, 0, mt2->nchars);
2211 return mt1;
2212 }
2213
2214 /*=*/
2215
2216 /***en
    @brief Copy the first few characters in an M-text to another.
2218
2219 The mtext_ncpy () function copies the first $N characters of
2220 M-text $MT2 to M-text $MT1 while inheriting all the text
2221 properties. If the length of $MT2 is less than $N, all characters
2222 of $MT2 are copied. The old text in $MT1 is overwritten and the
2223 length of $MT1 is extended if necessary. $MT2 is not modified.
2224
2225 @return
2226 If the operation was successful, mtext_ncpy () returns a pointer
2227 to the resulting M-text $MT1. If an error is detected, it returns
2228 @c NULL and assigns an error code to the global variable
2229 #merror_code. */
2230
2231 /***ja
2232 @brief M-text �˴ޤޤ��ǽ�β�ʸ�����ԡ�����.
2233
2234 �ؿ� mtext_ncpy () �ϡ�M-text $MT2 �κǽ�� $N ʸ���� M-text $MT1
2235 �˾���ԡ����롣$MT2 �Υƥ����ȥץ�ѥƥ��Ϥ��٤ƷѾ�����롣�⤷ $MT2
2236 ��Ĺ���� $N ���⾮������� $MT2 �Τ��٤Ƥ�ʸ���ԡ����롣$MT1
2237 ��Ĺ����ɬ�פ˱����ƿ��Ф���롣$MT2 ���ѹ�����ʤ���
2238
2239 @return
2240 ����������������硢mtext_ncpy () ���ѹ����줿 M-text $MT1
2241 �ؤΥݥ����֤������顼�����Ф��줿���� @c NULL ���֤��������ѿ�
2242 #merror_code �˥��顼�����ɤ����ꤹ�롣
2243
2244 @latexonly \IPAlabel{mtext_ncpy} @endlatexonly */
2245
2246 /***
2247 @errors
2248 @c MERROR_RANGE
2249
2250 @seealso
2251 mtext_cpy (), mtext_copy () */
2252
2253 MText *
mtext_ncpy(MText * mt1,MText * mt2,int n)2254 mtext_ncpy (MText *mt1, MText *mt2, int n)
2255 {
2256 M_CHECK_READONLY (mt1, NULL);
2257 if (n < 0)
2258 MERROR (MERROR_RANGE, NULL);
2259 mtext_del (mt1, 0, mt1->nchars);
2260 if (mt2->nchars > 0)
2261 insert (mt1, 0, mt2, 0, mt2->nchars < n ? mt2->nchars : n);
2262 return mt1;
2263 }
2264
2265 /*=*/
2266
2267 /***en
2268 @brief Create a new M-text from a part of an existing M-text.
2269
2270 The mtext_duplicate () function creates a copy of sub-text of
2271 M-text $MT, starting at $FROM (inclusive) and ending at $TO
2272 (exclusive) while inheriting all the text properties of $MT. $MT
2273 itself is not modified.
2274
2275 @return
2276 If the operation was successful, mtext_duplicate ()
2277 returns a pointer to the created M-text. If an error is detected,
2278 it returns NULL and assigns an error code to the external variable
2279 #merror_code. */
2280
2281 /***ja
2282 @brief ��¸�� M-text �ΰ������鿷���� M-text ��Ĥ���.
2283
2284 �ؿ� mtext_duplicate () �ϡ�M-text $MT �� $FROM ��$FROM ���Τ�ޤ�ˤ���
2285 $TO ��$TO ���Τϴޤޤʤ��ˤޤǤ���ʬ�Υ��ԡ����롣���ΤȤ� $MT
2286 �Υƥ����ȥץ�ѥƥ��Ϥ��٤ƷѾ�����롣$MT ���Τ�Τ��ѹ�����ʤ���
2287
2288 @return
2289 ��������������С�mtext_duplicate () �Ϻ��줿 M-text
2290 �ؤΥݥ����֤������顼�����Ф��줿���� @c NULL ���֤��������ѿ�
2291 #merror_code �˥��顼�����ɤ����ꤹ�롣
2292
2293 @latexonly \IPAlabel{mtext_duplicate} @endlatexonly */
2294
2295 /***
2296 @errors
2297 @c MERROR_RANGE
2298
2299 @seealso
2300 mtext_dup () */
2301
2302 MText *
mtext_duplicate(MText * mt,int from,int to)2303 mtext_duplicate (MText *mt, int from, int to)
2304 {
2305 MText *new = mtext ();
2306
2307 M_CHECK_RANGE (mt, from, to, NULL, new);
2308 new->format = mt->format;
2309 new->coverage = mt->coverage;
2310 insert (new, 0, mt, from, to);
2311 return new;
2312 }
2313
2314 /*=*/
2315
2316 /***en
2317 @brief Copy characters in the specified range into an M-text.
2318
2319 The mtext_copy () function copies the text between $FROM
2320 (inclusive) and $TO (exclusive) in M-text $MT2 to the region
2321 starting at $POS in M-text $MT1 while inheriting the text
2322 properties. The old text in $MT1 is overwritten and the length of
2323 $MT1 is extended if necessary. $MT2 is not modified.
2324
2325 @return
2326 If the operation was successful, mtext_copy () returns a pointer
2327 to the modified $MT1. Otherwise, it returns @c NULL and assigns
2328 an error code to the external variable #merror_code. */
2329
2330 /***ja
2331 @brief M-text �˻����ϰϤ�ʸ���ԡ�����.
2332
2333 �ؿ� mtext_copy () �ϡ� M-text $MT2 �� $FROM ��$FROM ���Τ�ޤ�ˤ���
2334 $TO ��$TO ���Τϴޤޤʤ��ˤޤǤ��ϰϤΥƥ����Ȥ� M-text $MT1 �ΰ��� $POS
2335 �������ԡ����롣$MT2 �Υƥ����ȥץ�ѥƥ��Ϥ��٤ƷѾ�����롣$MT1
2336 ��Ĺ����ɬ�פ˱����ƿ��Ф���롣$MT2 ���ѹ�����ʤ���
2337
2338 @latexonly \IPAlabel{mtext_copy} @endlatexonly
2339
2340 @return
2341 ����������������硢mtext_copy () ���ѹ����줿 $MT1
2342 �ؤΥݥ����֤��������Ǥʤ���� @c NULL ���֤��������ѿ� #merror_code
2343 �˥��顼�����ɤ����ꤹ�롣 */
2344
2345 /***
2346 @errors
2347 @c MERROR_RANGE
2348
2349 @seealso
2350 mtext_cpy (), mtext_ncpy () */
2351
2352 MText *
mtext_copy(MText * mt1,int pos,MText * mt2,int from,int to)2353 mtext_copy (MText *mt1, int pos, MText *mt2, int from, int to)
2354 {
2355 M_CHECK_POS_X (mt1, pos, NULL);
2356 M_CHECK_READONLY (mt1, NULL);
2357 M_CHECK_RANGE_X (mt2, from, to, NULL);
2358 mtext_del (mt1, pos, mt1->nchars);
2359 return insert (mt1, pos, mt2, from, to);
2360 }
2361
2362 /*=*/
2363
2364
2365 /***en
2366 @brief Delete characters in the specified range destructively.
2367
2368 The mtext_del () function deletes the characters in the range
2369 $FROM (inclusive) and $TO (exclusive) from M-text $MT
2370 destructively. As a result, the length of $MT shrinks by ($TO -
2371 $FROM) characters.
2372
2373 @return
2374 If the operation was successful, mtext_del () returns 0.
2375 Otherwise, it returns -1 and assigns an error code to the external
2376 variable #merror_code. */
2377
2378 /***ja
2379 @brief �����ϰϤ�ʸ�����˲�Ū�˼�����.
2380
2381 �ؿ� mtext_del () �ϡ�M-text $MT �� $FROM ��$FROM ���Τ�ޤ�ˤ���
2382 $TO ��$TO ���Τϴޤޤʤ��ˤޤǤ�ʸ�����˲�Ū�˼����������Ū��
2383 $MT ��Ĺ���� ($TO @c - $FROM) �����̤ळ�Ȥˤʤ롣
2384
2385 @return
2386 ��������������� mtext_del () �� 0 ���֤��������Ǥʤ���� -1
2387 ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣 */
2388
2389 /***
2390 @errors
2391 @c MERROR_RANGE
2392
2393 @seealso
2394 mtext_ins () */
2395
2396 int
mtext_del(MText * mt,int from,int to)2397 mtext_del (MText *mt, int from, int to)
2398 {
2399 int from_byte, to_byte;
2400 int unit_bytes = UNIT_BYTES (mt->format);
2401
2402 M_CHECK_READONLY (mt, -1);
2403 M_CHECK_RANGE (mt, from, to, -1, 0);
2404
2405 from_byte = POS_CHAR_TO_BYTE (mt, from);
2406 to_byte = POS_CHAR_TO_BYTE (mt, to);
2407
2408 if (mt->cache_char_pos >= to)
2409 {
2410 mt->cache_char_pos -= to - from;
2411 mt->cache_byte_pos -= to_byte - from_byte;
2412 }
2413 else if (mt->cache_char_pos > from)
2414 {
2415 mt->cache_char_pos -= from;
2416 mt->cache_byte_pos -= from_byte;
2417 }
2418
2419 mtext__adjust_plist_for_delete (mt, from, to - from);
2420 memmove (mt->data + from_byte * unit_bytes,
2421 mt->data + to_byte * unit_bytes,
2422 (mt->nbytes - to_byte + 1) * unit_bytes);
2423 mt->nchars -= (to - from);
2424 mt->nbytes -= (to_byte - from_byte);
2425 mt->cache_char_pos = from;
2426 mt->cache_byte_pos = from_byte;
2427 return 0;
2428 }
2429
2430
2431 /*=*/
2432
2433 /***en
2434 @brief Insert an M-text into another M-text.
2435
2436 The mtext_ins () function inserts M-text $MT2 into M-text $MT1, at
    position $POS. As a result, $MT1 is lengthened by the length of
2438 $MT2. On insertion, all the text properties of $MT2 are
2439 inherited. The original $MT2 is not modified.
2440
2441 @return
2442 If the operation was successful, mtext_ins () returns 0.
2443 Otherwise, it returns -1 and assigns an error code to the external
2444 variable #merror_code. */
2445
2446 /***ja
2447 @brief M-text ���̤� M-text ����������.
2448
2449 �ؿ� mtext_ins () �� M-text $MT1 �� $POS �ΰ��֤��̤� M-text $MT2
2450 ���������롣���η�� $MT1 ��Ĺ���� $MT2 ��Ĺ��ʬ���������롣�����κݡ�$MT2
2451 �Υƥ����ȥץ�ѥƥ��Ϥ��٤ƷѾ�����롣$MT2 ���Τ�Τ��ѹ�����ʤ���
2452
2453 @return
2454 ��������������� mtext_ins () �� 0 ���֤��������Ǥʤ���� -1
2455 ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣 */
2456
2457 /***
2458 @errors
2459 @c MERROR_RANGE , @c MERROR_MTEXT
2460
2461 @seealso
2462 mtext_del () , mtext_insert () */
2463
2464 int
mtext_ins(MText * mt1,int pos,MText * mt2)2465 mtext_ins (MText *mt1, int pos, MText *mt2)
2466 {
2467 M_CHECK_READONLY (mt1, -1);
2468 M_CHECK_POS_X (mt1, pos, -1);
2469
2470 if (mt2->nchars == 0)
2471 return 0;
2472 insert (mt1, pos, mt2, 0, mt2->nchars);
2473 return 0;
2474 }
2475
2476 /*=*/
2477
2478 /***en
2479 @brief Insert sub-text of an M-text into another M-text.
2480
2481 The mtext_insert () function inserts sub-text of M-text $MT2
2482 between $FROM (inclusive) and $TO (exclusive) into M-text $MT1, at
    position $POS. As a result, $MT1 is lengthened by ($TO - $FROM).
2484 On insertion, all the text properties of the sub-text of $MT2 are
2485 inherited.
2486
2487 @return
2488 If the operation was successful, mtext_insert () returns
2489 0. Otherwise, it returns -1 and assigns an error code to the
2490 external variable #merror_code. */
2491
2492 /***ja
2493 @brief M-text �ΰ������̤� M-text ����������.
2494
2495 �ؿ� mtext_insert () �� M-text $MT1 ��� $POS �ΰ��֤ˡ��̤�
2496 M-text $MT2 �� $FROM ��$FROM ���Τ�ޤ�ˤ��� $TO ��$TO ���Τϴޤ�
2497 �ʤ��ˤޤǤ�ʸ�����������롣���Ū�� $MT1 ��Ĺ���� ($TO - $FROM)
2498 �������Ӥ롣�����κݡ� $MT2 ��Υƥ����ȥץ�ѥƥ��Ϥ��٤ƷѾ�����
2499 �롣
2500
2501 @return
2502 ��������������С�mtext_insert () �� 0 ���֤��������Ǥʤ���� -1
2503 ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣 */
2504
2505 /***
2506 @errors
2507 @c MERROR_MTEXT , @c MERROR_RANGE
2508
2509 @seealso
2510 mtext_ins () */
2511
2512 int
mtext_insert(MText * mt1,int pos,MText * mt2,int from,int to)2513 mtext_insert (MText *mt1, int pos, MText *mt2, int from, int to)
2514 {
2515 M_CHECK_READONLY (mt1, -1);
2516 M_CHECK_POS_X (mt1, pos, -1);
2517 M_CHECK_RANGE (mt2, from, to, -1, 0);
2518
2519 insert (mt1, pos, mt2, from, to);
2520 return 0;
2521 }
2522
2523 /*=*/
2524
2525 /***en
2526 @brief Insert a character into an M-text.
2527
2528 The mtext_ins_char () function inserts $N copies of character $C
    into M-text $MT at position $POS. As a result, $MT is lengthened by
2530 $N.
2531
2532 @return
    If the operation was successful, mtext_ins_char () returns 0.
2534 Otherwise, it returns -1 and assigns an error code to the external
2535 variable #merror_code. */
2536
2537 /***ja
2538 @brief M-text ��ʸ������������.
2539
2540 �ؿ� mtext_ins_char () �� M-text $MT �� $POS �ΰ��֤�ʸ�� $C �Υ��ԡ��� $N
2541 ���������롣���η�� $MT1 ��Ĺ���� $N ���������롣
2542
2543 @return
2544 ��������������� mtext_ins_char () �� 0 ���֤��������Ǥʤ���� -1
2545 ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣 */
2546
2547 /***
2548 @errors
    @c MERROR_RANGE , @c MERROR_MTEXT
2550
2551 @seealso
    mtext_ins (), mtext_del () */
2553
2554 int
mtext_ins_char(MText * mt,int pos,int c,int n)2555 mtext_ins_char (MText *mt, int pos, int c, int n)
2556 {
2557 int nunits;
2558 int unit_bytes = UNIT_BYTES (mt->format);
2559 int pos_unit;
2560 int i;
2561
2562 M_CHECK_READONLY (mt, -1);
2563 M_CHECK_POS_X (mt, pos, -1);
2564 if (c < 0 || c > MCHAR_MAX)
2565 MERROR (MERROR_MTEXT, -1);
2566 if (n <= 0)
2567 return 0;
2568 mtext__adjust_plist_for_insert (mt, pos, n, NULL);
2569
2570 if (c >= 0x80
2571 && (mt->format == MTEXT_FORMAT_US_ASCII
2572 || (c >= 0x10000 && (mt->format == MTEXT_FORMAT_UTF_16LE
2573 || mt->format == MTEXT_FORMAT_UTF_16BE))))
2574 {
2575 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_8);
2576 unit_bytes = 1;
2577 }
2578 else if (mt->format >= MTEXT_FORMAT_UTF_32LE)
2579 {
2580 if (mt->format != MTEXT_FORMAT_UTF_32)
2581 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_32);
2582 }
2583 else if (mt->format >= MTEXT_FORMAT_UTF_16LE)
2584 {
2585 if (mt->format != MTEXT_FORMAT_UTF_16)
2586 mtext__adjust_format (mt, MTEXT_FORMAT_UTF_16);
2587 }
2588
2589 nunits = CHAR_UNITS (c, mt->format);
2590 if ((mt->nbytes + nunits * n + 1) * unit_bytes > mt->allocated)
2591 {
2592 mt->allocated = (mt->nbytes + nunits * n + 1) * unit_bytes;
2593 MTABLE_REALLOC (mt->data, mt->allocated, MERROR_MTEXT);
2594 }
2595 pos_unit = POS_CHAR_TO_BYTE (mt, pos);
2596 if (mt->cache_char_pos > pos)
2597 {
2598 mt->cache_char_pos += n;
2599 mt->cache_byte_pos += nunits * n;
2600 }
2601 memmove (mt->data + (pos_unit + nunits * n) * unit_bytes,
2602 mt->data + pos_unit * unit_bytes,
2603 (mt->nbytes - pos_unit + 1) * unit_bytes);
2604 if (mt->format <= MTEXT_FORMAT_UTF_8)
2605 {
2606 unsigned char *p = mt->data + pos_unit;
2607
2608 for (i = 0; i < n; i++)
2609 p += CHAR_STRING_UTF8 (c, p);
2610 }
2611 else if (mt->format == MTEXT_FORMAT_UTF_16)
2612 {
2613 unsigned short *p = (unsigned short *) mt->data + pos_unit;
2614
2615 for (i = 0; i < n; i++)
2616 p += CHAR_STRING_UTF16 (c, p);
2617 }
2618 else
2619 {
2620 unsigned *p = (unsigned *) mt->data + pos_unit;
2621
2622 for (i = 0; i < n; i++)
2623 *p++ = c;
2624 }
2625 mt->nchars += n;
2626 mt->nbytes += nunits * n;
2627 return 0;
2628 }
2629
2630 /*=*/
2631
2632 /***en
2633 @brief Replace sub-text of M-text with another.
2634
2635 The mtext_replace () function replaces sub-text of M-text $MT1
2636 between $FROM1 (inclusive) and $TO1 (exclusive) with the sub-text
2637 of M-text $MT2 between $FROM2 (inclusive) and $TO2 (exclusive).
2638 The new sub-text inherits text properties of the old sub-text.
2639
2640 @return
2641 If the operation was successful, mtext_replace () returns
2642 0. Otherwise, it returns -1 and assigns an error code to the
2643 external variable #merror_code. */
2644
2645 /***ja
2646 @brief M-text �ΰ������̤� M-text �ΰ������ִ�����.
2647
2648 �ؿ� mtext_replace () �ϡ� M-text $MT1 �� $FROM1 ��$FROM1 ���Τ��
2649 ��ˤ��� $TO1 ��$TO1 ���Τϴޤޤʤ��ˤޤǤ� M-text $MT2 ��
2650 $FROM2 ��$FROM2 ���Τ�ޤ�ˤ��� $TO2 ��$TO2 ���Τϴޤޤʤ��ˤ���
2651 �������롣�������������줿��ʬ�ϡ��֤����������Υƥ����ȥץ�ѥƥ�
2652 ���٤Ƥ�Ѿ����롣
2653
2654 @return
2655 ��������������С� mtext_replace () �� 0 ���֤��������Ǥ�
2656 ����� -1 ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣 */
2657
2658 /***
2659 @errors
2660 @c MERROR_MTEXT , @c MERROR_RANGE
2661
2662 @seealso
2663 mtext_insert () */
2664
2665 int
mtext_replace(MText * mt1,int from1,int to1,MText * mt2,int from2,int to2)2666 mtext_replace (MText *mt1, int from1, int to1,
2667 MText *mt2, int from2, int to2)
2668 {
2669 int len1, len2;
2670 int from1_byte, from2_byte, old_bytes, new_bytes;
2671 int unit_bytes, total_bytes;
2672 unsigned char *p;
2673 int free_mt2 = 0;
2674
2675 M_CHECK_READONLY (mt1, -1);
2676 M_CHECK_RANGE_X (mt1, from1, to1, -1);
2677 M_CHECK_RANGE_X (mt2, from2, to2, -1);
2678
2679 if (from1 == to1)
2680 {
2681 struct MTextPlist *saved = mt2->plist;
2682
2683 mt2->plist = NULL;
2684 insert (mt1, from1, mt2, from2, to2);
2685 mt2->plist = saved;
2686 return 0;
2687 }
2688
2689 if (from2 == to2)
2690 {
2691 return mtext_del (mt1, from1, to1);
2692 }
2693
2694 if (mt1 == mt2)
2695 {
2696 mt2 = mtext_duplicate (mt2, from2, to2);
2697 to2 -= from2;
2698 from2 = 0;
2699 free_mt2 = 1;
2700 }
2701
2702 if (mt1->format != mt2->format
2703 && mt1->format == MTEXT_FORMAT_US_ASCII)
2704 mt1->format = MTEXT_FORMAT_UTF_8;
2705 if (mt1->format != mt2->format
2706 && mt1->coverage < mt2->coverage)
2707 mtext__adjust_format (mt1, mt2->format);
2708 if (mt1->format != mt2->format)
2709 {
2710 mt2 = mtext_duplicate (mt2, from2, to2);
2711 mtext__adjust_format (mt2, mt1->format);
2712 to2 -= from2;
2713 from2 = 0;
2714 free_mt2 = 1;
2715 }
2716
2717 len1 = to1 - from1;
2718 len2 = to2 - from2;
2719 mtext__adjust_plist_for_change (mt1, from1, len1, len2);
2720
2721 unit_bytes = UNIT_BYTES (mt1->format);
2722 from1_byte = POS_CHAR_TO_BYTE (mt1, from1) * unit_bytes;
2723 from2_byte = POS_CHAR_TO_BYTE (mt2, from2) * unit_bytes;
2724 old_bytes = POS_CHAR_TO_BYTE (mt1, to1) * unit_bytes - from1_byte;
2725 new_bytes = POS_CHAR_TO_BYTE (mt2, to2) * unit_bytes - from2_byte;
2726 total_bytes = mt1->nbytes * unit_bytes + (new_bytes - old_bytes);
2727 if (total_bytes + unit_bytes > mt1->allocated)
2728 {
2729 mt1->allocated = total_bytes + unit_bytes;
2730 MTABLE_REALLOC (mt1->data, mt1->allocated, MERROR_MTEXT);
2731 }
2732 p = mt1->data + from1_byte;
2733 if (to1 < mt1->nchars
2734 && old_bytes != new_bytes)
2735 memmove (p + new_bytes, p + old_bytes,
2736 (mt1->nbytes + 1) * unit_bytes - (from1_byte + old_bytes));
2737 memcpy (p, mt2->data + from2_byte, new_bytes);
2738 mt1->nchars += len2 - len1;
2739 mt1->nbytes += (new_bytes - old_bytes) / unit_bytes;
2740 if (mt1->cache_char_pos >= to1)
2741 {
2742 mt1->cache_char_pos += len2 - len1;
2743 mt1->cache_byte_pos += new_bytes - old_bytes;
2744 }
2745 else if (mt1->cache_char_pos > from1)
2746 {
2747 mt1->cache_char_pos = from1;
2748 mt1->cache_byte_pos = from1_byte;
2749 }
2750
2751 if (free_mt2)
2752 M17N_OBJECT_UNREF (mt2);
2753 return 0;
2754 }
2755
2756 /*=*/
2757
2758 /***en
2759 @brief Search a character in an M-text.
2760
2761 The mtext_character () function searches M-text $MT for character
2762 $C. If $FROM is less than $TO, the search begins at position $FROM
2763 and goes forward but does not exceed ($TO - 1). Otherwise, the search
2764 begins at position ($FROM - 1) and goes backward but does not
2765 exceed $TO. An invalid position specification is regarded as both
2766 $FROM and $TO being 0.
2767
2768 @return
2769 If $C is found, mtext_character () returns the position of its
2770 first occurrence. Otherwise it returns -1 without changing the
2771 external variable #merror_code. If an error is detected, it returns -1 and
2772 assigns an error code to the external variable #merror_code. */
2773
2774 /***ja
2775 @brief M-text ���ʸ����õ��.
2776
2777 �ؿ� mtext_character () �� M-text $MT ���ʸ�� $C ��õ�����⤷
2778 $FROM �� $TO ��꾮������С�õ���ϰ��� $FROM �������������ء�����
2779 ($TO - 1) �ޤǿʤࡣ�����Ǥʤ���а��� ($FROM - 1) ������Ƭ�����ء�����
2780 $TO �ޤǿʤࡣ���֤λ���˸�꤬������ϡ�$FROM �� $TO
2781 ��ξ���� 0 �����ꤵ�줿��ΤȤߤʤ���
2782
2783 @return
2784 �⤷ $C �����Ĥ���С�mtext_character ()
2785 �Ϥ��κǽ�νи����֤��֤������Ĥ���ʤ��ä����ϳ����ѿ� #merror_code
2786 ���ѹ������� -1 ���֤������顼�����Ф��줿���� -1 ���֤��������ѿ�
2787 #merror_code �˥��顼�����ɤ����ꤹ�롣 */
2788
2789 /***
2790 @seealso
2791 mtext_chr(), mtext_rchr () */
2792
2793 int
mtext_character(MText * mt,int from,int to,int c)2794 mtext_character (MText *mt, int from, int to, int c)
2795 {
2796 if (from < to)
2797 {
2798 /* We do not use M_CHECK_RANGE () because this function should
2799 not set merror_code. */
2800 if (from < 0 || to > mt->nchars)
2801 return -1;
2802 return find_char_forward (mt, from, to, c);
2803 }
2804 else
2805 {
2806 /* ditto */
2807 if (to < 0 || from > mt->nchars)
2808 return -1;
2809 return find_char_backward (mt, to, from, c);
2810 }
2811 }
2812
2813
2814 /*=*/
2815
2816 /***en
2817 @brief Return the position of the first occurrence of a character in an M-text.
2818
2819 The mtext_chr () function searches M-text $MT for character $C.
2820 The search starts from the beginning of $MT and goes toward the end.
2821
2822 @return
2823 If $C is found, mtext_chr () returns its position; otherwise it
2824 returns -1. */
2825
2826 /***ja
2827 @brief M-text ��ǻ��ꤵ�줿ʸ�����ǽ�˸������֤��֤�.
2828
2829 �ؿ� mtext_chr () �� M-text $MT ���ʸ�� $C ��õ����õ���� $MT
2830 ����Ƭ�������������˿ʤࡣ
2831
2832 @return
2833 �⤷ $C �����Ĥ���С�mtext_chr ()
2834 �Ϥ��νи����֤��֤������Ĥ���ʤ��ä����� -1 ���֤���
2835
2836 @latexonly \IPAlabel{mtext_chr} @endlatexonly */
2837
2838 /***
2839 @errors
2840 @c MERROR_RANGE
2841
2842 @seealso
2843 mtext_rchr (), mtext_character () */
2844
2845 int
mtext_chr(MText * mt,int c)2846 mtext_chr (MText *mt, int c)
2847 {
2848 return find_char_forward (mt, 0, mt->nchars, c);
2849 }
2850
2851 /*=*/
2852
2853 /***en
2854 @brief Return the position of the last occurrence of a character in an M-text.
2855
2856 The mtext_rchr () function searches M-text $MT for character $C.
2857 The search starts from the end of $MT and goes backwardly toward the
2858 beginning.
2859
2860 @return
2861 If $C is found, mtext_rchr () returns its position; otherwise it
2862 returns -1. */
2863
2864 /***ja
2865 @brief M-text ��ǻ��ꤵ�줿ʸ�����Ǹ�˸������֤��֤�.
2866
2867 �ؿ� mtext_rchr () �� M-text $MT ���ʸ�� $C ��õ����õ���� $MT
2868 �κǸ夫����Ƭ�����ؤȸ�����˿ʤࡣ
2869
2870 @return
2871 �⤷ $C �����Ĥ���С�mtext_rchr ()
2872 �Ϥ��νи����֤��֤������Ĥ���ʤ��ä����� -1 ���֤���
2873
2874 @latexonly \IPAlabel{mtext_rchr} @endlatexonly */
2875
2876 /***
2877 @errors
2878 @c MERROR_RANGE
2879
2880 @seealso
2881 mtext_chr (), mtext_character () */
2882
2883 int
mtext_rchr(MText * mt,int c)2884 mtext_rchr (MText *mt, int c)
2885 {
2886 return find_char_backward (mt, mt->nchars, 0, c);
2887 }
2888
2889
2890 /*=*/
2891
2892 /***en
2893 @brief Compare two M-texts character-by-character.
2894
2895 The mtext_cmp () function compares M-texts $MT1 and $MT2 character
2896 by character.
2897
2898 @return
2899 This function returns 1, 0, or -1 if $MT1 is found greater than,
2900 equal to, or less than $MT2, respectively. Comparison is based on
2901 character codes. */
2902
2903 /***ja
2904 @brief ��Ĥ� M-text ��ʸ��ñ�̤���Ӥ���.
2905
2906 �ؿ� mtext_cmp () �ϡ� M-text $MT1 �� $MT2 ��ʸ��ñ�̤���Ӥ��롣
2907
2908 @return
2909 ���δؿ��ϡ�$MT1 �� $MT2 ����������� 0��$MT1 �� $MT2 ����礭�����
2910 1��$MT1 �� $MT2 ��꾮������� -1 ���֤�����Ӥ�ʸ�������ɤ˴�Ť���
2911
2912 @latexonly \IPAlabel{mtext_cmp} @endlatexonly */
2913
2914 /***
2915 @seealso
2916 mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2917 mtext_compare (), mtext_case_compare () */
2918
2919 int
mtext_cmp(MText * mt1,MText * mt2)2920 mtext_cmp (MText *mt1, MText *mt2)
2921 {
2922 return compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
2923 }
2924
2925
2926 /*=*/
2927
2928 /***en
2929 @brief Compare initial parts of two M-texts character-by-character.
2930
2931 The mtext_ncmp () function is similar to mtext_cmp (), but
2932 compares at most $N characters from the beginning.
2933
2934 @return
2935 This function returns 1, 0, or -1 if $MT1 is found greater than,
2936 equal to, or less than $MT2, respectively. */
2937
2938 /***ja
2939 @brief ��Ĥ� M-text ����Ƭ��ʬ��ʸ��ñ�̤���Ӥ���.
2940
2941 �ؿ� mtext_ncmp () �ϡ��ؿ� mtext_cmp () Ʊ�ͤ� M-text
2942 Ʊ�Τ���Ӥ���Ƭ������� $N ʸ���ޤǤ˴ؤ��ƹԤʤ���
2943
2944 @return
2945 ���δؿ��ϡ�$MT1 �� $MT2 ����������� 0��$MT1 �� $MT2 ����礭�����
2946 1��$MT1 �� $MT2 ��꾮������� -1 ���֤���
2947
2948 @latexonly \IPAlabel{mtext_ncmp} @endlatexonly */
2949
2950 /***
2951 @seealso
2952 mtext_cmp (), mtext_casecmp (), mtext_ncasecmp ()
2953 mtext_compare (), mtext_case_compare () */
2954
2955 int
mtext_ncmp(MText * mt1,MText * mt2,int n)2956 mtext_ncmp (MText *mt1, MText *mt2, int n)
2957 {
2958 if (n < 0)
2959 return 0;
2960 return compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
2961 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
2962 }
2963
2964 /*=*/
2965
2966 /***en
2967 @brief Compare specified regions of two M-texts.
2968
2969 The mtext_compare () function compares two M-texts $MT1 and $MT2,
2970 character-by-character. The compared regions are between $FROM1
2971 and $TO1 in $MT1 and $FROM2 to $TO2 in MT2. $FROM1 and $FROM2 are
2972 inclusive, $TO1 and $TO2 are exclusive. $FROM1 being equal to
2973 $TO1 (or $FROM2 being equal to $TO2) means an M-text of length
2974 zero. An invalid region specification is regarded as both $FROM1
2975 and $TO1 (or $FROM2 and $TO2) being 0.
2976
2977 @return
2978 This function returns 1, 0, or -1 if $MT1 is found greater than,
2979 equal to, or less than $MT2, respectively. Comparison is based on
2980 character codes. */
2981
2982 /***ja
2983 @brief ��Ĥ� M-text �λ��ꤷ���ΰ�Ʊ�Τ���Ӥ���.
2984
2985 �ؿ� mtext_compare () ����Ĥ� M-text $MT1 �� $MT2
2986 ��ʸ��ñ�̤���Ӥ��롣��Ӥ��оݤ� $MT1 �Τ��� $FROM1 ���� $TO1 �ޤǤȡ�$MT2
2987 �Τ��� $FROM2 ���� $TO2 �ޤǤǤ��롣$FROM1 �� $FROM2 �ϴޤޤ졢$TO1
2988 �� $TO2 �ϴޤޤ�ʤ���$FROM1 �� $TO1 �ʤ��뤤�� $FROM2 �� $TO2
2989 �ˤ�����������Ĺ������� M-text ���̣���롣�ϰϻ���˸�꤬������ϡ�
2990 $FROM1 �� $TO1 �ʤ��뤤�� $FROM2 �� $TO2 �� ξ���� 0 �����ꤵ�줿��ΤȤߤʤ���
2991
2992 @return
2993 ���δؿ��ϡ�$MT1 �� $MT2 ����������� 0��$MT1 �� $MT2 ����礭�����
2994 1 ��$MT1 �� $MT2 ��꾮������� -1 ���֤�����Ӥ�ʸ�������ɤ˴�Ť��� */
2995
2996 /***
2997 @seealso
2998 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
2999 mtext_case_compare () */
3000
3001 int
mtext_compare(MText * mt1,int from1,int to1,MText * mt2,int from2,int to2)3002 mtext_compare (MText *mt1, int from1, int to1, MText *mt2, int from2, int to2)
3003 {
3004 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
3005 from1 = to1 = 0;
3006
3007 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
3008 from2 = to2 = 0;
3009
3010 return compare (mt1, from1, to1, mt2, from2, to2);
3011 }
3012
3013 /*=*/
3014
3015 /***en
3016 @brief Search an M-text for a set of characters.
3017
3018 The mtext_spn () function returns the length of the initial
3019 segment of M-text $MT1 that consists entirely of characters in
3020 M-text $MT2. */
3021
3022 /***ja
3023 @brief ���뽸���ʸ���� M-text �����õ��.
3024
3025 �ؿ� mtext_spn () �ϡ�M-text $MT1 ����Ƭ���� M-text $MT2
3026 �˴ޤޤ��ʸ�������ǤǤ��Ƥ�����ʬ��Ĺ�����֤���
3027
3028 @latexonly \IPAlabel{mtext_spn} @endlatexonly */
3029
3030 /***
3031 @seealso
3032 mtext_cspn () */
3033
3034 int
mtext_spn(MText * mt,MText * accept)3035 mtext_spn (MText *mt, MText *accept)
3036 {
3037 return span (mt, accept, 0, Mnil);
3038 }
3039
3040 /*=*/
3041
3042 /***en
3043 @brief Search an M-text for the complement of a set of characters.
3044
3045 The mtext_cspn () returns the length of the initial segment of
3046 M-text $MT1 that consists entirely of characters not in M-text $MT2. */
3047
3048 /***ja
3049 @brief ���뽸���°���ʤ�ʸ���� M-text �����õ��.
3050
3051 �ؿ� mtext_cspn () �ϡ�M-text $MT1 ����Ƭ��ʬ�� M-text $MT2
3052 �˴ޤޤ�ʤ�ʸ�������ǤǤ��Ƥ�����ʬ��Ĺ�����֤���
3053
3054 @latexonly \IPAlabel{mtext_cspn} @endlatexonly */
3055
3056 /***
3057 @seealso
3058 mtext_spn () */
3059
3060 int
mtext_cspn(MText * mt,MText * reject)3061 mtext_cspn (MText *mt, MText *reject)
3062 {
3063 return span (mt, reject, 0, Mt);
3064 }
3065
3066 /*=*/
3067
3068 /***en
3069 @brief Search an M-text for any of a set of characters.
3070
3071 The mtext_pbrk () function locates the first occurrence in M-text
3072 $MT1 of any of the characters in M-text $MT2.
3073
3074 @return
3075 This function returns the position in $MT1 of the found character.
3076 If no such character is found, it returns -1. */
3077
3078 /***ja
3079 @brief ���뽸���°��ʸ���� M-text ���椫��õ��.
3080
3081 �ؿ� mtext_pbrk () �ϡ�M-text $MT1 ��� M-text $MT2
3082 ��ʸ���Τɤ줫���ǽ�˸������֤�Ĵ�٤롣
3083
3084 @return
3085 ���Ĥ��ä�ʸ���Ρ�$MT1
3086 ��ˤ�����и����֤��֤����⤷���Τ褦��ʸ�����ʤ���� -1 ���֤���
3087
3088 @latexonly \IPAlabel{mtext_pbrk} @endlatexonly */
3089
3090 int
mtext_pbrk(MText * mt,MText * accept)3091 mtext_pbrk (MText *mt, MText *accept)
3092 {
3093 int nchars = mtext_nchars (mt);
3094 int len = span (mt, accept, 0, Mt);
3095
3096 return (len == nchars ? -1 : len);
3097 }
3098
3099 /*=*/
3100
3101 /***en
3102 @brief Look for a token in an M-text.
3103
3104 The mtext_tok () function searches a token that firstly occurs
3105 after position $POS in M-text $MT. Here, a token means a
    substring each character of which does not appear in M-text $DELIM. Note
3107 that the type of $POS is not @c int but pointer to @c int.
3108
3109 @return
3110 If a token is found, mtext_tok () copies the corresponding part of
3111 $MT and returns a pointer to the copy. In this case, $POS is set
3112 to the end of the found token. If no token is found, it returns
3113 @c NULL without changing the external variable #merror_code. If an
3114 error is detected, it returns @c NULL and assigns an error code
3115 to the external variable #merror_code. */
3116
3117 /***ja
3118 @brief M-text ��Υȡ������õ��.
3119
3120 �ؿ� mtext_tok () �ϡ�M-text $MT ����ǰ��� $POS
3121 �ʹߺǽ�˸����ȡ������õ���������ǥȡ�����Ȥ� M-text $DELIM
3122 ����˸����ʤ�ʸ����������ʤ���ʬʸ����Ǥ��롣$POS �η��� @c int �ǤϤʤ��� @c
3123 int �ؤΥݥ��Ǥ��뤳�Ȥ���ա�
3124
3125 @return
3126 �⤷�ȡ������Ĥ���� mtext_tok ()�Ϥ��Υȡ����������������ʬ��
3127 $MT �ԡ��������Υ��ԡ��ؤΥݥ����֤������ξ�硢$POS
3128 �ϸ��Ĥ��ä��ȡ�����ν�ü�˥��åȤ���롣�ȡ������Ĥ���ʤ��ä����ϳ����ѿ�
3129 #merror_code ���Ѥ����� @c NULL ���֤������顼�����Ф��줿����
3130 @c NULL ���֤��������ѿ� #merror_code �˥��顼�����ɤ����ꤹ�롣
3131
3132 @latexonly \IPAlabel{mtext_tok} @endlatexonly */
3133
3134 /***
3135 @errors
3136 @c MERROR_RANGE */
3137
3138 MText *
mtext_tok(MText * mt,MText * delim,int * pos)3139 mtext_tok (MText *mt, MText *delim, int *pos)
3140 {
3141 int nchars = mtext_nchars (mt);
3142 int pos2;
3143
3144 M_CHECK_POS (mt, *pos, NULL);
3145
3146 /*
3147 Skip delimiters starting at POS in MT.
3148 Never do *pos += span(...), or you will change *pos
3149 even though no token is found.
3150 */
3151 pos2 = *pos + span (mt, delim, *pos, Mnil);
3152
3153 if (pos2 == nchars)
3154 return NULL;
3155
3156 *pos = pos2 + span (mt, delim, pos2, Mt);
3157 return (insert (mtext (), 0, mt, pos2, *pos));
3158 }
3159
3160 /*=*/
3161
3162 /***en
3163 @brief Locate an M-text in another.
3164
3165 The mtext_text () function finds the first occurrence of M-text
3166 $MT2 in M-text $MT1 after the position $POS while ignoring
3167 difference of the text properties.
3168
3169 @return
    If $MT2 is found in $MT1, mtext_text () returns the position of its
3171 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
3172 returns 0. */
3173
3174 /***ja
3175 @brief M-text ����̤� M-text ��õ��.
3176
3177 �ؿ� mtext_text () �ϡ�M-text $MT1 ��ǰ��� $POS �ʹߤ˸�����
3178 M-text $MT2 �κǽ�ΰ��֤�Ĵ�٤롣�ƥ����ȥץ�ѥƥ��ΰ㤤��̵�뤵��롣
3179
3180 @return
3181 $MT1 ��� $MT2 �����Ĥ���С�mtext_text()
3182 �Ϥ��κǽ�νи����֤��֤������Ĥ���ʤ����� -1 ���֤����⤷ $MT2 �����ʤ�� 0 ���֤���
3183
3184 @latexonly \IPAlabel{mtext_text} @endlatexonly */
3185
3186 int
mtext_text(MText * mt1,int pos,MText * mt2)3187 mtext_text (MText *mt1, int pos, MText *mt2)
3188 {
3189 int from = pos;
3190 int c = mtext_ref_char (mt2, 0);
3191 int nbytes2 = mtext_nbytes (mt2);
3192 int limit;
3193 int use_memcmp = (mt1->format == mt2->format
3194 || (mt1->format < MTEXT_FORMAT_UTF_8
3195 && mt2->format == MTEXT_FORMAT_UTF_8));
3196 int unit_bytes = UNIT_BYTES (mt1->format);
3197
3198 if (from + mtext_nchars (mt2) > mtext_nchars (mt1))
3199 return -1;
3200 limit = mtext_nchars (mt1) - mtext_nchars (mt2) + 1;
3201
3202 while (1)
3203 {
3204 int pos_byte;
3205
3206 if ((pos = mtext_character (mt1, from, limit, c)) < 0)
3207 return -1;
3208 pos_byte = POS_CHAR_TO_BYTE (mt1, pos);
3209 if (use_memcmp
3210 ? ! memcmp (mt1->data + pos_byte * unit_bytes,
3211 mt2->data, nbytes2 * unit_bytes)
3212 : ! compare (mt1, pos, mt2->nchars, mt2, 0, mt2->nchars))
3213 break;
3214 from = pos + 1;
3215 }
3216 return pos;
3217 }
3218
3219 /***en
3220 @brief Locate an M-text in a specific range of another.
3221
3222 The mtext_search () function searches for the first occurrence of
3223 M-text $MT2 in M-text $MT1 in the region $FROM and $TO while
3224 ignoring difference of the text properties. If $FROM is less than
    $TO, the forward search starts from $FROM, otherwise the backward
    search starts from $FROM and goes toward $TO.
3227
3228 @return
3229 If $MT2 is found in $MT1, mtext_search () returns the position of the
3230 first occurrence. Otherwise it returns -1. If $MT2 is empty, it
3231 returns 0. */
3232
3233 /***ja
3234 @brief M-text ���������ΰ���̤� M-text ��õ��.
3235
3236 �ؿ� mtext_search () �ϡ�M-text $MT1 ��� $FROM ���� $TO
3237 �ޤǤδ֤��ΰ��M-text $MT2
3238 ���ǽ�˸�������֤�Ĵ�٤롣�ƥ����ȥץ�ѥƥ��ΰ㤤��̵�뤵��롣�⤷
3239 $FROM �� $TO ��꾮�������õ���ϰ��� $FROM �������������ء������Ǥʤ����
3240 $TO ������Ƭ�����ؿʤࡣ
3241
3242 @return
3243 $MT1 ��� $MT2 �����Ĥ���С�mtext_search()
3244 �Ϥ��κǽ�νи����֤��֤������Ĥ���ʤ����� -1 ���֤����⤷ $MT2 �����ʤ�� 0 ���֤���
3245 */
3246
3247 int
mtext_search(MText * mt1,int from,int to,MText * mt2)3248 mtext_search (MText *mt1, int from, int to, MText *mt2)
3249 {
3250 int c = mtext_ref_char (mt2, 0);
3251 int from_byte;
3252 int nbytes2 = mtext_nbytes (mt2);
3253
3254 if (mt1->format > MTEXT_FORMAT_UTF_8
3255 || mt2->format > MTEXT_FORMAT_UTF_8)
3256 MERROR (MERROR_MTEXT, -1);
3257
3258 if (from < to)
3259 {
3260 to -= mtext_nchars (mt2);
3261 if (from > to)
3262 return -1;
3263 while (1)
3264 {
3265 if ((from = find_char_forward (mt1, from, to, c)) < 0)
3266 return -1;
3267 from_byte = POS_CHAR_TO_BYTE (mt1, from);
3268 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
3269 break;
3270 from++;
3271 }
3272 }
3273 else if (from > to)
3274 {
3275 from -= mtext_nchars (mt2);
3276 if (from < to)
3277 return -1;
3278 while (1)
3279 {
3280 if ((from = find_char_backward (mt1, to, from + 1, c)) < 0)
3281 return -1;
3282 from_byte = POS_CHAR_TO_BYTE (mt1, from);
3283 if (! memcmp (mt1->data + from_byte, mt2->data, nbytes2))
3284 break;
3285 from--;
3286 }
3287 }
3288
3289 return from;
3290 }
3291
3292 /*=*/
3293
3294 /***en
3295 @brief Compare two M-texts ignoring cases.
3296
3297 The mtext_casecmp () function is similar to mtext_cmp (), but
3298 ignores cases on comparison.
3299
3300 @return
3301 This function returns 1, 0, or -1 if $MT1 is found greater than,
3302 equal to, or less than $MT2, respectively. */
3303
3304 /***ja
3305 @brief ��Ĥ� M-text ����ʸ������ʸ���ζ��̤�̵�뤷����Ӥ���.
3306
3307 �ؿ� mtext_casecmp () �ϡ��ؿ� mtext_cmp () Ʊ�ͤ� M-text
3308 Ʊ�Τ���Ӥ���ʸ������ʸ���ζ��̤�̵�뤷�ƹԤʤ���
3309
3310 @return
3311 ���δؿ��ϡ�$MT1 �� $MT2 ����������� 0��$MT1 �� $MT2
3312 ����礭����� 1��$MT1 �� $MT2 ��꾮������� -1 ���֤���
3313
3314 @latexonly \IPAlabel{mtext_casecmp} @endlatexonly */
3315
3316 /***
3317 @seealso
3318 mtext_cmp (), mtext_ncmp (), mtext_ncasecmp ()
3319 mtext_compare (), mtext_case_compare () */
3320
3321 int
mtext_casecmp(MText * mt1,MText * mt2)3322 mtext_casecmp (MText *mt1, MText *mt2)
3323 {
3324 return case_compare (mt1, 0, mt1->nchars, mt2, 0, mt2->nchars);
3325 }
3326
3327 /*=*/
3328
3329 /***en
3330 @brief Compare initial parts of two M-texts ignoring cases.
3331
3332 The mtext_ncasecmp () function is similar to mtext_casecmp (), but
3333 compares at most $N characters from the beginning.
3334
3335 @return
3336 This function returns 1, 0, or -1 if $MT1 is found greater than,
3337 equal to, or less than $MT2, respectively. */
3338
3339 /***ja
3340 @brief ��Ĥ� M-text ����Ƭ��ʬ����ʸ������ʸ���ζ��̤�̵�뤷����Ӥ���.
3341
3342 �ؿ� mtext_ncasecmp () �ϡ��ؿ� mtext_casecmp () Ʊ�ͤ� M-text
3343 Ʊ�Τ���Ӥ���Ƭ������� $N ʸ���ޤǤ˴ؤ��ƹԤʤ���
3344
3345 @return
3346 ���δؿ��ϡ�$MT1 �� $MT2 ����������� 0��$MT1 �� $MT2
3347 ����礭����� 1��$MT1 �� $MT2 ��꾮������� -1 ���֤���
3348
3349 @latexonly \IPAlabel{mtext_ncasecmp} @endlatexonly */
3350
3351 /***
3352 @seealso
    mtext_cmp (), mtext_ncmp (), mtext_casecmp (),
3354 mtext_compare (), mtext_case_compare () */
3355
3356 int
mtext_ncasecmp(MText * mt1,MText * mt2,int n)3357 mtext_ncasecmp (MText *mt1, MText *mt2, int n)
3358 {
3359 if (n < 0)
3360 return 0;
3361 return case_compare (mt1, 0, (mt1->nchars < n ? mt1->nchars : n),
3362 mt2, 0, (mt2->nchars < n ? mt2->nchars : n));
3363 }
3364
3365 /*=*/
3366
3367 /***en
3368 @brief Compare specified regions of two M-texts ignoring cases.
3369
3370 The mtext_case_compare () function compares two M-texts $MT1 and
3371 $MT2, character-by-character, ignoring cases. The compared
3372 regions are between $FROM1 and $TO1 in $MT1 and $FROM2 to $TO2 in
3373 MT2. $FROM1 and $FROM2 are inclusive, $TO1 and $TO2 are
3374 exclusive. $FROM1 being equal to $TO1 (or $FROM2 being equal to
3375 $TO2) means an M-text of length zero. An invalid region
3376 specification is regarded as both $FROM1 and $TO1 (or $FROM2 and
3377 $TO2) being 0.
3378
3379 @return
3380 This function returns 1, 0, or -1 if $MT1 is found greater than,
3381 equal to, or less than $MT2, respectively. Comparison is based on
3382 character codes. */
3383
3384 /***ja
3385 @brief ��Ĥ� M-text �λ��ꤷ���ΰ����ʸ������ʸ���ζ��̤�̵�뤷����Ӥ���.
3386
3387 �ؿ� mtext_compare () ����Ĥ� M-text $MT1 �� $MT2
3388 ����ʸ������ʸ���ζ��̤�̵�뤷��ʸ��ñ�̤���Ӥ��롣��Ӥ��оݤ� $MT1
3389 �� $FROM1 ���� $TO1 �ޤǡ�$MT2 �� $FROM2 ���� $TO2 �ޤǤǤ��롣
3390 $FROM1 �� $FROM2 �ϴޤޤ졢$TO1 �� $TO2 �ϴޤޤ�ʤ���$FROM1 �� $TO1
3391 �ʤ��뤤�� $FROM2 �� $TO2 �ˤ�����������Ĺ������� M-text
3392 ���̣���롣�ϰϻ���˸�꤬������ϡ�$FROM1 �� $TO1 �ʤ��뤤��
3393 $FROM2 �� $TO2 ��ξ���� 0 �����ꤵ�줿��Τȸ��ʤ���
3394
3395 @return
3396 ���δؿ��ϡ�$MT1 �� $MT2 ����������� 0��$MT1 �� $MT2 ����礭�����
3397 1��$MT1 �� $MT2 ��꾮������� -1���֤�����Ӥ�ʸ�������ɤ˴�Ť���
3398
3399 @latexonly \IPAlabel{mtext_case_compare} @endlatexonly
3400 */
3401
3402 /***
3403 @seealso
3404 mtext_cmp (), mtext_ncmp (), mtext_casecmp (), mtext_ncasecmp (),
3405 mtext_compare () */
3406
3407 int
mtext_case_compare(MText * mt1,int from1,int to1,MText * mt2,int from2,int to2)3408 mtext_case_compare (MText *mt1, int from1, int to1,
3409 MText *mt2, int from2, int to2)
3410 {
3411 if (from1 < 0 || from1 > to1 || to1 > mt1->nchars)
3412 from1 = to1 = 0;
3413
3414 if (from2 < 0 || from2 > to2 || to2 > mt2->nchars)
3415 from2 = to2 = 0;
3416
3417 return case_compare (mt1, from1, to1, mt2, from2, to2);
3418 }
3419
3420 /*=*/
3421
3422 /***en
3423 @brief Lowercase an M-text.
3424
3425 The mtext_lowercase () function destructively converts each
3426 character in M-text $MT to lowercase. Adjacent characters in $MT
3427 may affect the case conversion. If the Mlanguage text property is
3428 attached to $MT, it may also affect the conversion. The length of
3429 $MT may change. Characters that cannot be converted to lowercase
    are left unchanged. All the text properties are inherited.
3431
3432 @return
3433 This function returns the length of the updated $MT.
3434 */
3435
3436 /***ja
3437 @brief M-text ��ʸ���ˤ���.
3438
3439 �ؿ� mtext_lowercase () �� M-text $MT ��γ�ʸ�����˲�Ū�˾�ʸ������
3440 �����롣�Ѵ��˺ݤ������ܤ���ʸ���αƶ�������뤳�Ȥ����롣$MT �˥�
3441 �����ȥץ�ѥƥ� Mlanguage ���դ��Ƥ�����ϡ�������Ѵ��˱ƶ���
3442 Ϳ�����롣$MT ��Ĺ�����Ѥ�뤳�Ȥ����롣��ʸ�����Ѵ��Ǥ��ʤ��ä�ʸ
3443 ���Ϥ��ΤޤĤ롣�ƥ����ȥץ�ѥƥ��Ϥ��٤ƷѾ�����롣
3444
3445 @return
3446 ���δؿ��Ϲ������ $MT ��Ĺ�����֤���
3447 */
3448
3449 /***
3450 @seealso
3451 mtext_titlecase (), mtext_uppercase ()
3452 */
3453
3454 int
mtext_lowercase(MText * mt)3455 mtext_lowercase (MText *mt)
3456
3457 {
3458 CASE_CONV_INIT (-1);
3459
3460 return mtext__lowercase (mt, 0, mtext_len (mt));
3461 }
3462
3463 /*=*/
3464
3465 /***en
3466 @brief Titlecase an M-text.
3467
3468 The mtext_titlecase () function destructively converts the first
3469 character with the cased property in M-text $MT to titlecase and
3470 the others to lowercase. The length of $MT may change. If the
3471 character cannot be converted to titlecase, it is left unchanged.
3472 All the text properties are inherited.
3473
3474 @return
3475 This function returns the length of the updated $MT.
3476 */
3477
3478 /***ja
3479 @brief M-text ���ȥ륱�����ˤ���.
3480
3481 �ؿ� mtext_titlecase () �� M-text $MT ��� cased �ץ�ѥƥ������
3482 �ǽ��ʸ�����ȥ륱�����ˡ������Ƥ���ʹߤ�ʸ����ʸ�����˲�Ū
3483 ���Ѵ����롣$MT ��Ĺ�����Ѥ�뤳�Ȥ����롣�����ȥ륱�����ˤ��Ѵ���
3484 ���ʤ��ä����Ϥ��Τޤޤ��Ѥ��ʤ����ƥ����ȥץ�ѥƥ��Ϥ��٤Ʒ�
3485 ������롣
3486
3487 @return
3488 ���δؿ��Ϲ������ $MT ��Ĺ�����֤���
3489 */
3490
3491 /***
3492 @seealso
3493 mtext_lowercase (), mtext_uppercase ()
3494 */
3495
3496 int
mtext_titlecase(MText * mt)3497 mtext_titlecase (MText *mt)
3498 {
3499 int len = mtext_len (mt), from, to;
3500
3501 CASE_CONV_INIT (-1);
3502
3503 /* Find 1st cased character. */
3504 for (from = 0; from < len; from++)
3505 {
3506 int csd = (int) mchartable_lookup (cased, mtext_ref_char (mt, from));
3507
3508 if (csd > 0 && csd & CASED)
3509 break;
3510 }
3511
3512 if (from == len)
3513 return len;
3514
3515 if (from == len - 1)
3516 return (mtext__titlecase (mt, from, len));
3517
3518 /* Go through following combining characters. */
3519 for (to = from + 1;
3520 (to < len
3521 && ((int) mchartable_lookup (combining_class, mtext_ref_char (mt, to))
3522 > 0));
3523 to++);
3524
3525 /* Titlecase the region and prepare for next lowercase operation.
3526 MT may be shortened or lengthened. */
3527 from = mtext__titlecase (mt, from, to);
3528
3529 return (mtext__lowercase (mt, from, mtext_len (mt)));
3530 }
3531
3532 /*=*/
3533
3534 /***en
3535 @brief Uppercase an M-text.
3536
3537
3538 The mtext_uppercase () function destructively converts each
3539 character in M-text $MT to uppercase. Adjacent characters in $MT
3540 may affect the case conversion. If the Mlanguage text property is
3541 attached to $MT, it may also affect the conversion. The length of
3542 $MT may change. Characters that cannot be converted to uppercase
3543 are left unchanged. All the text properties are inherited.
3544
3545 @return
3546 This function returns the length of the updated $MT.
3547 */
3548
3549 /***ja
3550 @brief M-text ����ʸ���ˤ���.
3551
3552 �ؿ� mtext_uppercase () �� M-text $MT ��γ�ʸ�����˲�Ū����ʸ������
3553 �����롣�Ѵ��˺ݤ������ܤ���ʸ���αƶ�������뤳�Ȥ����롣$MT �˥�
3554 �����ȥץ�ѥƥ� Mlanguage ���դ��Ƥ�����ϡ�������Ѵ��˱ƶ���
3555 Ϳ�����롣$MT ��Ĺ�����Ѥ�뤳�Ȥ����롣��ʸ�����Ѵ��Ǥ��ʤ��ä�ʸ
3556 ���Ϥ��ΤޤĤ롣�ƥ����ȥץ�ѥƥ��Ϥ��٤ƷѾ�����롣
3557
3558 @return
3559 ���δؿ��Ϲ������ $MT ��Ĺ�����֤���
3560 */
3561
3562 /***
3563 @seealso
3564 mtext_lowercase (), mtext_titlecase ()
3565 */
3566
3567 int
mtext_uppercase(MText * mt)3568 mtext_uppercase (MText *mt)
3569 {
3570 CASE_CONV_INIT (-1);
3571
3572 return (mtext__uppercase (mt, 0, mtext_len (mt)));
3573 }
3574
3575 /*** @} */
3576
3577 #include <stdio.h>
3578
3579 /*** @addtogroup m17nDebug */
3580 /*=*/
3581 /*** @{ */
3582
3583 /***en
3584 @brief Dump an M-text.
3585
3586 The mdebug_dump_mtext () function prints the M-text $MT in a
3587 human-readable form to stderr or to the destination specified by the
3588 environment variable MDEBUG_OUTPUT_FILE. $INDENT specifies how many
3589 columns to indent every line except the first. If $FULLP is zero, this
3590 function prints only a character code sequence. Otherwise, it
3591 prints the internal byte sequence and text properties as well.
3592
3593 @return
3594 This function returns $MT. */
3595 /***ja
3596 @brief M-text �����פ���.
3597
3598 �ؿ� mdebug_dump_mtext () �� M-text $MT ��ɸ�२�顼���Ϥ⤷���ϴ�
3599 ���ѿ� MDEBUG_DUMP_FONT �ǻ��ꤵ�줿�ե�����˿ʹ֤˲��ɤʷ��ǰ���
3600 ���롣 $INDENT �ϣ����ܰʹߤΥ���ǥ�Ȥ���ꤹ�롣$FULLP �� 0 �ʤ�
3601 �С�ʸ���������������������롣�����Ǥʤ���С������Х�����ȥƥ�
3602 ���ȥץ�ѥƥ���������롣
3603
3604 @return
3605 ���δؿ��� $MT ���֤��� */
3606
3607 MText *
mdebug_dump_mtext(MText * mt,int indent,int fullp)3608 mdebug_dump_mtext (MText *mt, int indent, int fullp)
3609 {
3610 int i;
3611
3612 if (! fullp)
3613 {
3614 fprintf (mdebug__output, "\"");
3615 for (i = 0; i < mt->nchars; i++)
3616 {
3617 int c = mtext_ref_char (mt, i);
3618
3619 if (c == '"' || c == '\\')
3620 fprintf (mdebug__output, "\\%c", c);
3621 else if ((c >= ' ' && c < 127) || c == '\n')
3622 fprintf (mdebug__output, "%c", c);
3623 else
3624 fprintf (mdebug__output, "\\x%02X", c);
3625 }
3626 fprintf (mdebug__output, "\"");
3627 return mt;
3628 }
3629
3630 fprintf (mdebug__output,
3631 "(mtext (size %d %d %d) (cache %d %d)",
3632 mt->nchars, mt->nbytes, mt->allocated,
3633 mt->cache_char_pos, mt->cache_byte_pos);
3634
3635 if (mt->nchars > 0)
3636 {
3637 char *prefix = (char *) alloca (indent + 1);
3638 unsigned char *p;
3639
3640 memset (prefix, 32, indent);
3641 prefix[indent] = 0;
3642
3643 fprintf (mdebug__output, "\n%s (bytes \"", prefix);
3644 for (i = 0; i < mt->nbytes; i++)
3645 fprintf (mdebug__output, "\\x%02x", mt->data[i]);
3646 fprintf (mdebug__output, "\")\n");
3647 fprintf (mdebug__output, "%s (chars \"", prefix);
3648 p = mt->data;
3649 for (i = 0; i < mt->nchars; i++)
3650 {
3651 int len;
3652 int c = STRING_CHAR_AND_BYTES (p, len);
3653
3654 if (c == '"' || c == '\\')
3655 fprintf (mdebug__output, "\\%c", c);
3656 else if (c >= ' ' && c < 127)
3657 fputc (c, mdebug__output);
3658 else
3659 fprintf (mdebug__output, "\\x%X", c);
3660 p += len;
3661 }
3662 fprintf (mdebug__output, "\")");
3663 if (mt->plist)
3664 {
3665 fprintf (mdebug__output, "\n%s ", prefix);
3666 dump_textplist (mt->plist, indent + 1);
3667 }
3668 }
3669 fprintf (mdebug__output, ")");
3670 return mt;
3671 }
3672
3673 /*** @} */
3674
3675 /*
3676 Local Variables:
3677 coding: euc-japan
3678 End:
3679 */
3680